From 70651f8dd86f7931ae77d222ad248fa7ae26f8ae Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 14:53:25 +0900 Subject: [PATCH 1/6] =?UTF-8?q?feat:=20ai()=20=E6=9D=A1=E4=BB=B6=E5=BC=8F?= =?UTF-8?q?=E3=81=AB=E3=82=88=E3=82=8BAI=E9=81=B7=E7=A7=BB=E5=88=A4?= =?UTF-8?q?=E6=96=AD=E3=81=A8=E3=83=91=E3=83=A9=E3=83=AC=E3=83=AB=E3=82=B9?= =?UTF-8?q?=E3=83=86=E3=83=83=E3=83=97=E5=AE=9F=E8=A1=8C=E3=82=92=E5=AE=9F?= =?UTF-8?q?=E8=A3=85=20(#9,=20#20)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - rules の condition に ai("...") 式を追加し、別AIが遷移先を判断する仕組みを導入 - ワークフローステップに parallel フィールドを追加し、サブステップの並列実行を実装 - all()/any() 集約条件の仕様書を追加 --- docs/spec-aggregate-conditions.md | 104 ++++++++ src/__tests__/ai-judge.test.ts | 53 ++++ src/__tests__/instructionBuilder.test.ts | 42 +++ src/__tests__/parallel-and-loader.test.ts | 300 ++++++++++++++++++++++ src/claude/client.ts | 74 ++++++ src/config/workflowLoader.ts | 84 ++++-- src/models/schemas.ts | 27 +- src/models/types.ts | 6 + src/workflow/engine.ts | 191 ++++++++++++-- src/workflow/instruction-builder.ts | 10 +- 10 files changed, 837 insertions(+), 54 deletions(-) create mode 100644 docs/spec-aggregate-conditions.md create mode 100644 src/__tests__/ai-judge.test.ts create mode 100644 src/__tests__/parallel-and-loader.test.ts diff --git a/docs/spec-aggregate-conditions.md b/docs/spec-aggregate-conditions.md new file mode 100644 index 0000000..dcfbbde --- /dev/null +++ b/docs/spec-aggregate-conditions.md @@ -0,0 +1,104 @@ +# 集約条件 `all()` / `any()` 仕様 + +## 背景 + +パラレルステップでは複数のサブステップが並列実行される。各サブステップは自身のルールで結果(approved, rejected 等)を判定するが、親ステップが「全体としてどう遷移するか」を決定する必要がある。 + +現状、親ステップの遷移判定は結合テキストに対する `ai()` 評価かタグ検出しかない。しかし、「全員が承認したら次へ」「1人でも却下したらやり直し」といった集約判定はルールベースで十分であり、AI呼び出しは不要。 + +## 目的 + +パラレルステップの親ルールに `all("condition")` / `any("condition")` 構文を追加し、サブステップの判定結果をルールベースで集約する。 + +## YAML 構文 + +```yaml +- name: parallel-review + parallel: + - name: arch-review + agent: ~/.takt/agents/default/architect.md + rules: + - condition: approved + next: _ + - condition: rejected + next: _ + - name: security-review + agent: ~/.takt/agents/default/security-reviewer.md + rules: + - condition: approved + next: _ + - condition: rejected + next: _ + rules: + - condition: all("approved") + next: COMPLETE + - condition: any("rejected") + next: implement +``` + +## 式のセマンティクス + +| 式 | 意味 | +|---|------| +| `all("X")` | 全サブステップの判定結果が `X` のとき真 | +| `any("X")` | 1つ以上のサブステップの判定結果が `X` のとき真 | + +「判定結果」とは、サブステップのルール評価でマッチしたルールの `condition` 値を指す。 + +## エッジケースの定義 + +| ケース | `all("X")` | `any("X")` | +|--------|-----------|-----------| +| 全サブステップが X | true | true | +| 一部が X | false | true | +| いずれも X でない | false | false | +| 判定結果なし(ルール未定義 or マッチなし) | false | そのサブステップは判定対象外 | +| サブステップ 0 件 | false | false | +| 非パラレルステップで使用 | false | false | + +`all()` は「全員が確実に X」を要求するため、判定不能なサブステップがあれば false。 +`any()` は「誰か1人でも X」を探すため、判定不能なサブステップは無視する。 + +## 評価の優先順位 + +親ステップの `rules` 配列は先頭から順に評価される。各ルールの種類に応じた評価方式が適用される。 + +| 順位 | 種類 | 評価方式 | コスト | +|------|------|---------|--------| +| 1 | `all()` / `any()` | サブステップの判定結果を集計 | なし | +| 2 | 通常条件(`done` 等) | 結合テキストで `[STEP:N]` タグ検出 | なし | +| 3 | `ai("...")` | AI judge 呼び出し | API 1回 | + +最初にマッチしたルールで遷移が確定する。`all()` / `any()` を先に書けば、マッチした時点で `ai()` は呼ばれない。 + +## 他の条件式との混在 + +同一の `rules` 配列内で自由に混在できる。 + +```yaml +rules: + - condition: all("approved") # 集約(高速) + next: COMPLETE + - condition: any("rejected") # 集約(高速) + next: implement + - condition: ai("判断が難しい場合") # AI フォールバック + next: manual-review +``` + +## サブステップのルール + +サブステップの `rules` はサブステップ自身の判定結果を決めるために使う。`next` フィールドはパラレル文脈では使用されない(親の `rules` が遷移を決定する)。スキーマ互換性のため `next` は必須のまま残し、値は任意とする。 + +## ステータスタグの注入 + +親ステップの全ルールが `all()` / `any()` / `ai()` のいずれかである場合、ステータスタグ(`[STEP:N]` 系)の注入をスキップする。タグ検出が不要なため。 + +## 変更対象 + +| ファイル | 変更内容 | +|---------|---------| +| `src/models/types.ts` | `WorkflowRule` に集約条件フラグを追加 | +| `src/config/workflowLoader.ts` | `all()` / `any()` パターンの検出と正規化 | +| `src/workflow/engine.ts` | 集約条件の評価ロジックを追加 | +| `src/workflow/instruction-builder.ts` | ステータスタグスキップ条件を拡張 | +| テスト | パース、評価、エッジケース、混在ルール | diff --git a/src/__tests__/ai-judge.test.ts b/src/__tests__/ai-judge.test.ts new file mode 100644 index 0000000..d86e68b --- /dev/null +++ b/src/__tests__/ai-judge.test.ts @@ -0,0 +1,53 @@ +/** + * Tests for AI judge (ai() condition evaluation) + */ + +import { describe, it, expect } from 'vitest'; +import { detectJudgeIndex, buildJudgePrompt } from '../claude/client.js'; + +describe('detectJudgeIndex', () => { + it('should detect [JUDGE:1] as index 0', () => { + expect(detectJudgeIndex('[JUDGE:1]')).toBe(0); + }); + + it('should detect [JUDGE:3] as index 2', () => { + expect(detectJudgeIndex('Some output [JUDGE:3] more text')).toBe(2); + }); + + it('should return -1 for no match', () => { + expect(detectJudgeIndex('No judge tag here')).toBe(-1); + }); + + it('should return -1 for [JUDGE:0]', () => { + expect(detectJudgeIndex('[JUDGE:0]')).toBe(-1); + }); + + it('should be case-insensitive', () => { + expect(detectJudgeIndex('[judge:2]')).toBe(1); + }); +}); + +describe('buildJudgePrompt', () => { + it('should build a well-structured judge prompt', () => { + const agentOutput = 'Code implementation complete.\n\nAll tests pass.'; + const conditions = [ + { index: 0, text: 'No issues found' }, + { index: 1, text: 'Issues detected that need fixing' }, + ]; + + const prompt = buildJudgePrompt(agentOutput, conditions); + + expect(prompt).toContain('# Judge Task'); + expect(prompt).toContain('Code implementation complete.'); + expect(prompt).toContain('All tests pass.'); + expect(prompt).toContain('| 1 | No issues found |'); + expect(prompt).toContain('| 2 | Issues detected that need fixing |'); + expect(prompt).toContain('[JUDGE:N]'); + }); + + it('should handle single condition', () => { + const prompt = buildJudgePrompt('output', [{ index: 0, text: 'Always true' }]); + + expect(prompt).toContain('| 1 | Always true |'); + }); +}); diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index 77e9eaa..c7a5ecc 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -910,6 +910,48 @@ describe('instruction-builder', () => { }); }); + describe('ai() condition status tag skip', () => { + it('should skip status rules when ALL rules are ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'ai("No issues")', next: 'COMPLETE', isAiCondition: true, aiConditionText: 'No issues' }, + { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('[TEST-STEP:'); + }); + + it('should include status rules when some rules are NOT ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'Error occurred', next: 'ABORT' }, + { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Status Output Rules'); + }); + + it('should include status rules when no rules are ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + { condition: 'Blocked', next: 'ABORT' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Status Output Rules'); + }); + }); + describe('isReportObjectConfig', () => { it('should return true for ReportObjectConfig', () => { expect(isReportObjectConfig({ name: '00-plan.md' })).toBe(true); diff --git a/src/__tests__/parallel-and-loader.test.ts b/src/__tests__/parallel-and-loader.test.ts new file mode 100644 index 0000000..b6dfde7 --- /dev/null +++ b/src/__tests__/parallel-and-loader.test.ts @@ -0,0 +1,300 @@ +/** + * Tests for parallel step execution and ai() condition loader + * + * Covers: + * - Schema validation for parallel sub-steps + * - Workflow loader normalization of ai() conditions and parallel steps + * - Engine parallel step aggregation logic + */ + +import { describe, it, expect } from 'vitest'; +import { WorkflowConfigRawSchema, ParallelSubStepRawSchema, WorkflowStepRawSchema } from '../models/schemas.js'; + +describe('ParallelSubStepRawSchema', () => { + it('should validate a valid parallel sub-step', () => { + const raw = { + name: 'arch-review', + agent: '~/.takt/agents/default/reviewer.md', + instruction_template: 'Review architecture', + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a sub-step without agent', () => { + const raw = { + name: 'no-agent-step', + instruction_template: 'Do something', + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); + + it('should accept optional fields', () => { + const raw = { + name: 'full-sub-step', + agent: '~/.takt/agents/default/coder.md', + agent_name: 'Coder', + allowed_tools: ['Read', 'Grep'], + model: 'haiku', + edit: false, + instruction_template: 'Do work', + report: '01-report.md', + pass_previous_response: false, + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.agent_name).toBe('Coder'); + expect(result.data.allowed_tools).toEqual(['Read', 'Grep']); + expect(result.data.edit).toBe(false); + } + }); + + it('should accept rules on sub-steps', () => { + const raw = { + name: 'reviewed', + agent: '~/.takt/agents/default/reviewer.md', + instruction_template: 'Review', + rules: [ + { condition: 'No issues', next: 'COMPLETE' }, + { condition: 'Issues found', next: 'fix' }, + ], + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules).toHaveLength(2); + } + }); +}); + +describe('WorkflowStepRawSchema with parallel', () => { + it('should accept a step with parallel sub-steps (no agent)', () => { + const raw = { + name: 'parallel-review', + parallel: [ + { name: 'arch-review', agent: 'reviewer.md', instruction_template: 'Review arch' }, + { name: 'sec-review', agent: 'security.md', instruction_template: 'Review security' }, + ], + rules: [ + { condition: 'All pass', next: 'COMPLETE' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a step with neither agent nor parallel', () => { + const raw = { + name: 'orphan-step', + instruction_template: 'Do something', + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); + + it('should accept a step with agent (no parallel)', () => { + const raw = { + name: 'normal-step', + agent: 'coder.md', + instruction_template: 'Code something', + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a step with empty parallel array', () => { + const raw = { + name: 'empty-parallel', + parallel: [], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); +}); + +describe('WorkflowConfigRawSchema with parallel steps', () => { + it('should validate a workflow with parallel step', () => { + const raw = { + name: 'test-parallel-workflow', + steps: [ + { + name: 'plan', + agent: 'planner.md', + rules: [{ condition: 'Plan complete', next: 'review' }], + }, + { + name: 'review', + parallel: [ + { name: 'arch-review', agent: 'arch-reviewer.md', instruction_template: 'Review architecture' }, + { name: 'sec-review', agent: 'sec-reviewer.md', instruction_template: 'Review security' }, + ], + rules: [ + { condition: 'All approved', next: 'COMPLETE' }, + { condition: 'Issues found', next: 'plan' }, + ], + }, + ], + initial_step: 'plan', + max_iterations: 10, + }; + + const result = WorkflowConfigRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.steps).toHaveLength(2); + expect(result.data.steps[1].parallel).toHaveLength(2); + } + }); + + it('should validate a workflow mixing normal and parallel steps', () => { + const raw = { + name: 'mixed-workflow', + steps: [ + { name: 'plan', agent: 'planner.md', rules: [{ condition: 'Done', next: 'implement' }] }, + { name: 'implement', agent: 'coder.md', rules: [{ condition: 'Done', next: 'review' }] }, + { + name: 'review', + parallel: [ + { name: 'arch', agent: 'arch.md' }, + { name: 'sec', agent: 'sec.md' }, + ], + rules: [{ condition: 'All pass', next: 'COMPLETE' }], + }, + ], + initial_step: 'plan', + }; + + const result = WorkflowConfigRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.steps[0].agent).toBe('planner.md'); + expect(result.data.steps[2].parallel).toHaveLength(2); + } + }); +}); + +describe('ai() condition in WorkflowRuleSchema', () => { + it('should accept ai() condition as a string', () => { + const raw = { + name: 'test-step', + agent: 'agent.md', + rules: [ + { condition: 'ai("All reviews approved")', next: 'COMPLETE' }, + { condition: 'ai("Issues detected")', next: 'fix' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules?.[0].condition).toBe('ai("All reviews approved")'); + expect(result.data.rules?.[1].condition).toBe('ai("Issues detected")'); + } + }); + + it('should accept mixed regular and ai() conditions', () => { + const raw = { + name: 'mixed-rules', + agent: 'agent.md', + rules: [ + { condition: 'Regular condition', next: 'step-a' }, + { condition: 'ai("AI evaluated condition")', next: 'step-b' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); +}); + +describe('ai() condition regex parsing', () => { + // Test the regex pattern used in workflowLoader.ts + const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; + + it('should match simple ai() condition', () => { + const match = 'ai("No issues found")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('No issues found'); + }); + + it('should match ai() with Japanese text', () => { + const match = 'ai("全てのレビューが承認している場合")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('全てのレビューが承認している場合'); + }); + + it('should not match regular condition text', () => { + const match = 'No issues found'.match(AI_CONDITION_REGEX); + expect(match).toBeNull(); + }); + + it('should not match partial ai() pattern', () => { + expect('ai(missing quotes)'.match(AI_CONDITION_REGEX)).toBeNull(); + expect('ai("")'.match(AI_CONDITION_REGEX)).toBeNull(); // .+ requires at least 1 char + expect('not ai("text")'.match(AI_CONDITION_REGEX)).toBeNull(); // must start with ai( + expect('ai("text") extra'.match(AI_CONDITION_REGEX)).toBeNull(); // must end with ) + }); + + it('should match ai() with special characters in text', () => { + const match = 'ai("Issues found (critical/high severity)")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('Issues found (critical/high severity)'); + }); +}); + +describe('parallel step aggregation format', () => { + it('should aggregate sub-step outputs in the expected format', () => { + // Mirror the aggregation logic from engine.ts + const subResults = [ + { name: 'arch-review', content: 'Architecture looks good.\n## Result: APPROVE' }, + { name: 'sec-review', content: 'No security issues.\n## Result: APPROVE' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toContain('## arch-review'); + expect(aggregatedContent).toContain('Architecture looks good.'); + expect(aggregatedContent).toContain('---'); + expect(aggregatedContent).toContain('## sec-review'); + expect(aggregatedContent).toContain('No security issues.'); + }); + + it('should handle single sub-step', () => { + const subResults = [ + { name: 'only-step', content: 'Single result' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toBe('## only-step\nSingle result'); + expect(aggregatedContent).not.toContain('---'); + }); + + it('should handle empty content from sub-steps', () => { + const subResults = [ + { name: 'step-a', content: '' }, + { name: 'step-b', content: 'Has content' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toContain('## step-a\n'); + expect(aggregatedContent).toContain('## step-b\nHas content'); + }); +}); diff --git a/src/claude/client.ts b/src/claude/client.ts index 75849f3..5aa6bbc 100644 --- a/src/claude/client.ts +++ b/src/claude/client.ts @@ -165,6 +165,80 @@ export async function callClaudeCustom( }; } +/** + * Detect judge rule index from [JUDGE:N] tag pattern. + * Returns 0-based rule index, or -1 if no match. + */ +export function detectJudgeIndex(content: string): number { + const regex = /\[JUDGE:(\d+)\]/i; + const match = content.match(regex); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +/** + * Build the prompt for the AI judge that evaluates agent output against ai() conditions. + */ +export function buildJudgePrompt( + agentOutput: string, + aiConditions: { index: number; text: string }[], +): string { + const conditionList = aiConditions + .map((c) => `| ${c.index + 1} | ${c.text} |`) + .join('\n'); + + return [ + '# Judge Task', + '', + 'You are a judge evaluating an agent\'s output against a set of conditions.', + 'Read the agent output below, then determine which condition best matches.', + '', + '## Agent Output', + '```', + agentOutput, + '```', + '', + '## Conditions', + '| # | Condition |', + '|---|-----------|', + conditionList, + '', + '## Instructions', + 'Output ONLY the tag `[JUDGE:N]` where N is the number of the best matching condition.', + 'Do not output anything else.', + ].join('\n'); +} + +/** + * Call AI judge to evaluate agent output against ai() conditions. + * Uses a lightweight model (haiku) for cost efficiency. + * Returns 0-based index of the matched ai() condition, or -1 if no match. + */ +export async function callAiJudge( + agentOutput: string, + aiConditions: { index: number; text: string }[], + options: { cwd: string }, +): Promise { + const prompt = buildJudgePrompt(agentOutput, aiConditions); + + const spawnOptions: ClaudeSpawnOptions = { + cwd: options.cwd, + model: 'haiku', + maxTurns: 1, + }; + + const result = await executeClaudeCli(prompt, spawnOptions); + if (!result.success) { + log.error('AI judge call failed', { error: result.error }); + return -1; + } + + return detectJudgeIndex(result.content); +} + /** Call a Claude Code built-in agent (using claude --agent flag if available) */ export async function callClaudeAgent( claudeAgentName: string, diff --git a/src/config/workflowLoader.ts b/src/config/workflowLoader.ts index 93c9f9a..0688512 100644 --- a/src/config/workflowLoader.ts +++ b/src/config/workflowLoader.ts @@ -120,6 +120,64 @@ function normalizeReport( ); } +/** Regex to detect ai("...") condition expressions */ +const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; + +/** + * Parse a rule's condition for ai() expressions. + * If condition is `ai("some text")`, sets isAiCondition and aiConditionText. + */ +function normalizeRule(r: { condition: string; next: string; appendix?: string }): WorkflowRule { + const match = r.condition.match(AI_CONDITION_REGEX); + if (match?.[1]) { + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + isAiCondition: true, + aiConditionText: match[1], + }; + } + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + }; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type RawStep = any; + +/** + * Normalize a raw step into internal WorkflowStep format. + */ +function normalizeStepFromRaw(step: RawStep, workflowDir: string): WorkflowStep { + const rules: WorkflowRule[] | undefined = step.rules?.map(normalizeRule); + const agentSpec: string = step.agent ?? ''; + + const result: WorkflowStep = { + name: step.name, + agent: agentSpec, + agentDisplayName: step.agent_name || (agentSpec ? extractAgentDisplayName(agentSpec) : step.name), + agentPath: agentSpec ? resolveAgentPathForWorkflow(agentSpec, workflowDir) : undefined, + allowedTools: step.allowed_tools, + provider: step.provider, + model: step.model, + permissionMode: step.permission_mode, + edit: step.edit, + instructionTemplate: resolveContentPath(step.instruction_template, workflowDir) || step.instruction || '{task}', + rules, + report: normalizeReport(step.report, workflowDir), + passPreviousResponse: step.pass_previous_response ?? true, + }; + + if (step.parallel && step.parallel.length > 0) { + result.parallel = step.parallel.map((sub: RawStep) => normalizeStepFromRaw(sub, workflowDir)); + } + + return result; +} + /** * Convert raw YAML workflow config to internal format. * Agent paths are resolved relative to the workflow directory. @@ -127,29 +185,9 @@ function normalizeReport( function normalizeWorkflowConfig(raw: unknown, workflowDir: string): WorkflowConfig { const parsed = WorkflowConfigRawSchema.parse(raw); - const steps: WorkflowStep[] = parsed.steps.map((step) => { - const rules: WorkflowRule[] | undefined = step.rules?.map((r) => ({ - condition: r.condition, - next: r.next, - appendix: r.appendix, - })); - - return { - name: step.name, - agent: step.agent, - agentDisplayName: step.agent_name || extractAgentDisplayName(step.agent), - agentPath: resolveAgentPathForWorkflow(step.agent, workflowDir), - allowedTools: step.allowed_tools, - provider: step.provider, - model: step.model, - permissionMode: step.permission_mode, - edit: step.edit, - instructionTemplate: resolveContentPath(step.instruction_template, workflowDir) || step.instruction || '{task}', - rules, - report: normalizeReport(step.report, workflowDir), - passPreviousResponse: step.pass_previous_response, - }; - }); + const steps: WorkflowStep[] = parsed.steps.map((step) => + normalizeStepFromRaw(step, workflowDir), + ); return { name: parsed.name, diff --git a/src/models/schemas.ts b/src/models/schemas.ts index e1d7aab..4ec6ef8 100644 --- a/src/models/schemas.ts +++ b/src/models/schemas.ts @@ -81,10 +81,28 @@ export const WorkflowRuleSchema = z.object({ appendix: z.string().optional(), }); +/** Sub-step schema for parallel execution (agent is required) */ +export const ParallelSubStepRawSchema = z.object({ + name: z.string().min(1), + agent: z.string().min(1), + agent_name: z.string().optional(), + allowed_tools: z.array(z.string()).optional(), + provider: z.enum(['claude', 'codex', 'mock']).optional(), + model: z.string().optional(), + permission_mode: PermissionModeSchema.optional(), + edit: z.boolean().optional(), + instruction: z.string().optional(), + instruction_template: z.string().optional(), + rules: z.array(WorkflowRuleSchema).optional(), + report: ReportFieldSchema.optional(), + pass_previous_response: z.boolean().optional().default(true), +}); + /** Workflow step schema - raw YAML format */ export const WorkflowStepRawSchema = z.object({ name: z.string().min(1), - agent: z.string().min(1), + /** Agent is required for normal steps, optional for parallel container steps */ + agent: z.string().optional(), /** Display name for the agent (shown in output). Falls back to agent basename if not specified */ agent_name: z.string().optional(), allowed_tools: z.array(z.string()).optional(), @@ -101,7 +119,12 @@ export const WorkflowStepRawSchema = z.object({ /** Report file(s) for this step */ report: ReportFieldSchema.optional(), pass_previous_response: z.boolean().optional().default(true), -}); + /** Sub-steps to execute in parallel */ + parallel: z.array(ParallelSubStepRawSchema).optional(), +}).refine( + (data) => data.agent || (data.parallel && data.parallel.length > 0), + { message: 'Step must have either an agent or parallel sub-steps' }, +); /** Workflow configuration schema - raw YAML format */ export const WorkflowConfigRawSchema = z.object({ diff --git a/src/models/types.ts b/src/models/types.ts index 8bce74c..88efc90 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -48,6 +48,10 @@ export interface WorkflowRule { next: string; /** Template for additional AI output */ appendix?: string; + /** Whether this condition uses ai() expression (set by loader) */ + isAiCondition?: boolean; + /** The condition text inside ai("...") for AI judge evaluation (set by loader) */ + aiConditionText?: string; } /** Report file configuration for a workflow step (label: path pair) */ @@ -96,6 +100,8 @@ export interface WorkflowStep { /** Report file configuration. Single string, array of label:path, or object with order/format. */ report?: string | ReportConfig[] | ReportObjectConfig; passPreviousResponse: boolean; + /** Sub-steps to execute in parallel. When set, this step runs all sub-steps concurrently. */ + parallel?: WorkflowStep[]; } /** Loop detection configuration */ diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index 6e311e8..ae11961 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -15,7 +15,7 @@ import { runAgent, type RunAgentOptions } from '../agents/runner.js'; import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; -import { detectRuleIndex } from '../claude/client.js'; +import { detectRuleIndex, callAiJudge } from '../claude/client.js'; import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; @@ -196,22 +196,19 @@ export class WorkflowEngine extends EventEmitter { } } - /** Run a single step */ + /** Run a single step (delegates to runParallelStep if step has parallel sub-steps) */ private async runStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { - const stepIteration = incrementStepIteration(this.state, step.name); - const instruction = this.buildInstruction(step, stepIteration); - const sessionId = this.state.agentSessions.get(step.agent); - log.debug('Running step', { - step: step.name, - agent: step.agent, - stepIteration, - iteration: this.state.iteration, - sessionId: sessionId ?? 'new', - }); + if (step.parallel && step.parallel.length > 0) { + return this.runParallelStep(step); + } + return this.runNormalStep(step); + } - const agentOptions: RunAgentOptions = { + /** Build RunAgentOptions from a step's configuration */ + private buildAgentOptions(step: WorkflowStep): RunAgentOptions { + return { cwd: this.cwd, - sessionId, + sessionId: this.state.agentSessions.get(step.agent), agentPath: step.agentPath, allowedTools: step.allowedTools, provider: step.provider, @@ -222,23 +219,61 @@ export class WorkflowEngine extends EventEmitter { onAskUserQuestion: this.options.onAskUserQuestion, bypassPermissions: this.options.bypassPermissions, }; + } - let response = await runAgent(step.agent, instruction, agentOptions); + /** Update agent session and notify via callback if session changed */ + private updateAgentSession(agent: string, sessionId: string | undefined): void { + if (!sessionId) return; - if (response.sessionId) { - const previousSessionId = this.state.agentSessions.get(step.agent); - this.state.agentSessions.set(step.agent, response.sessionId); + const previousSessionId = this.state.agentSessions.get(agent); + this.state.agentSessions.set(agent, sessionId); - if (this.options.onSessionUpdate && response.sessionId !== previousSessionId) { - this.options.onSessionUpdate(step.agent, response.sessionId); - } + if (this.options.onSessionUpdate && sessionId !== previousSessionId) { + this.options.onSessionUpdate(agent, sessionId); + } + } + + /** + * Detect matched rule for a step's response. + * 1. Try standard [STEP:N] tag detection + * 2. Fallback to ai() condition evaluation via AI judge + */ + private async detectMatchedRule(step: WorkflowStep, content: string): Promise { + if (!step.rules || step.rules.length === 0) return undefined; + + const ruleIndex = detectRuleIndex(content, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return ruleIndex; } - if (step.rules && step.rules.length > 0) { - const ruleIndex = detectRuleIndex(response.content, step.name); - if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - response = { ...response, matchedRuleIndex: ruleIndex }; - } + const aiRuleIndex = await this.evaluateAiConditions(step, content); + if (aiRuleIndex >= 0) { + return aiRuleIndex; + } + + return undefined; + } + + /** Run a normal (non-parallel) step */ + private async runNormalStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { + const stepIteration = incrementStepIteration(this.state, step.name); + const instruction = this.buildInstruction(step, stepIteration); + log.debug('Running step', { + step: step.name, + agent: step.agent, + stepIteration, + iteration: this.state.iteration, + sessionId: this.state.agentSessions.get(step.agent) ?? 'new', + }); + + const agentOptions = this.buildAgentOptions(step); + let response = await runAgent(step.agent, instruction, agentOptions); + + this.updateAgentSession(step.agent, response.sessionId); + + const matchedRuleIndex = await this.detectMatchedRule(step, response.content); + if (matchedRuleIndex != null) { + response = { ...response, matchedRuleIndex }; } this.state.stepOutputs.set(step.name, response); @@ -246,6 +281,110 @@ export class WorkflowEngine extends EventEmitter { return { response, instruction }; } + /** + * Run a parallel step: execute all sub-steps concurrently, then aggregate results. + * The aggregated output becomes the parent step's response for rules evaluation. + */ + private async runParallelStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { + const subSteps = step.parallel!; + const stepIteration = incrementStepIteration(this.state, step.name); + log.debug('Running parallel step', { + step: step.name, + subSteps: subSteps.map(s => s.name), + stepIteration, + }); + + // Run all sub-steps concurrently + const subResults = await Promise.all( + subSteps.map(async (subStep) => { + const subIteration = incrementStepIteration(this.state, subStep.name); + const subInstruction = this.buildInstruction(subStep, subIteration); + + const agentOptions = this.buildAgentOptions(subStep); + const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); + + this.updateAgentSession(subStep.agent, subResponse.sessionId); + + // Detect sub-step rule matches (tag detection + ai() fallback) + const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content); + const finalResponse = matchedRuleIndex != null + ? { ...subResponse, matchedRuleIndex } + : subResponse; + + this.state.stepOutputs.set(subStep.name, finalResponse); + this.emitStepReports(subStep); + + return { subStep, response: finalResponse, instruction: subInstruction }; + }), + ); + + // Aggregate sub-step outputs into parent step's response + const aggregatedContent = subResults + .map((r) => `## ${r.subStep.name}\n${r.response.content}`) + .join('\n\n---\n\n'); + + const aggregatedInstruction = subResults + .map((r) => r.instruction) + .join('\n\n'); + + // Evaluate parent step's rules against aggregated output + const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent); + + const aggregatedResponse: AgentResponse = { + agent: step.name, + status: 'done', + content: aggregatedContent, + timestamp: new Date(), + ...(matchedRuleIndex != null && { matchedRuleIndex }), + }; + + this.state.stepOutputs.set(step.name, aggregatedResponse); + this.emitStepReports(step); + return { response: aggregatedResponse, instruction: aggregatedInstruction }; + } + + /** + * Evaluate ai() conditions via AI judge. + * Collects all ai() rules, calls the judge, and maps the result back to the original rule index. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + */ + private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise { + if (!step.rules) return -1; + + const aiConditions: { index: number; text: string }[] = []; + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (rule.isAiCondition && rule.aiConditionText) { + aiConditions.push({ index: i, text: rule.aiConditionText }); + } + } + + if (aiConditions.length === 0) return -1; + + log.debug('Evaluating ai() conditions via judge', { + step: step.name, + conditionCount: aiConditions.length, + }); + + // Remap: judge returns 0-based index within aiConditions array + const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text })); + const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd }); + + if (judgeResult >= 0 && judgeResult < aiConditions.length) { + const matched = aiConditions[judgeResult]!; + log.debug('AI judge matched condition', { + step: step.name, + judgeResult, + originalRuleIndex: matched.index, + condition: matched.text, + }); + return matched.index; + } + + log.debug('AI judge did not match any condition', { step: step.name }); + return -1; + } + /** * Determine next step for a completed step using rules-based routing. */ diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index cb952c5..71abf8d 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -494,10 +494,14 @@ export function buildInstruction( } // 7. Status rules (auto-generated from rules) + // Skip when ALL rules are ai() conditions — agent doesn't need to output status tags if (step.rules && step.rules.length > 0) { - const statusHeader = renderStatusRulesHeader(language); - const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); - sections.push(`${statusHeader}\n${generatedPrompt}`); + const allAiConditions = step.rules.every((r) => r.isAiCondition); + if (!allAiConditions) { + const statusHeader = renderStatusRulesHeader(language); + const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(`${statusHeader}\n${generatedPrompt}`); + } } return sections.join('\n\n'); From 9c597a9b0d1b39e7f00755b9eb35bcab8b33aa2e Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:26:56 +0900 Subject: [PATCH 2/6] =?UTF-8?q?=E3=83=AC=E3=83=9D=E3=83=BC=E3=83=88?= =?UTF-8?q?=E5=87=BA=E5=8A=9B=E3=82=92=E3=83=95=E3=82=A7=E3=83=BC=E3=82=BA?= =?UTF-8?q?2=E3=81=AB=E5=88=86=E9=9B=A2=E3=81=97=E3=80=81=E6=9C=AC?= =?UTF-8?q?=E4=BD=93=E5=AE=9F=E8=A1=8C=E3=81=8B=E3=82=89Write=E3=82=92?= =?UTF-8?q?=E9=99=A4=E5=A4=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ステップ実行を2フェーズに分離: - フェーズ1(本体): allowed_toolsからWriteを除外、レポート情報を注入しない - フェーズ2(レポート出力): 同一セッションresume、Writeのみ付与、ステータス検出なし buildInstruction()からレポート関連コードを削除し、 buildReportInstruction()を新設してレポート出力の責務を完全分離。 --- src/__tests__/instructionBuilder.test.ts | 432 +++++++++++------------ src/agents/runner.ts | 6 + src/providers/claude.ts | 2 + src/providers/index.ts | 2 + src/workflow/engine.ts | 68 +++- src/workflow/instruction-builder.ts | 177 ++++++++-- 6 files changed, 419 insertions(+), 268 deletions(-) diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index c7a5ecc..d168624 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -5,12 +5,14 @@ import { describe, it, expect } from 'vitest'; import { buildInstruction, + buildReportInstruction, buildExecutionMetadata, renderExecutionMetadata, renderStatusRulesHeader, generateStatusRulesFromRules, isReportObjectConfig, type InstructionContext, + type ReportInstructionContext, } from '../workflow/instruction-builder.js'; import type { WorkflowStep, WorkflowRule } from '../models/types.js'; @@ -444,7 +446,7 @@ describe('instruction-builder', () => { expect(result).toContain('- Step: implement'); }); - it('should include single report file when report is a string', () => { + it('should NOT include report info even when step has report (phase separation)', () => { const step = createMinimalStep('Do work'); step.name = 'plan'; step.report = '00-plan.md'; @@ -455,14 +457,13 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('- Report Directory: 20260129-test/'); - expect(result).toContain('- Report File: 20260129-test/00-plan.md'); - expect(result).not.toContain('Report Files:'); + expect(result).toContain('## Workflow Context'); + expect(result).not.toContain('Report Directory'); + expect(result).not.toContain('Report File'); }); - it('should include multiple report files when report is ReportConfig[]', () => { + it('should NOT include report info for ReportConfig[] (phase separation)', () => { const step = createMinimalStep('Do work'); - step.name = 'implement'; step.report = [ { label: 'Scope', path: '01-scope.md' }, { label: 'Decisions', path: '02-decisions.md' }, @@ -474,16 +475,12 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('- Report Directory: 20260129-test/'); - expect(result).toContain('- Report Files:'); - expect(result).toContain(' - Scope: 20260129-test/01-scope.md'); - expect(result).toContain(' - Decisions: 20260129-test/02-decisions.md'); - expect(result).not.toContain('Report File:'); + expect(result).not.toContain('Report Directory'); + expect(result).not.toContain('Report Files'); }); - it('should include report file when report is ReportObjectConfig', () => { + it('should NOT include report info for ReportObjectConfig (phase separation)', () => { const step = createMinimalStep('Do work'); - step.name = 'plan'; step.report = { name: '00-plan.md' }; const context = createMinimalContext({ reportDir: '20260129-test', @@ -492,33 +489,6 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('- Report Directory: 20260129-test/'); - expect(result).toContain('- Report File: 20260129-test/00-plan.md'); - expect(result).not.toContain('Report Files:'); - }); - - it('should NOT include report info when reportDir is undefined', () => { - const step = createMinimalStep('Do work'); - step.report = '00-plan.md'; - const context = createMinimalContext({ language: 'en' }); - - const result = buildInstruction(step, context); - - expect(result).toContain('## Workflow Context'); - expect(result).not.toContain('Report Directory'); - expect(result).not.toContain('Report File'); - }); - - it('should NOT include report info when step has no report', () => { - const step = createMinimalStep('Do work'); - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('## Workflow Context'); expect(result).not.toContain('Report Directory'); expect(result).not.toContain('Report File'); }); @@ -534,99 +504,10 @@ describe('instruction-builder', () => { expect(result).toContain('- Step Iteration: 3(このステップの実行回数)'); }); - - it('should NOT include .takt/reports/ prefix in report paths', () => { - const step = createMinimalStep('Do work'); - step.report = '00-plan.md'; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).not.toContain('.takt/reports/'); - }); }); - describe('ReportObjectConfig order/format injection', () => { - it('should inject order before instruction_template', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: '**Output:** Write to {report:00-plan.md}', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - const orderIdx = result.indexOf('**Output:** Write to 20260129-test/00-plan.md'); - const instructionsIdx = result.indexOf('## Instructions'); - expect(orderIdx).toBeGreaterThan(-1); - expect(instructionsIdx).toBeGreaterThan(orderIdx); - }); - - it('should inject format after instruction_template', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - format: '**Format:**\n```markdown\n# Plan\n```', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - const instructionsIdx = result.indexOf('## Instructions'); - const formatIdx = result.indexOf('**Format:**'); - expect(formatIdx).toBeGreaterThan(instructionsIdx); - }); - - it('should inject both order before and format after instruction_template', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: '**Output:** Write to {report:00-plan.md}', - format: '**Format:**\n```markdown\n# Plan\n```', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - const orderIdx = result.indexOf('**Output:** Write to 20260129-test/00-plan.md'); - const instructionsIdx = result.indexOf('## Instructions'); - const formatIdx = result.indexOf('**Format:**'); - expect(orderIdx).toBeGreaterThan(-1); - expect(instructionsIdx).toBeGreaterThan(orderIdx); - expect(formatIdx).toBeGreaterThan(instructionsIdx); - }); - - it('should replace {report:filename} in order text', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: 'Output to {report:00-plan.md} file.', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Output to 20260129-test/00-plan.md file.'); - expect(result).not.toContain('{report:00-plan.md}'); - }); - - it('should auto-inject report output instruction when report is a simple string', () => { + describe('buildInstruction report-free (phase separation)', () => { + it('should NOT include report output instruction in buildInstruction', () => { const step = createMinimalStep('Do work'); step.report = '00-plan.md'; const context = createMinimalContext({ @@ -636,20 +517,14 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Auto-generated report output instruction should be injected before ## Instructions - expect(result).toContain('**Report output:** Output to the `Report File` specified above.'); - expect(result).toContain('- If file does not exist: Create new file'); - const reportIdx = result.indexOf('**Report output:**'); - const instructionsIdx = result.indexOf('## Instructions'); - expect(reportIdx).toBeGreaterThan(-1); - expect(instructionsIdx).toBeGreaterThan(reportIdx); + expect(result).not.toContain('**Report output:**'); + expect(result).not.toContain('Report File'); + expect(result).not.toContain('Report Directory'); }); - it('should auto-inject report output instruction when report is ReportConfig[]', () => { + it('should NOT include report format in buildInstruction', () => { const step = createMinimalStep('Do work'); - step.report = [ - { label: 'Scope', path: '01-scope.md' }, - ]; + step.report = { name: '00-plan.md', format: '**Format:**\n# Plan' }; const context = createMinimalContext({ reportDir: '20260129-test', language: 'en', @@ -657,84 +532,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Auto-generated multi-file report output instruction - expect(result).toContain('**Report output:** Output to the `Report Files` specified above.'); - expect(result).toContain('- If file does not exist: Create new file'); + expect(result).not.toContain('**Format:**'); }); - it('should replace {report:filename} in instruction_template too', () => { - const step = createMinimalStep('Write to {report:00-plan.md}'); - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Write to 20260129-test/00-plan.md'); - expect(result).not.toContain('{report:00-plan.md}'); - }); - - it('should replace {step_iteration} in order/format text', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: 'Append ## Iteration {step_iteration} section', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - stepIteration: 3, - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Append ## Iteration 3 section'); - }); - - it('should auto-inject Japanese report output instruction for ja language', () => { - const step = createMinimalStep('作業する'); - step.report = { name: '00-plan.md' }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'ja', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('**レポート出力:** `Report File` に出力してください。'); - expect(result).toContain('- ファイルが存在しない場合: 新規作成'); - expect(result).toContain('- ファイルが存在する場合: `## Iteration 1` セクションを追記'); - }); - - it('should auto-inject Japanese multi-file report output instruction', () => { - const step = createMinimalStep('作業する'); - step.report = [{ label: 'Scope', path: '01-scope.md' }]; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'ja', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('**レポート出力:** Report Files に出力してください。'); - }); - - it('should replace {step_iteration} in auto-generated report output instruction', () => { - const step = createMinimalStep('Do work'); - step.report = '00-plan.md'; - const context = createMinimalContext({ - reportDir: '20260129-test', - stepIteration: 5, - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Append with `## Iteration 5` section'); - }); - - it('should prefer explicit order over auto-generated report instruction', () => { + it('should NOT include report order in buildInstruction', () => { const step = createMinimalStep('Do work'); step.report = { name: '00-plan.md', @@ -747,13 +548,11 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('Custom order instruction'); - expect(result).not.toContain('**Report output:**'); + expect(result).not.toContain('Custom order instruction'); }); - it('should auto-inject report output for ReportObjectConfig without order', () => { - const step = createMinimalStep('Do work'); - step.report = { name: '00-plan.md', format: '# Plan' }; + it('should still replace {report:filename} in instruction_template', () => { + const step = createMinimalStep('Write to {report:00-plan.md}'); const context = createMinimalContext({ reportDir: '20260129-test', language: 'en', @@ -761,20 +560,195 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('**Report output:** Output to the `Report File` specified above.'); + expect(result).toContain('Write to 20260129-test/00-plan.md'); + expect(result).not.toContain('{report:00-plan.md}'); }); + }); - it('should NOT inject report output when no reportDir', () => { + describe('buildReportInstruction (phase 2)', () => { + function createReportContext(overrides: Partial = {}): ReportInstructionContext { + return { + cwd: '/project', + reportDir: '20260129-test', + stepIteration: 1, + language: 'en', + ...overrides, + }; + } + + it('should include execution context with working directory', () => { const step = createMinimalStep('Do work'); step.report = '00-plan.md'; - const context = createMinimalContext({ - language: 'en', - }); + const ctx = createReportContext({ cwd: '/my/project' }); - const result = buildInstruction(step, context); + const result = buildReportInstruction(step, ctx); + expect(result).toContain('Working Directory: /my/project'); + }); + + it('should include no-source-edit rule in execution rules', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Do NOT modify project source files'); + }); + + it('should include no-commit and no-cd rules', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Do NOT run git commit'); + expect(result).toContain('Do NOT use `cd`'); + }); + + it('should include report directory and file for string report', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext({ reportDir: '20260130-test' }); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('- Report Directory: 20260130-test/'); + expect(result).toContain('- Report File: 20260130-test/00-plan.md'); + }); + + it('should include report files for ReportConfig[] report', () => { + const step = createMinimalStep('Do work'); + step.report = [ + { label: 'Scope', path: '01-scope.md' }, + { label: 'Decisions', path: '02-decisions.md' }, + ]; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('- Report Directory: 20260129-test/'); + expect(result).toContain('- Report Files:'); + expect(result).toContain(' - Scope: 20260129-test/01-scope.md'); + expect(result).toContain(' - Decisions: 20260129-test/02-decisions.md'); + }); + + it('should include report file for ReportObjectConfig report', () => { + const step = createMinimalStep('Do work'); + step.report = { name: '00-plan.md' }; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('- Report File: 20260129-test/00-plan.md'); + }); + + it('should include auto-generated report output instruction', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('**Report output:** Output to the `Report File` specified above.'); + expect(result).toContain('- If file does not exist: Create new file'); + expect(result).toContain('Append with `## Iteration 1` section'); + }); + + it('should include explicit order instead of auto-generated', () => { + const step = createMinimalStep('Do work'); + step.report = { + name: '00-plan.md', + order: 'Output to {report:00-plan.md} file.', + }; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Output to 20260129-test/00-plan.md file.'); expect(result).not.toContain('**Report output:**'); }); + + it('should include format from ReportObjectConfig', () => { + const step = createMinimalStep('Do work'); + step.report = { + name: '00-plan.md', + format: '**Format:**\n```markdown\n# Plan\n```', + }; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('**Format:**'); + expect(result).toContain('# Plan'); + }); + + it('should replace {step_iteration} in report output instruction', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext({ stepIteration: 5 }); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Append with `## Iteration 5` section'); + }); + + it('should include instruction body text', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('## Instructions'); + expect(result).toContain('Output the results of your previous work as a report'); + }); + + it('should NOT include user request, previous response, or status rules', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + ]; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).not.toContain('User Request'); + expect(result).not.toContain('Previous Response'); + expect(result).not.toContain('Additional User Inputs'); + expect(result).not.toContain('Status Output Rules'); + }); + + it('should render Japanese report instruction', () => { + const step = createMinimalStep('作業する'); + step.report = { name: '00-plan.md' }; + const ctx = createReportContext({ language: 'ja' }); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('前のステップの作業結果をレポートとして出力してください'); + expect(result).toContain('プロジェクトのソースファイルを変更しないでください'); + expect(result).toContain('**レポート出力:** `Report File` に出力してください。'); + }); + + it('should throw error when step has no report config', () => { + const step = createMinimalStep('Do work'); + const ctx = createReportContext(); + + expect(() => buildReportInstruction(step, ctx)).toThrow('no report config'); + }); + + it('should include multi-file report output instruction for ReportConfig[]', () => { + const step = createMinimalStep('Do work'); + step.report = [{ label: 'Scope', path: '01-scope.md' }]; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('**Report output:** Output to the `Report Files` specified above.'); + }); }); describe('auto-injected User Request and Additional User Inputs sections', () => { diff --git a/src/agents/runner.ts b/src/agents/runner.ts index 62c4c40..531898d 100644 --- a/src/agents/runner.ts +++ b/src/agents/runner.ts @@ -31,6 +31,8 @@ export interface RunAgentOptions { agentPath?: string; /** Allowed tools for this agent run */ allowedTools?: string[]; + /** Maximum number of agentic turns */ + maxTurns?: number; /** Permission mode for tool execution (from workflow step) */ permissionMode?: PermissionMode; onStream?: StreamCallback; @@ -82,6 +84,7 @@ export async function runCustomAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options, agentConfig), permissionMode: options.permissionMode, onStream: options.onStream, @@ -98,6 +101,7 @@ export async function runCustomAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options, agentConfig), permissionMode: options.permissionMode, onStream: options.onStream, @@ -118,6 +122,7 @@ export async function runCustomAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options, agentConfig), permissionMode: options.permissionMode, onStream: options.onStream, @@ -195,6 +200,7 @@ export async function runAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools: options.allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options), systemPrompt, permissionMode: options.permissionMode, diff --git a/src/providers/claude.ts b/src/providers/claude.ts index e60ef74..9c8aec3 100644 --- a/src/providers/claude.ts +++ b/src/providers/claude.ts @@ -14,6 +14,7 @@ export class ClaudeProvider implements Provider { sessionId: options.sessionId, allowedTools: options.allowedTools, model: options.model, + maxTurns: options.maxTurns, systemPrompt: options.systemPrompt, permissionMode: options.permissionMode, onStream: options.onStream, @@ -31,6 +32,7 @@ export class ClaudeProvider implements Provider { sessionId: options.sessionId, allowedTools: options.allowedTools, model: options.model, + maxTurns: options.maxTurns, permissionMode: options.permissionMode, onStream: options.onStream, onPermissionRequest: options.onPermissionRequest, diff --git a/src/providers/index.ts b/src/providers/index.ts index c8d9c7f..5b013d0 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -18,6 +18,8 @@ export interface ProviderCallOptions { model?: string; systemPrompt?: string; allowedTools?: string[]; + /** Maximum number of agentic turns */ + maxTurns?: number; /** Permission mode for tool execution (from workflow step) */ permissionMode?: PermissionMode; onStream?: StreamCallback; diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index ae11961..34a5c43 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -16,7 +16,7 @@ import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; import { detectRuleIndex, callAiJudge } from '../claude/client.js'; -import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; +import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; import { @@ -206,11 +206,16 @@ export class WorkflowEngine extends EventEmitter { /** Build RunAgentOptions from a step's configuration */ private buildAgentOptions(step: WorkflowStep): RunAgentOptions { + // Phase 1: exclude Write from allowedTools when step has report config + const allowedTools = step.report + ? step.allowedTools?.filter((t) => t !== 'Write') + : step.allowedTools; + return { cwd: this.cwd, sessionId: this.state.agentSessions.get(step.agent), agentPath: step.agentPath, - allowedTools: step.allowedTools, + allowedTools, provider: step.provider, model: step.model, permissionMode: step.permissionMode, @@ -266,11 +271,17 @@ export class WorkflowEngine extends EventEmitter { sessionId: this.state.agentSessions.get(step.agent) ?? 'new', }); + // Phase 1: main execution (Write excluded if step has report) const agentOptions = this.buildAgentOptions(step); let response = await runAgent(step.agent, instruction, agentOptions); - this.updateAgentSession(step.agent, response.sessionId); + // Phase 2: report output (resume same session, Write only) + if (step.report) { + await this.runReportPhase(step, stepIteration); + } + + // Status detection uses phase 1 response const matchedRuleIndex = await this.detectMatchedRule(step, response.content); if (matchedRuleIndex != null) { response = { ...response, matchedRuleIndex }; @@ -281,6 +292,50 @@ export class WorkflowEngine extends EventEmitter { return { response, instruction }; } + /** + * Phase 2: Report output. + * Resumes the agent session with Write-only tools to output reports. + * The response is discarded — only sessionId is updated. + */ + private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise { + const sessionId = this.state.agentSessions.get(step.agent); + if (!sessionId) { + log.debug('Skipping report phase: no sessionId to resume', { step: step.name }); + return; + } + + log.debug('Running report phase', { step: step.name, sessionId }); + + const reportInstruction = buildReportInstructionFromTemplate(step, { + cwd: this.cwd, + reportDir: this.reportDir, + stepIteration, + language: this.language, + }); + + const reportOptions: RunAgentOptions = { + cwd: this.cwd, + sessionId, + agentPath: step.agentPath, + allowedTools: ['Write'], + maxTurns: 3, + provider: step.provider, + model: step.model, + permissionMode: step.permissionMode, + onStream: this.options.onStream, + onPermissionRequest: this.options.onPermissionRequest, + onAskUserQuestion: this.options.onAskUserQuestion, + bypassPermissions: this.options.bypassPermissions, + }; + + const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions); + + // Update session (phase 2 may update it) + this.updateAgentSession(step.agent, reportResponse.sessionId); + + log.debug('Report phase complete', { step: step.name, status: reportResponse.status }); + } + /** * Run a parallel step: execute all sub-steps concurrently, then aggregate results. * The aggregated output becomes the parent step's response for rules evaluation. @@ -300,11 +355,16 @@ export class WorkflowEngine extends EventEmitter { const subIteration = incrementStepIteration(this.state, subStep.name); const subInstruction = this.buildInstruction(subStep, subIteration); + // Phase 1: main execution (Write excluded if sub-step has report) const agentOptions = this.buildAgentOptions(subStep); const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); - this.updateAgentSession(subStep.agent, subResponse.sessionId); + // Phase 2: report output for sub-step + if (subStep.report) { + await this.runReportPhase(subStep, subIteration); + } + // Detect sub-step rule matches (tag detection + ai() fallback) const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content); const finalResponse = matchedRuleIndex != null diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index 71abf8d..1dabef0 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -323,22 +323,31 @@ function renderWorkflowContext( `- ${s.step}: ${step.name}`, ]; - // Report info (only if step has report config AND reportDir is available) - if (step.report && context.reportDir) { - lines.push(`- ${s.reportDirectory}: ${context.reportDir}/`); + return lines.join('\n'); +} - if (typeof step.report === 'string') { - // Single file (string form) - lines.push(`- ${s.reportFile}: ${context.reportDir}/${step.report}`); - } else if (isReportObjectConfig(step.report)) { - // Object form (name + order + format) - lines.push(`- ${s.reportFile}: ${context.reportDir}/${step.report.name}`); - } else { - // Multiple files (ReportConfig[] form) - lines.push(`- ${s.reportFiles}:`); - for (const file of step.report as ReportConfig[]) { - lines.push(` - ${file.label}: ${context.reportDir}/${file.path}`); - } +/** + * Render report info for the Workflow Context section. + * Used only by buildReportInstruction() (phase 2). + */ +function renderReportContext( + report: string | ReportConfig[] | ReportObjectConfig, + reportDir: string, + language: Language, +): string { + const s = SECTION_STRINGS[language]; + const lines: string[] = [ + `- ${s.reportDirectory}: ${reportDir}/`, + ]; + + if (typeof report === 'string') { + lines.push(`- ${s.reportFile}: ${reportDir}/${report}`); + } else if (isReportObjectConfig(report)) { + lines.push(`- ${s.reportFile}: ${reportDir}/${report.name}`); + } else { + lines.push(`- ${s.reportFiles}:`); + for (const file of report) { + lines.push(` - ${file.label}: ${reportDir}/${file.path}`); } } @@ -466,20 +475,7 @@ export function buildInstruction( sections.push(`${s.additionalUserInputs}\n${escapeTemplateChars(userInputsStr)}`); } - // 6a. Report output instruction (auto-generated from step.report) - // If ReportObjectConfig has an explicit `order:`, use that (backward compat). - // Otherwise, auto-generate from the report declaration. - if (step.report && isReportObjectConfig(step.report) && step.report.order) { - const processedOrder = replaceTemplatePlaceholders(step.report.order.trimEnd(), step, context); - sections.push(processedOrder); - } else { - const reportInstruction = renderReportOutputInstruction(step, context, language); - if (reportInstruction) { - sections.push(reportInstruction); - } - } - - // 6b. Instructions header + instruction_template content + // 6. Instructions header + instruction_template content const processedTemplate = replaceTemplatePlaceholders( step.instructionTemplate, step, @@ -487,12 +483,6 @@ export function buildInstruction( ); sections.push(`${s.instructions}\n${processedTemplate}`); - // 6c. Report format (appended after instruction_template, from ReportObjectConfig) - if (step.report && isReportObjectConfig(step.report) && step.report.format) { - const processedFormat = replaceTemplatePlaceholders(step.report.format.trimEnd(), step, context); - sections.push(processedFormat); - } - // 7. Status rules (auto-generated from rules) // Skip when ALL rules are ai() conditions — agent doesn't need to output status tags if (step.rules && step.rules.length > 0) { @@ -506,3 +496,120 @@ export function buildInstruction( return sections.join('\n\n'); } + +/** Localized strings for report phase execution rules */ +const REPORT_PHASE_STRINGS = { + en: { + noSourceEdit: '**Do NOT modify project source files.** Only output report files.', + instructionBody: 'Output the results of your previous work as a report.', + }, + ja: { + noSourceEdit: '**プロジェクトのソースファイルを変更しないでください。** レポートファイルのみ出力してください。', + instructionBody: '前のステップの作業結果をレポートとして出力してください。', + }, +} as const; + +/** + * Context for building report phase instruction. + */ +export interface ReportInstructionContext { + /** Working directory */ + cwd: string; + /** Report directory path */ + reportDir: string; + /** Step iteration (for {step_iteration} replacement) */ + stepIteration: number; + /** Language */ + language?: Language; +} + +/** + * Build instruction for phase 2 (report output). + * + * Separate from buildInstruction() — only includes: + * - Execution Context (cwd + rules) + * - Workflow Context (report info only) + * - Report output instruction + format + * + * Does NOT include: User Request, Previous Response, User Inputs, + * Status rules, instruction_template. + */ +export function buildReportInstruction( + step: WorkflowStep, + context: ReportInstructionContext, +): string { + if (!step.report) { + throw new Error(`buildReportInstruction called for step "${step.name}" which has no report config`); + } + + const language = context.language ?? 'en'; + const s = SECTION_STRINGS[language]; + const r = REPORT_PHASE_STRINGS[language]; + const m = METADATA_STRINGS[language]; + const sections: string[] = []; + + // 1. Execution Context + const execLines = [ + m.heading, + `- ${m.workingDirectory}: ${context.cwd}`, + '', + m.rulesHeading, + `- ${m.noCommit}`, + `- ${m.noCd}`, + `- ${r.noSourceEdit}`, + ]; + if (m.note) { + execLines.push(''); + execLines.push(m.note); + } + execLines.push(''); + sections.push(execLines.join('\n')); + + // 2. Workflow Context (report info only) + const workflowLines = [ + s.workflowContext, + renderReportContext(step.report, context.reportDir, language), + ]; + sections.push(workflowLines.join('\n')); + + // 3. Instructions + report output instruction + format + const instrParts: string[] = [ + `${s.instructions}`, + r.instructionBody, + ]; + + // Report output instruction (auto-generated or explicit order) + const reportContext: InstructionContext = { + task: '', + iteration: 0, + maxIterations: 0, + stepIteration: context.stepIteration, + cwd: context.cwd, + userInputs: [], + reportDir: context.reportDir, + language, + }; + + if (isReportObjectConfig(step.report) && step.report.order) { + const processedOrder = replaceTemplatePlaceholders(step.report.order.trimEnd(), step, reportContext); + instrParts.push(''); + instrParts.push(processedOrder); + } else { + const reportInstruction = renderReportOutputInstruction(step, reportContext, language); + if (reportInstruction) { + instrParts.push(''); + instrParts.push(reportInstruction); + } + } + + // Report format + if (isReportObjectConfig(step.report) && step.report.format) { + const processedFormat = replaceTemplatePlaceholders(step.report.format.trimEnd(), step, reportContext); + instrParts.push(''); + instrParts.push(processedFormat); + } + + sections.push(instrParts.join('\n')); + + return sections.join('\n\n'); +} From b969f5a7f4ebca7e97627517309be6478c59de58 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:29:28 +0900 Subject: [PATCH 3/6] =?UTF-8?q?=E4=B8=8D=E8=A6=81=E3=81=AA=E4=BB=95?= =?UTF-8?q?=E6=A7=98=E6=9B=B8=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/spec-aggregate-conditions.md | 104 ------------------------------ 1 file changed, 104 deletions(-) delete mode 100644 docs/spec-aggregate-conditions.md diff --git a/docs/spec-aggregate-conditions.md b/docs/spec-aggregate-conditions.md deleted file mode 100644 index dcfbbde..0000000 --- a/docs/spec-aggregate-conditions.md +++ /dev/null @@ -1,104 +0,0 @@ -# 集約条件 `all()` / `any()` 仕様 - -## 背景 - -パラレルステップでは複数のサブステップが並列実行される。各サブステップは自身のルールで結果(approved, rejected 等)を判定するが、親ステップが「全体としてどう遷移するか」を決定する必要がある。 - -現状、親ステップの遷移判定は結合テキストに対する `ai()` 評価かタグ検出しかない。しかし、「全員が承認したら次へ」「1人でも却下したらやり直し」といった集約判定はルールベースで十分であり、AI呼び出しは不要。 - -## 目的 - -パラレルステップの親ルールに `all("condition")` / `any("condition")` 構文を追加し、サブステップの判定結果をルールベースで集約する。 - -## YAML 構文 - -```yaml -- name: parallel-review - parallel: - - name: arch-review - agent: ~/.takt/agents/default/architect.md - rules: - - condition: approved - next: _ - - condition: rejected - next: _ - - name: security-review - agent: ~/.takt/agents/default/security-reviewer.md - rules: - - condition: approved - next: _ - - condition: rejected - next: _ - rules: - - condition: all("approved") - next: COMPLETE - - condition: any("rejected") - next: implement -``` - -## 式のセマンティクス - -| 式 | 意味 | -|---|------| -| `all("X")` | 全サブステップの判定結果が `X` のとき真 | -| `any("X")` | 1つ以上のサブステップの判定結果が `X` のとき真 | - -「判定結果」とは、サブステップのルール評価でマッチしたルールの `condition` 値を指す。 - -## エッジケースの定義 - -| ケース | `all("X")` | `any("X")` | -|--------|-----------|-----------| -| 全サブステップが X | true | true | -| 一部が X | false | true | -| いずれも X でない | false | false | -| 判定結果なし(ルール未定義 or マッチなし) | false | そのサブステップは判定対象外 | -| サブステップ 0 件 | false | false | -| 非パラレルステップで使用 | false | false | - -`all()` は「全員が確実に X」を要求するため、判定不能なサブステップがあれば false。 -`any()` は「誰か1人でも X」を探すため、判定不能なサブステップは無視する。 - -## 評価の優先順位 - -親ステップの `rules` 配列は先頭から順に評価される。各ルールの種類に応じた評価方式が適用される。 - -| 順位 | 種類 | 評価方式 | コスト | -|------|------|---------|--------| -| 1 | `all()` / `any()` | サブステップの判定結果を集計 | なし | -| 2 | 通常条件(`done` 等) | 結合テキストで `[STEP:N]` タグ検出 | なし | -| 3 | `ai("...")` | AI judge 呼び出し | API 1回 | - -最初にマッチしたルールで遷移が確定する。`all()` / `any()` を先に書けば、マッチした時点で `ai()` は呼ばれない。 - -## 他の条件式との混在 - -同一の `rules` 配列内で自由に混在できる。 - -```yaml -rules: - - condition: all("approved") # 集約(高速) - next: COMPLETE - - condition: any("rejected") # 集約(高速) - next: implement - - condition: ai("判断が難しい場合") # AI フォールバック - next: manual-review -``` - -## サブステップのルール - -サブステップの `rules` はサブステップ自身の判定結果を決めるために使う。`next` フィールドはパラレル文脈では使用されない(親の `rules` が遷移を決定する)。スキーマ互換性のため `next` は必須のまま残し、値は任意とする。 - -## ステータスタグの注入 - -親ステップの全ルールが `all()` / `any()` / `ai()` のいずれかである場合、ステータスタグ(`[STEP:N]` 系)の注入をスキップする。タグ検出が不要なため。 - -## 変更対象 - -| ファイル | 変更内容 | -|---------|---------| -| `src/models/types.ts` | `WorkflowRule` に集約条件フラグを追加 | -| `src/config/workflowLoader.ts` | `all()` / `any()` パターンの検出と正規化 | -| `src/workflow/engine.ts` | 集約条件の評価ロジックを追加 | -| `src/workflow/instruction-builder.ts` | ステータスタグスキップ条件を拡張 | -| テスト | パース、評価、エッジケース、混在ルール | From b10773d310667c133984f00d2a8fddc743019e54 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:29:54 +0900 Subject: [PATCH 4/6] =?UTF-8?q?=E3=82=B9=E3=83=86=E3=83=BC=E3=82=BF?= =?UTF-8?q?=E3=82=B9=E5=88=A4=E5=AE=9A=E3=82=92Phase=203=E3=81=AB=E5=88=86?= =?UTF-8?q?=E9=9B=A2=E3=81=97=E3=80=81=E3=83=87=E3=83=83=E3=83=89=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=89=E3=82=92=E6=95=B4=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - buildInstruction からステータスルール注入を除去(Phase 1はステータスタグなし) - buildStatusJudgmentInstruction を新設(Phase 3: セッション再開でステータスタグ出力) - detectMatchedRule のシグネチャを (agentContent, tagContent) に変更 - ルール存在時にマッチなしなら即座にthrow(Fail Fast) - runReportPhase / runStatusJudgmentPhase の共通部分を buildResumeOptions に抽出 - sessionId 欠落時のサイレントフォールバックをエラーに変更 - renderStatusRulesHeader / STATUS_RULES_HEADER_STRINGS を削除(デッドコード) - StatusJudgmentContext から未使用の cwd を削除 - Status 型および StatusSchema から未使用の in_progress を削除 --- src/__tests__/instructionBuilder.test.ts | 210 ++++++++++++++++---- src/__tests__/parallel-and-loader.test.ts | 221 ++++++++++++++++++++++ src/config/workflowLoader.ts | 27 ++- src/models/schemas.ts | 1 - src/models/types.ts | 7 +- src/workflow/engine.ts | 193 ++++++++++++++++--- src/workflow/instruction-builder.ts | 90 +++++---- 7 files changed, 636 insertions(+), 113 deletions(-) diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index d168624..285ad56 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -6,13 +6,14 @@ import { describe, it, expect } from 'vitest'; import { buildInstruction, buildReportInstruction, + buildStatusJudgmentInstruction, buildExecutionMetadata, renderExecutionMetadata, - renderStatusRulesHeader, generateStatusRulesFromRules, isReportObjectConfig, type InstructionContext, type ReportInstructionContext, + type StatusJudgmentContext, } from '../workflow/instruction-builder.js'; import type { WorkflowStep, WorkflowRule } from '../models/types.js'; @@ -296,30 +297,6 @@ describe('instruction-builder', () => { }); }); - describe('renderStatusRulesHeader', () => { - it('should render Japanese header when language is ja', () => { - const header = renderStatusRulesHeader('ja'); - - expect(header).toContain('# ⚠️ 必須: ステータス出力ルール ⚠️'); - expect(header).toContain('このタグがないとワークフローが停止します'); - expect(header).toContain('最終出力には必ず以下のルールに従ったステータスタグを含めてください'); - }); - - it('should render English header when language is en', () => { - const header = renderStatusRulesHeader('en'); - - expect(header).toContain('# ⚠️ Required: Status Output Rules ⚠️'); - expect(header).toContain('The workflow will stop without this tag'); - expect(header).toContain('Your final output MUST include a status tag'); - }); - - it('should end with trailing empty line', () => { - const header = renderStatusRulesHeader('en'); - - expect(header).toMatch(/\n$/); - }); - }); - describe('generateStatusRulesFromRules', () => { const rules: WorkflowRule[] = [ { condition: '要件が明確で実装可能', next: 'implement' }, @@ -385,8 +362,8 @@ describe('instruction-builder', () => { }); }); - describe('buildInstruction with rules', () => { - it('should auto-generate status rules from rules', () => { + describe('buildInstruction with rules (Phase 1 — no status tags)', () => { + it('should NOT include status rules even when rules exist (phase separation)', () => { const step = createMinimalStep('Do work'); step.name = 'plan'; step.rules = [ @@ -397,12 +374,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Should contain status header - expect(result).toContain('⚠️ Required: Status Output Rules ⚠️'); - // Should contain auto-generated criteria table - expect(result).toContain('## Decision Criteria'); - expect(result).toContain('`[PLAN:1]`'); - expect(result).toContain('`[PLAN:2]`'); + // Phase 1 should NOT contain status header or criteria + expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('Decision Criteria'); + expect(result).not.toContain('[PLAN:'); }); it('should not add status rules when rules do not exist', () => { @@ -411,7 +386,7 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('⚠️ Required'); + expect(result).not.toContain('Status Output Rules'); expect(result).not.toContain('Decision Criteria'); }); @@ -422,7 +397,7 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('⚠️ Required'); + expect(result).not.toContain('Status Output Rules'); expect(result).not.toContain('Decision Criteria'); }); }); @@ -884,8 +859,8 @@ describe('instruction-builder', () => { }); }); - describe('ai() condition status tag skip', () => { - it('should skip status rules when ALL rules are ai() conditions', () => { + describe('phase separation — buildInstruction never includes status rules', () => { + it('should NOT include status rules even with ai() conditions', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'ai("No issues")', next: 'COMPLETE', isAiCondition: true, aiConditionText: 'No issues' }, @@ -899,7 +874,7 @@ describe('instruction-builder', () => { expect(result).not.toContain('[TEST-STEP:'); }); - it('should include status rules when some rules are NOT ai() conditions', () => { + it('should NOT include status rules with mixed regular and ai() conditions', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'Error occurred', next: 'ABORT' }, @@ -909,10 +884,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('Status Output Rules'); + expect(result).not.toContain('Status Output Rules'); }); - it('should include status rules when no rules are ai() conditions', () => { + it('should NOT include status rules with regular conditions only', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'Done', next: 'COMPLETE' }, @@ -922,7 +897,47 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('Status Output Rules'); + expect(result).not.toContain('Status Output Rules'); + }); + + it('should NOT include status rules with aggregate conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'any("rejected")', next: 'fix', isAggregateCondition: true, aggregateType: 'any' as const, aggregateConditionText: 'rejected' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); + }); + + it('should NOT include status rules with mixed ai() and aggregate conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'any("rejected")', next: 'fix', isAggregateCondition: true, aggregateType: 'any' as const, aggregateConditionText: 'rejected' }, + { condition: 'ai("Judgment needed")', next: 'manual', isAiCondition: true, aiConditionText: 'Judgment needed' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); + }); + + it('should NOT include status rules with mixed aggregate and regular conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'Error occurred', next: 'ABORT' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); }); }); @@ -943,4 +958,117 @@ describe('instruction-builder', () => { expect(isReportObjectConfig([{ label: 'Scope', path: '01-scope.md' }])).toBe(false); }); }); + + describe('buildStatusJudgmentInstruction (Phase 3)', () => { + function createJudgmentContext(overrides: Partial = {}): StatusJudgmentContext { + return { + language: 'en', + ...overrides, + }; + } + + it('should include header instruction (en)', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Clear requirements', next: 'implement' }, + { condition: 'Unclear', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Review your work results and determine the status'); + expect(result).toContain('Do NOT perform any additional work'); + }); + + it('should include header instruction (ja)', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: '要件が明確', next: 'implement' }, + { condition: '不明確', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext({ language: 'ja' }); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('作業結果を振り返り、ステータスを判定してください'); + expect(result).toContain('追加の作業は行わないでください'); + }); + + it('should include criteria table with tags', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Clear requirements', next: 'implement' }, + { condition: 'Unclear', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('## Decision Criteria'); + expect(result).toContain('`[PLAN:1]`'); + expect(result).toContain('`[PLAN:2]`'); + }); + + it('should include output format section', () => { + const step = createMinimalStep('Do work'); + step.name = 'review'; + step.rules = [ + { condition: 'Approved', next: 'COMPLETE' }, + { condition: 'Rejected', next: 'fix' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('## Output Format'); + expect(result).toContain('`[REVIEW:1]` — Approved'); + expect(result).toContain('`[REVIEW:2]` — Rejected'); + }); + + it('should throw error when step has no rules', () => { + const step = createMinimalStep('Do work'); + const ctx = createJudgmentContext(); + + expect(() => buildStatusJudgmentInstruction(step, ctx)).toThrow('no rules'); + }); + + it('should throw error when step has empty rules', () => { + const step = createMinimalStep('Do work'); + step.rules = []; + const ctx = createJudgmentContext(); + + expect(() => buildStatusJudgmentInstruction(step, ctx)).toThrow('no rules'); + }); + + it('should default language to en', () => { + const step = createMinimalStep('Do work'); + step.name = 'test'; + step.rules = [{ condition: 'Done', next: 'COMPLETE' }]; + const ctx: StatusJudgmentContext = {}; + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Review your work results'); + expect(result).toContain('## Decision Criteria'); + }); + + it('should include appendix template when rules have appendix', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + { condition: 'Blocked', next: 'ABORT', appendix: '確認事項:\n- {質問1}' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Appendix Template'); + expect(result).toContain('確認事項:'); + }); + }); }); diff --git a/src/__tests__/parallel-and-loader.test.ts b/src/__tests__/parallel-and-loader.test.ts index b6dfde7..b8a53e3 100644 --- a/src/__tests__/parallel-and-loader.test.ts +++ b/src/__tests__/parallel-and-loader.test.ts @@ -252,6 +252,227 @@ describe('ai() condition regex parsing', () => { }); }); +describe('all()/any() aggregate condition regex parsing', () => { + const AGGREGATE_CONDITION_REGEX = /^(all|any)\("(.+)"\)$/; + + it('should match all() condition', () => { + const match = 'all("approved")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('all'); + expect(match![2]).toBe('approved'); + }); + + it('should match any() condition', () => { + const match = 'any("rejected")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('any'); + expect(match![2]).toBe('rejected'); + }); + + it('should match with Japanese text', () => { + const match = 'all("承認済み")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('all'); + expect(match![2]).toBe('承認済み'); + }); + + it('should not match regular condition text', () => { + expect('approved'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should not match ai() condition', () => { + expect('ai("something")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should not match invalid patterns', () => { + expect('all(missing quotes)'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('all("")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('not all("text")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('all("text") extra'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('ALL("text")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should match with special characters in text', () => { + const match = 'any("issues found (critical)")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![2]).toBe('issues found (critical)'); + }); +}); + +describe('all()/any() condition in WorkflowStepRawSchema', () => { + it('should accept all() condition as a string', () => { + const raw = { + name: 'parallel-review', + parallel: [ + { name: 'arch-review', agent: 'reviewer.md', instruction_template: 'Review' }, + ], + rules: [ + { condition: 'all("approved")', next: 'COMPLETE' }, + { condition: 'any("rejected")', next: 'fix' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules?.[0].condition).toBe('all("approved")'); + expect(result.data.rules?.[1].condition).toBe('any("rejected")'); + } + }); + + it('should accept mixed regular, ai(), and all()/any() conditions', () => { + const raw = { + name: 'mixed-rules', + parallel: [ + { name: 'sub', agent: 'agent.md' }, + ], + rules: [ + { condition: 'all("approved")', next: 'COMPLETE' }, + { condition: 'any("rejected")', next: 'fix' }, + { condition: 'ai("Difficult judgment")', next: 'manual-review' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); +}); + +describe('aggregate condition evaluation logic', () => { + // Simulate the evaluation logic from engine.ts + type SubResult = { name: string; matchedRuleIndex?: number; rules?: { condition: string }[] }; + + function evaluateAggregate( + aggregateType: 'all' | 'any', + targetCondition: string, + subSteps: SubResult[], + ): boolean { + if (subSteps.length === 0) return false; + + if (aggregateType === 'all') { + return subSteps.every((sub) => { + if (sub.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[sub.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + } + // 'any' + return subSteps.some((sub) => { + if (sub.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[sub.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + } + + const rules = [ + { condition: 'approved' }, + { condition: 'rejected' }, + ]; + + it('all(): true when all sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(true); + }); + + it('all(): false when some sub-steps do not match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false when sub-step has no matched rule', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: undefined, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false when sub-step has no rules', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules: undefined }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false with zero sub-steps', () => { + expect(evaluateAggregate('all', 'approved', [])).toBe(false); + }); + + it('any(): true when one sub-step matches', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): true when all sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 1, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): false when no sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(false); + }); + + it('any(): false with zero sub-steps', () => { + expect(evaluateAggregate('any', 'rejected', [])).toBe(false); + }); + + it('any(): skips sub-steps without matched rule (does not count as match)', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: undefined, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): false when only unmatched sub-steps exist', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: undefined, rules }, + { name: 'b', matchedRuleIndex: undefined, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(false); + }); + + it('evaluation priority: first matching aggregate rule wins', () => { + const parentRules = [ + { type: 'all' as const, condition: 'approved' }, + { type: 'any' as const, condition: 'rejected' }, + ]; + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + + // Find the first matching rule + let matchedIndex = -1; + for (let i = 0; i < parentRules.length; i++) { + const r = parentRules[i]!; + if (evaluateAggregate(r.type, r.condition, subs)) { + matchedIndex = i; + break; + } + } + + expect(matchedIndex).toBe(0); // all("approved") matches first + }); +}); + describe('parallel step aggregation format', () => { it('should aggregate sub-step outputs in the expected format', () => { // Mirror the aggregation logic from engine.ts diff --git a/src/config/workflowLoader.ts b/src/config/workflowLoader.ts index 0688512..a7bc5c1 100644 --- a/src/config/workflowLoader.ts +++ b/src/config/workflowLoader.ts @@ -123,21 +123,38 @@ function normalizeReport( /** Regex to detect ai("...") condition expressions */ const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; +/** Regex to detect all("...")/any("...") aggregate condition expressions */ +const AGGREGATE_CONDITION_REGEX = /^(all|any)\("(.+)"\)$/; + /** - * Parse a rule's condition for ai() expressions. - * If condition is `ai("some text")`, sets isAiCondition and aiConditionText. + * Parse a rule's condition for ai() and all()/any() expressions. + * - `ai("text")` → sets isAiCondition and aiConditionText + * - `all("text")` / `any("text")` → sets isAggregateCondition, aggregateType, aggregateConditionText */ function normalizeRule(r: { condition: string; next: string; appendix?: string }): WorkflowRule { - const match = r.condition.match(AI_CONDITION_REGEX); - if (match?.[1]) { + const aiMatch = r.condition.match(AI_CONDITION_REGEX); + if (aiMatch?.[1]) { return { condition: r.condition, next: r.next, appendix: r.appendix, isAiCondition: true, - aiConditionText: match[1], + aiConditionText: aiMatch[1], }; } + + const aggMatch = r.condition.match(AGGREGATE_CONDITION_REGEX); + if (aggMatch?.[1] && aggMatch[2]) { + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + isAggregateCondition: true, + aggregateType: aggMatch[1] as 'all' | 'any', + aggregateConditionText: aggMatch[2], + }; + } + return { condition: r.condition, next: r.next, diff --git a/src/models/schemas.ts b/src/models/schemas.ts index 4ec6ef8..502adc5 100644 --- a/src/models/schemas.ts +++ b/src/models/schemas.ts @@ -13,7 +13,6 @@ export const AgentTypeSchema = z.enum(['coder', 'architect', 'supervisor', 'cust /** Status schema */ export const StatusSchema = z.enum([ 'pending', - 'in_progress', 'done', 'blocked', 'approved', diff --git a/src/models/types.ts b/src/models/types.ts index 88efc90..7db34e1 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -8,7 +8,6 @@ export type AgentType = 'coder' | 'architect' | 'supervisor' | 'custom'; /** Execution status for agents and workflows */ export type Status = | 'pending' - | 'in_progress' | 'done' | 'blocked' | 'approved' @@ -52,6 +51,12 @@ export interface WorkflowRule { isAiCondition?: boolean; /** The condition text inside ai("...") for AI judge evaluation (set by loader) */ aiConditionText?: string; + /** Whether this condition uses all()/any() aggregate expression (set by loader) */ + isAggregateCondition?: boolean; + /** Aggregate type: 'all' requires all sub-steps match, 'any' requires at least one (set by loader) */ + aggregateType?: 'all' | 'any'; + /** The condition text inside all("...")/any("...") to match against sub-step results (set by loader) */ + aggregateConditionText?: string; } /** Report file configuration for a workflow step (label: path pair) */ diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index 34a5c43..612e0d4 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -16,7 +16,7 @@ import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; import { detectRuleIndex, callAiJudge } from '../claude/client.js'; -import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; +import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; import { @@ -226,6 +226,27 @@ export class WorkflowEngine extends EventEmitter { }; } + /** + * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3). + * Shares common fields with the original step's agent config. + */ + private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick): RunAgentOptions { + return { + cwd: this.cwd, + sessionId, + agentPath: step.agentPath, + allowedTools: overrides.allowedTools, + maxTurns: overrides.maxTurns, + provider: step.provider, + model: step.model, + permissionMode: step.permissionMode, + onStream: this.options.onStream, + onPermissionRequest: this.options.onPermissionRequest, + onAskUserQuestion: this.options.onAskUserQuestion, + bypassPermissions: this.options.bypassPermissions, + }; + } + /** Update agent session and notify via callback if session changed */ private updateAgentSession(agent: string, sessionId: string | undefined): void { if (!sessionId) return; @@ -240,23 +261,96 @@ export class WorkflowEngine extends EventEmitter { /** * Detect matched rule for a step's response. - * 1. Try standard [STEP:N] tag detection - * 2. Fallback to ai() condition evaluation via AI judge + * Evaluation order (first match wins): + * 1. Aggregate conditions: all()/any() — evaluate sub-step results + * 2. Standard [STEP:N] tag detection (from tagContent, i.e. Phase 3 output) + * 3. ai() condition evaluation via AI judge (from agentContent, i.e. Phase 1 output) + * + * Returns undefined for steps without rules. + * Throws if rules exist but no rule matched (Fail Fast). + * + * @param step - The workflow step + * @param agentContent - Phase 1 output (main execution) + * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection */ - private async detectMatchedRule(step: WorkflowStep, content: string): Promise { + private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise { if (!step.rules || step.rules.length === 0) return undefined; - const ruleIndex = detectRuleIndex(content, step.name); - if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - return ruleIndex; + // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps + const aggIndex = this.evaluateAggregateConditions(step); + if (aggIndex >= 0) { + return aggIndex; } - const aiRuleIndex = await this.evaluateAiConditions(step, content); + // 2. Standard tag detection (from Phase 3 output) + if (tagContent) { + const ruleIndex = detectRuleIndex(tagContent, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return ruleIndex; + } + } + + // 3. AI judge fallback (from Phase 1 output) + const aiRuleIndex = await this.evaluateAiConditions(step, agentContent); if (aiRuleIndex >= 0) { return aiRuleIndex; } - return undefined; + throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`); + } + + /** + * Evaluate aggregate conditions (all()/any()) against sub-step results. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + * + * For each aggregate rule, checks the matched condition text of sub-steps: + * - all("X"): true when ALL sub-steps have matched condition === X + * - any("X"): true when at least ONE sub-step has matched condition === X + * + * Edge cases per spec: + * - Sub-step with no matched rule: all() → false, any() → skip that sub-step + * - No sub-steps (0 件): both → false + * - Non-parallel step: both → false + */ + private evaluateAggregateConditions(step: WorkflowStep): number { + if (!step.rules || !step.parallel || step.parallel.length === 0) return -1; + + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) { + continue; + } + + const subSteps = step.parallel; + const targetCondition = rule.aggregateConditionText; + + if (rule.aggregateType === 'all') { + const allMatch = subSteps.every((sub) => { + const output = this.state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (allMatch) { + log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } else { + // 'any' + const anyMatch = subSteps.some((sub) => { + const output = this.state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (anyMatch) { + log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } + } + + return -1; } /** Run a normal (non-parallel) step */ @@ -281,8 +375,13 @@ export class WorkflowEngine extends EventEmitter { await this.runReportPhase(step, stepIteration); } - // Status detection uses phase 1 response - const matchedRuleIndex = await this.detectMatchedRule(step, response.content); + // Phase 3: status judgment (resume session, no tools, output status tag) + let tagContent = ''; + if (this.needsStatusJudgmentPhase(step)) { + tagContent = await this.runStatusJudgmentPhase(step); + } + + const matchedRuleIndex = await this.detectMatchedRule(step, response.content, tagContent); if (matchedRuleIndex != null) { response = { ...response, matchedRuleIndex }; } @@ -300,8 +399,7 @@ export class WorkflowEngine extends EventEmitter { private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise { const sessionId = this.state.agentSessions.get(step.agent); if (!sessionId) { - log.debug('Skipping report phase: no sessionId to resume', { step: step.name }); - return; + throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); } log.debug('Running report phase', { step: step.name, sessionId }); @@ -313,20 +411,10 @@ export class WorkflowEngine extends EventEmitter { language: this.language, }); - const reportOptions: RunAgentOptions = { - cwd: this.cwd, - sessionId, - agentPath: step.agentPath, + const reportOptions = this.buildResumeOptions(step, sessionId, { allowedTools: ['Write'], maxTurns: 3, - provider: step.provider, - model: step.model, - permissionMode: step.permissionMode, - onStream: this.options.onStream, - onPermissionRequest: this.options.onPermissionRequest, - onAskUserQuestion: this.options.onAskUserQuestion, - bypassPermissions: this.options.bypassPermissions, - }; + }); const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions); @@ -336,6 +424,48 @@ export class WorkflowEngine extends EventEmitter { log.debug('Report phase complete', { step: step.name, status: reportResponse.status }); } + /** + * Check if a step needs Phase 3 (status judgment). + * Returns true when at least one rule requires tag-based detection + * (i.e., not all rules are ai() or aggregate conditions). + */ + private needsStatusJudgmentPhase(step: WorkflowStep): boolean { + if (!step.rules || step.rules.length === 0) return false; + const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); + return !allNonTagConditions; + } + + /** + * Phase 3: Status judgment. + * Resumes the agent session with no tools to ask the agent to output a status tag. + * Returns the Phase 3 response content (containing the status tag). + */ + private async runStatusJudgmentPhase(step: WorkflowStep): Promise { + const sessionId = this.state.agentSessions.get(step.agent); + if (!sessionId) { + throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); + } + + log.debug('Running status judgment phase', { step: step.name, sessionId }); + + const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, { + language: this.language, + }); + + const judgmentOptions = this.buildResumeOptions(step, sessionId, { + allowedTools: [], + maxTurns: 1, + }); + + const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); + + // Update session (phase 3 may update it) + this.updateAgentSession(step.agent, judgmentResponse.sessionId); + + log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status }); + return judgmentResponse.content; + } + /** * Run a parallel step: execute all sub-steps concurrently, then aggregate results. * The aggregated output becomes the parent step's response for rules evaluation. @@ -365,8 +495,13 @@ export class WorkflowEngine extends EventEmitter { await this.runReportPhase(subStep, subIteration); } - // Detect sub-step rule matches (tag detection + ai() fallback) - const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content); + // Phase 3: status judgment for sub-step + let subTagContent = ''; + if (this.needsStatusJudgmentPhase(subStep)) { + subTagContent = await this.runStatusJudgmentPhase(subStep); + } + + const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); const finalResponse = matchedRuleIndex != null ? { ...subResponse, matchedRuleIndex } : subResponse; @@ -387,8 +522,8 @@ export class WorkflowEngine extends EventEmitter { .map((r) => r.instruction) .join('\n\n'); - // Evaluate parent step's rules against aggregated output - const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent); + // Parent step uses aggregate conditions, so tagContent is empty + const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent, ''); const aggregatedResponse: AgentResponse = { agent: step.name, diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index 1dabef0..487a991 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -5,7 +5,8 @@ * 1. Auto-injecting standard sections (Execution Context, Workflow Context, * User Request, Previous Response, Additional User Inputs, Instructions header) * 2. Replacing template placeholders with actual values - * 3. Appending auto-generated status rules from workflow rules + * + * Status judgment is handled separately in Phase 3 (buildStatusJudgmentInstruction). */ import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js'; @@ -60,29 +61,6 @@ export function buildExecutionMetadata(context: InstructionContext, edit?: boole }; } -/** Localized strings for status rules header */ -const STATUS_RULES_HEADER_STRINGS = { - en: { - heading: '# ⚠️ Required: Status Output Rules ⚠️', - warning: '**The workflow will stop without this tag.**', - instruction: 'Your final output MUST include a status tag following the rules below.', - }, - ja: { - heading: '# ⚠️ 必須: ステータス出力ルール ⚠️', - warning: '**このタグがないとワークフローが停止します。**', - instruction: '最終出力には必ず以下のルールに従ったステータスタグを含めてください。', - }, -} as const; - -/** - * Render status rules header. - * Prepended to auto-generated status rules from workflow rules. - */ -export function renderStatusRulesHeader(language: Language): string { - const strings = STATUS_RULES_HEADER_STRINGS[language]; - return [strings.heading, '', strings.warning, strings.instruction, ''].join('\n'); -} - /** Localized strings for rules-based status prompt */ const RULES_PROMPT_STRINGS = { en: { @@ -428,7 +406,8 @@ function replaceTemplatePlaceholders( * 4. Previous Response — if passPreviousResponse and has content, unless template contains {previous_response} * 5. Additional User Inputs — unless template contains {user_inputs} * 6. Instructions header + instruction_template content — always - * 7. Status Output Rules — if rules exist + * + * Status judgment is handled separately in Phase 3 (buildStatusJudgmentInstruction). * * Template placeholders ({task}, {previous_response}, etc.) are still replaced * within the instruction_template body for backward compatibility. @@ -483,17 +462,6 @@ export function buildInstruction( ); sections.push(`${s.instructions}\n${processedTemplate}`); - // 7. Status rules (auto-generated from rules) - // Skip when ALL rules are ai() conditions — agent doesn't need to output status tags - if (step.rules && step.rules.length > 0) { - const allAiConditions = step.rules.every((r) => r.isAiCondition); - if (!allAiConditions) { - const statusHeader = renderStatusRulesHeader(language); - const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); - sections.push(`${statusHeader}\n${generatedPrompt}`); - } - } - return sections.join('\n\n'); } @@ -613,3 +581,53 @@ export function buildReportInstruction( return sections.join('\n\n'); } + +/** Localized strings for status judgment phase (Phase 3) */ +const STATUS_JUDGMENT_STRINGS = { + en: { + header: 'Review your work results and determine the status. Do NOT perform any additional work.', + }, + ja: { + header: '作業結果を振り返り、ステータスを判定してください。追加の作業は行わないでください。', + }, +} as const; + +/** + * Context for building status judgment instruction (Phase 3). + */ +export interface StatusJudgmentContext { + /** Language */ + language?: Language; +} + +/** + * Build instruction for Phase 3 (status judgment). + * + * Resumes the agent session and asks it to evaluate its work + * and output the appropriate status tag. No tools are allowed. + * + * Includes: + * - Header instruction (review and determine status) + * - Status rules (criteria table + output format) from generateStatusRulesFromRules() + */ +export function buildStatusJudgmentInstruction( + step: WorkflowStep, + context: StatusJudgmentContext, +): string { + if (!step.rules || step.rules.length === 0) { + throw new Error(`buildStatusJudgmentInstruction called for step "${step.name}" which has no rules`); + } + + const language = context.language ?? 'en'; + const s = STATUS_JUDGMENT_STRINGS[language]; + const sections: string[] = []; + + // Header + sections.push(s.header); + + // Status rules (criteria table + output format) + const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(generatedPrompt); + + return sections.join('\n\n'); +} From 9c05b45e1e91b7a6fd9c4b6f0b54ac5c1b2384ab Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:07:18 +0900 Subject: [PATCH 5/6] =?UTF-8?q?feat:=20=E3=83=AB=E3=83=BC=E3=83=AB?= =?UTF-8?q?=E3=83=9E=E3=83=83=E3=83=81=E6=96=B9=E6=B3=95=E3=81=AE=E5=8F=AF?= =?UTF-8?q?=E8=A6=96=E5=8C=96=E3=81=A85=E6=AE=B5=E9=9A=8E=E3=83=95?= =?UTF-8?q?=E3=82=A9=E3=83=BC=E3=83=AB=E3=83=90=E3=83=83=E3=82=AF=E6=A4=9C?= =?UTF-8?q?=E5=87=BA=E3=82=92=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RuleMatchMethod型を追加し、検出方法(aggregate/phase3_tag/phase1_tag/ai_judge/ai_judge_fallback)を記録 - detectMatchedRuleを5段階フォールバックに拡張(Phase3タグ→Phase1タグ→AI judge→全条件AI judge) - matchedRuleMethodをセッションログとUI出力の両方に表示 - Phase 3のmaxTurnsを3に増加 - ParallelLoggerによるパラレルステップのプレフィックス付き出力を追加 --- src/__tests__/parallel-logger.test.ts | 417 ++++++++++++++++++++++++++ src/commands/workflowExecution.ts | 4 +- src/models/index.ts | 1 + src/models/types.ts | 10 + src/utils/session.ts | 6 + src/workflow/engine.ts | 116 +++++-- src/workflow/parallel-logger.ts | 206 +++++++++++++ 7 files changed, 741 insertions(+), 19 deletions(-) create mode 100644 src/__tests__/parallel-logger.test.ts create mode 100644 src/workflow/parallel-logger.ts diff --git a/src/__tests__/parallel-logger.test.ts b/src/__tests__/parallel-logger.test.ts new file mode 100644 index 0000000..893954f --- /dev/null +++ b/src/__tests__/parallel-logger.test.ts @@ -0,0 +1,417 @@ +/** + * Tests for parallel-logger module + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { ParallelLogger } from '../workflow/parallel-logger.js'; +import type { StreamEvent } from '../claude/types.js'; + +describe('ParallelLogger', () => { + let output: string[]; + let writeFn: (text: string) => void; + + beforeEach(() => { + output = []; + writeFn = (text: string) => output.push(text); + }); + + describe('buildPrefix', () => { + it('should build colored prefix with padding', () => { + const logger = new ParallelLogger({ + subStepNames: ['arch-review', 'sec'], + writeFn, + }); + + // arch-review is longest (11 chars), sec gets padding + const prefix = logger.buildPrefix('sec', 1); + // yellow color for index 1 + expect(prefix).toContain('[sec]'); + expect(prefix).toContain('\x1b[33m'); // yellow + expect(prefix).toContain('\x1b[0m'); // reset + // 11 - 3 = 8 spaces of padding + expect(prefix).toMatch(/\x1b\[0m {8} $/); + }); + + it('should cycle colors for index >= 4', () => { + const logger = new ParallelLogger({ + subStepNames: ['a', 'b', 'c', 'd', 'e'], + writeFn, + }); + + const prefix0 = logger.buildPrefix('a', 0); + const prefix4 = logger.buildPrefix('e', 4); + // Both should use cyan (\x1b[36m) + expect(prefix0).toContain('\x1b[36m'); + expect(prefix4).toContain('\x1b[36m'); + }); + + it('should assign correct colors in order', () => { + const logger = new ParallelLogger({ + subStepNames: ['a', 'b', 'c', 'd'], + writeFn, + }); + + expect(logger.buildPrefix('a', 0)).toContain('\x1b[36m'); // cyan + expect(logger.buildPrefix('b', 1)).toContain('\x1b[33m'); // yellow + expect(logger.buildPrefix('c', 2)).toContain('\x1b[35m'); // magenta + expect(logger.buildPrefix('d', 3)).toContain('\x1b[32m'); // green + }); + + it('should have no extra padding for longest name', () => { + const logger = new ParallelLogger({ + subStepNames: ['long-name', 'short'], + writeFn, + }); + + const prefix = logger.buildPrefix('long-name', 0); + // No padding needed (0 spaces) + expect(prefix).toMatch(/\x1b\[0m $/); + }); + }); + + describe('text event line buffering', () => { + it('should buffer partial line and output on newline', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + // Partial text (no newline) + handler({ type: 'text', data: { text: 'Hello' } } as StreamEvent); + expect(output).toHaveLength(0); + + // Complete the line + handler({ type: 'text', data: { text: ' World\n' } } as StreamEvent); + expect(output).toHaveLength(1); + expect(output[0]).toContain('[step-a]'); + expect(output[0]).toContain('Hello World'); + expect(output[0]).toMatch(/\n$/); + }); + + it('should handle multiple lines in single text event', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + handler({ type: 'text', data: { text: 'Line 1\nLine 2\n' } } as StreamEvent); + expect(output).toHaveLength(2); + expect(output[0]).toContain('Line 1'); + expect(output[1]).toContain('Line 2'); + }); + + it('should output empty line without prefix', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + handler({ type: 'text', data: { text: 'Hello\n\nWorld\n' } } as StreamEvent); + expect(output).toHaveLength(3); + expect(output[0]).toContain('Hello'); + expect(output[1]).toBe('\n'); // empty line without prefix + expect(output[2]).toContain('World'); + }); + + it('should keep trailing partial in buffer', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + handler({ type: 'text', data: { text: 'Complete\nPartial' } } as StreamEvent); + expect(output).toHaveLength(1); + expect(output[0]).toContain('Complete'); + + // Flush remaining + logger.flush(); + expect(output).toHaveLength(2); + expect(output[1]).toContain('Partial'); + }); + }); + + describe('block events (tool_use, tool_result, tool_output, thinking)', () => { + it('should prefix tool_use events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'tool_use', + data: { tool: 'Read', input: {}, id: '1' }, + } as StreamEvent); + + expect(output).toHaveLength(1); + expect(output[0]).toContain('[sub-a]'); + expect(output[0]).toContain('[tool] Read'); + }); + + it('should prefix tool_result events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'tool_result', + data: { content: 'File content here', isError: false }, + } as StreamEvent); + + expect(output).toHaveLength(1); + expect(output[0]).toContain('File content here'); + }); + + it('should prefix multi-line tool output', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'tool_output', + data: { tool: 'Bash', output: 'line1\nline2' }, + } as StreamEvent); + + expect(output).toHaveLength(2); + expect(output[0]).toContain('line1'); + expect(output[1]).toContain('line2'); + }); + + it('should prefix thinking events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'thinking', + data: { thinking: 'Considering options...' }, + } as StreamEvent); + + expect(output).toHaveLength(1); + expect(output[0]).toContain('Considering options...'); + }); + }); + + describe('delegated events (init, result, error)', () => { + it('should delegate init event to parent callback', () => { + const parentEvents: StreamEvent[] = []; + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + parentOnStream: (event) => parentEvents.push(event), + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + const initEvent: StreamEvent = { + type: 'init', + data: { model: 'claude-3', sessionId: 'sess-1' }, + }; + handler(initEvent); + + expect(parentEvents).toHaveLength(1); + expect(parentEvents[0]).toBe(initEvent); + expect(output).toHaveLength(0); // Not written to stdout + }); + + it('should delegate result event to parent callback', () => { + const parentEvents: StreamEvent[] = []; + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + parentOnStream: (event) => parentEvents.push(event), + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + const resultEvent: StreamEvent = { + type: 'result', + data: { result: 'done', sessionId: 'sess-1', success: true }, + }; + handler(resultEvent); + + expect(parentEvents).toHaveLength(1); + expect(parentEvents[0]).toBe(resultEvent); + }); + + it('should delegate error event to parent callback', () => { + const parentEvents: StreamEvent[] = []; + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + parentOnStream: (event) => parentEvents.push(event), + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + const errorEvent: StreamEvent = { + type: 'error', + data: { message: 'Something went wrong' }, + }; + handler(errorEvent); + + expect(parentEvents).toHaveLength(1); + expect(parentEvents[0]).toBe(errorEvent); + }); + + it('should not crash when no parent callback for delegated events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + // Should not throw + handler({ type: 'init', data: { model: 'claude-3', sessionId: 'sess-1' } } as StreamEvent); + handler({ type: 'result', data: { result: 'done', sessionId: 'sess-1', success: true } } as StreamEvent); + handler({ type: 'error', data: { message: 'err' } } as StreamEvent); + + expect(output).toHaveLength(0); + }); + }); + + describe('flush', () => { + it('should output remaining buffered content', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a', 'step-b'], + writeFn, + }); + const handlerA = logger.createStreamHandler('step-a', 0); + const handlerB = logger.createStreamHandler('step-b', 1); + + handlerA({ type: 'text', data: { text: 'partial-a' } } as StreamEvent); + handlerB({ type: 'text', data: { text: 'partial-b' } } as StreamEvent); + + expect(output).toHaveLength(0); + + logger.flush(); + + expect(output).toHaveLength(2); + expect(output[0]).toContain('partial-a'); + expect(output[1]).toContain('partial-b'); + }); + + it('should not output empty buffers', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a', 'step-b'], + writeFn, + }); + const handlerA = logger.createStreamHandler('step-a', 0); + + handlerA({ type: 'text', data: { text: 'content\n' } } as StreamEvent); + output.length = 0; // Clear previous output + + logger.flush(); + expect(output).toHaveLength(0); // Nothing to flush + }); + }); + + describe('printSummary', () => { + it('should print completion summary', () => { + const logger = new ParallelLogger({ + subStepNames: ['arch-review', 'security-review'], + writeFn, + }); + + logger.printSummary('parallel-review', [ + { name: 'arch-review', condition: 'approved' }, + { name: 'security-review', condition: 'rejected' }, + ]); + + const fullOutput = output.join(''); + expect(fullOutput).toContain('parallel-review results'); + expect(fullOutput).toContain('arch-review:'); + expect(fullOutput).toContain('approved'); + expect(fullOutput).toContain('security-review:'); + expect(fullOutput).toContain('rejected'); + // Header and footer contain ─ + expect(fullOutput).toContain('─'); + }); + + it('should show (no result) for undefined condition', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + + logger.printSummary('parallel-step', [ + { name: 'step-a', condition: undefined }, + ]); + + const fullOutput = output.join(''); + expect(fullOutput).toContain('(no result)'); + }); + + it('should right-pad sub-step names to align results', () => { + const logger = new ParallelLogger({ + subStepNames: ['short', 'very-long-name'], + writeFn, + }); + + logger.printSummary('test', [ + { name: 'short', condition: 'done' }, + { name: 'very-long-name', condition: 'done' }, + ]); + + // Find the result lines (indented with 2 spaces) + const resultLines = output.filter((l) => l.startsWith(' ')); + expect(resultLines).toHaveLength(2); + + // Both 'done' values should be at the same column + const doneIndex0 = resultLines[0]!.indexOf('done'); + const doneIndex1 = resultLines[1]!.indexOf('done'); + expect(doneIndex0).toBe(doneIndex1); + }); + + it('should flush remaining buffers before printing summary', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + // Leave partial content in buffer + handler({ type: 'text', data: { text: 'trailing content' } } as StreamEvent); + + logger.printSummary('test', [ + { name: 'step-a', condition: 'done' }, + ]); + + // First output should be the flushed buffer + expect(output[0]).toContain('trailing content'); + // Then the summary + const fullOutput = output.join(''); + expect(fullOutput).toContain('test results'); + }); + }); + + describe('interleaved output from multiple sub-steps', () => { + it('should correctly interleave prefixed output', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a', 'step-b'], + writeFn, + }); + const handlerA = logger.createStreamHandler('step-a', 0); + const handlerB = logger.createStreamHandler('step-b', 1); + + handlerA({ type: 'text', data: { text: 'A output\n' } } as StreamEvent); + handlerB({ type: 'text', data: { text: 'B output\n' } } as StreamEvent); + handlerA({ type: 'text', data: { text: 'A second\n' } } as StreamEvent); + + expect(output).toHaveLength(3); + expect(output[0]).toContain('[step-a]'); + expect(output[0]).toContain('A output'); + expect(output[1]).toContain('[step-b]'); + expect(output[1]).toContain('B output'); + expect(output[2]).toContain('[step-a]'); + expect(output[2]).toContain('A second'); + }); + }); +}); diff --git a/src/commands/workflowExecution.ts b/src/commands/workflowExecution.ts index 0c7be9f..b727db2 100644 --- a/src/commands/workflowExecution.ts +++ b/src/commands/workflowExecution.ts @@ -190,6 +190,7 @@ export async function executeWorkflow( step: step.name, status: response.status, matchedRuleIndex: response.matchedRuleIndex, + matchedRuleMethod: response.matchedRuleMethod, contentLength: response.content.length, sessionId: response.sessionId, error: response.error, @@ -203,7 +204,8 @@ export async function executeWorkflow( if (response.matchedRuleIndex != null && step.rules) { const rule = step.rules[response.matchedRuleIndex]; if (rule) { - status('Status', rule.condition); + const methodLabel = response.matchedRuleMethod ? ` (${response.matchedRuleMethod})` : ''; + status('Status', `${rule.condition}${methodLabel}`); } else { status('Status', response.status); } diff --git a/src/models/index.ts b/src/models/index.ts index 5b3e865..b43dd20 100644 --- a/src/models/index.ts +++ b/src/models/index.ts @@ -2,6 +2,7 @@ export type { AgentType, Status, + RuleMatchMethod, ReportConfig, ReportObjectConfig, AgentResponse, diff --git a/src/models/types.ts b/src/models/types.ts index 7db34e1..fc0bd83 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -17,6 +17,14 @@ export type Status = | 'interrupted' | 'answer'; +/** How a rule match was detected */ +export type RuleMatchMethod = + | 'aggregate' + | 'phase3_tag' + | 'phase1_tag' + | 'ai_judge' + | 'ai_judge_fallback'; + /** Response from an agent execution */ export interface AgentResponse { agent: string; @@ -28,6 +36,8 @@ export interface AgentResponse { error?: string; /** Matched rule index (0-based) when rules-based detection was used */ matchedRuleIndex?: number; + /** How the rule match was detected */ + matchedRuleMethod?: RuleMatchMethod; } /** Session state for workflow execution */ diff --git a/src/utils/session.ts b/src/utils/session.ts index fa88f27..1ab50da 100644 --- a/src/utils/session.ts +++ b/src/utils/session.ts @@ -24,6 +24,10 @@ export interface SessionLog { timestamp: string; content: string; error?: string; + /** Matched rule index (0-based) when rules-based detection was used */ + matchedRuleIndex?: number; + /** How the rule match was detected */ + matchedRuleMethod?: string; }>; } @@ -88,6 +92,8 @@ export function addToSessionLog( timestamp: response.timestamp.toISOString(), content: response.content, ...(response.error ? { error: response.error } : {}), + ...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}), + ...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}), }); log.iterations++; } diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index 612e0d4..e06fcd4 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -10,6 +10,7 @@ import type { WorkflowState, WorkflowStep, AgentResponse, + RuleMatchMethod, } from '../models/types.js'; import { runAgent, type RunAgentOptions } from '../agents/runner.js'; import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; @@ -19,6 +20,7 @@ import { detectRuleIndex, callAiJudge } from '../claude/client.js'; import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; +import { ParallelLogger } from './parallel-logger.js'; import { createInitialState, addUserInput, @@ -263,8 +265,10 @@ export class WorkflowEngine extends EventEmitter { * Detect matched rule for a step's response. * Evaluation order (first match wins): * 1. Aggregate conditions: all()/any() — evaluate sub-step results - * 2. Standard [STEP:N] tag detection (from tagContent, i.e. Phase 3 output) - * 3. ai() condition evaluation via AI judge (from agentContent, i.e. Phase 1 output) + * 2. Tag detection from Phase 3 output + * 3. Tag detection from Phase 1 output (fallback) + * 4. ai() condition evaluation via AI judge + * 5. All-conditions AI judge (final fallback) * * Returns undefined for steps without rules. * Throws if rules exist but no rule matched (Fail Fast). @@ -273,27 +277,41 @@ export class WorkflowEngine extends EventEmitter { * @param agentContent - Phase 1 output (main execution) * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection */ - private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise { + private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise<{ index: number; method: RuleMatchMethod } | undefined> { if (!step.rules || step.rules.length === 0) return undefined; // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps const aggIndex = this.evaluateAggregateConditions(step); if (aggIndex >= 0) { - return aggIndex; + return { index: aggIndex, method: 'aggregate' }; } - // 2. Standard tag detection (from Phase 3 output) + // 2. Tag detection from Phase 3 output if (tagContent) { const ruleIndex = detectRuleIndex(tagContent, step.name); if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - return ruleIndex; + return { index: ruleIndex, method: 'phase3_tag' }; } } - // 3. AI judge fallback (from Phase 1 output) + // 3. Tag detection from Phase 1 output (fallback) + if (agentContent) { + const ruleIndex = detectRuleIndex(agentContent, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return { index: ruleIndex, method: 'phase1_tag' }; + } + } + + // 4. AI judge for ai() conditions only const aiRuleIndex = await this.evaluateAiConditions(step, agentContent); if (aiRuleIndex >= 0) { - return aiRuleIndex; + return { index: aiRuleIndex, method: 'ai_judge' }; + } + + // 5. AI judge for all conditions (final fallback) + const fallbackIndex = await this.evaluateAllConditionsViaAiJudge(step, agentContent); + if (fallbackIndex >= 0) { + return { index: fallbackIndex, method: 'ai_judge_fallback' }; } throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`); @@ -381,9 +399,10 @@ export class WorkflowEngine extends EventEmitter { tagContent = await this.runStatusJudgmentPhase(step); } - const matchedRuleIndex = await this.detectMatchedRule(step, response.content, tagContent); - if (matchedRuleIndex != null) { - response = { ...response, matchedRuleIndex }; + const match = await this.detectMatchedRule(step, response.content, tagContent); + if (match) { + log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method }); + response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method }; } this.state.stepOutputs.set(step.name, response); @@ -454,7 +473,7 @@ export class WorkflowEngine extends EventEmitter { const judgmentOptions = this.buildResumeOptions(step, sessionId, { allowedTools: [], - maxTurns: 1, + maxTurns: 3, }); const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); @@ -469,6 +488,9 @@ export class WorkflowEngine extends EventEmitter { /** * Run a parallel step: execute all sub-steps concurrently, then aggregate results. * The aggregated output becomes the parent step's response for rules evaluation. + * + * When onStream is provided, uses ParallelLogger to prefix each sub-step's + * output with `[name]` for readable interleaved display. */ private async runParallelStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { const subSteps = step.parallel!; @@ -479,14 +501,28 @@ export class WorkflowEngine extends EventEmitter { stepIteration, }); + // Create parallel logger for prefixed output (only when streaming is enabled) + const parallelLogger = this.options.onStream + ? new ParallelLogger({ + subStepNames: subSteps.map((s) => s.name), + parentOnStream: this.options.onStream, + }) + : undefined; + // Run all sub-steps concurrently const subResults = await Promise.all( - subSteps.map(async (subStep) => { + subSteps.map(async (subStep, index) => { const subIteration = incrementStepIteration(this.state, subStep.name); const subInstruction = this.buildInstruction(subStep, subIteration); // Phase 1: main execution (Write excluded if sub-step has report) const agentOptions = this.buildAgentOptions(subStep); + + // Override onStream with parallel logger's prefixed handler + if (parallelLogger) { + agentOptions.onStream = parallelLogger.createStreamHandler(subStep.name, index); + } + const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); this.updateAgentSession(subStep.agent, subResponse.sessionId); @@ -501,9 +537,9 @@ export class WorkflowEngine extends EventEmitter { subTagContent = await this.runStatusJudgmentPhase(subStep); } - const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); - const finalResponse = matchedRuleIndex != null - ? { ...subResponse, matchedRuleIndex } + const match = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); + const finalResponse = match + ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method } : subResponse; this.state.stepOutputs.set(subStep.name, finalResponse); @@ -513,6 +549,19 @@ export class WorkflowEngine extends EventEmitter { }), ); + // Print completion summary + if (parallelLogger) { + parallelLogger.printSummary( + step.name, + subResults.map((r) => ({ + name: r.subStep.name, + condition: r.response.matchedRuleIndex != null && r.subStep.rules + ? r.subStep.rules[r.response.matchedRuleIndex]?.condition + : undefined, + })), + ); + } + // Aggregate sub-step outputs into parent step's response const aggregatedContent = subResults .map((r) => `## ${r.subStep.name}\n${r.response.content}`) @@ -523,14 +572,14 @@ export class WorkflowEngine extends EventEmitter { .join('\n\n'); // Parent step uses aggregate conditions, so tagContent is empty - const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent, ''); + const match = await this.detectMatchedRule(step, aggregatedContent, ''); const aggregatedResponse: AgentResponse = { agent: step.name, status: 'done', content: aggregatedContent, timestamp: new Date(), - ...(matchedRuleIndex != null && { matchedRuleIndex }), + ...(match && { matchedRuleIndex: match.index, matchedRuleMethod: match.method }), }; this.state.stepOutputs.set(step.name, aggregatedResponse); @@ -580,6 +629,37 @@ export class WorkflowEngine extends EventEmitter { return -1; } + /** + * Final fallback: evaluate ALL rule conditions via AI judge. + * Unlike evaluateAiConditions (which only handles ai() flagged rules), + * this sends every rule's condition text to the judge. + * Returns the 0-based rule index, or -1 if no match. + */ + private async evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string): Promise { + if (!step.rules || step.rules.length === 0) return -1; + + const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition })); + + log.debug('Evaluating all conditions via AI judge (final fallback)', { + step: step.name, + conditionCount: conditions.length, + }); + + const judgeResult = await callAiJudge(agentOutput, conditions, { cwd: this.cwd }); + + if (judgeResult >= 0 && judgeResult < conditions.length) { + log.debug('AI judge (fallback) matched condition', { + step: step.name, + ruleIndex: judgeResult, + condition: conditions[judgeResult]!.text, + }); + return judgeResult; + } + + log.debug('AI judge (fallback) did not match any condition', { step: step.name }); + return -1; + } + /** * Determine next step for a completed step using rules-based routing. */ diff --git a/src/workflow/parallel-logger.ts b/src/workflow/parallel-logger.ts new file mode 100644 index 0000000..31d14a3 --- /dev/null +++ b/src/workflow/parallel-logger.ts @@ -0,0 +1,206 @@ +/** + * Parallel step log display + * + * Provides prefixed, color-coded interleaved output for parallel sub-steps. + * Each sub-step's stream output gets a `[name]` prefix with right-padding + * aligned to the longest sub-step name. + */ + +import type { StreamCallback, StreamEvent } from '../claude/types.js'; + +/** ANSI color codes for sub-step prefixes (cycled in order) */ +const COLORS = ['\x1b[36m', '\x1b[33m', '\x1b[35m', '\x1b[32m'] as const; // cyan, yellow, magenta, green +const RESET = '\x1b[0m'; + +export interface ParallelLoggerOptions { + /** Sub-step names (used to calculate prefix width) */ + subStepNames: string[]; + /** Parent onStream callback to delegate non-prefixed events */ + parentOnStream?: StreamCallback; + /** Override process.stdout.write for testing */ + writeFn?: (text: string) => void; +} + +/** + * Logger for parallel step execution. + * + * Creates per-sub-step StreamCallback wrappers that: + * - Buffer partial lines until newline + * - Prepend colored `[name]` prefix to each complete line + * - Delegate init/result/error events to the parent callback + */ +export class ParallelLogger { + private readonly maxNameLength: number; + private readonly lineBuffers: Map = new Map(); + private readonly parentOnStream?: StreamCallback; + private readonly writeFn: (text: string) => void; + + constructor(options: ParallelLoggerOptions) { + this.maxNameLength = Math.max(...options.subStepNames.map((n) => n.length)); + this.parentOnStream = options.parentOnStream; + this.writeFn = options.writeFn ?? ((text: string) => process.stdout.write(text)); + + for (const name of options.subStepNames) { + this.lineBuffers.set(name, ''); + } + } + + /** + * Build the colored prefix string for a sub-step. + * Format: `\x1b[COLORm[name]\x1b[0m` + padding spaces + */ + buildPrefix(name: string, index: number): string { + const color = COLORS[index % COLORS.length]!; + const padding = ' '.repeat(this.maxNameLength - name.length); + return `${color}[${name}]${RESET}${padding} `; + } + + /** + * Create a StreamCallback wrapper for a specific sub-step. + * + * - `text`: buffered line-by-line with prefix + * - `tool_use`, `tool_result`, `tool_output`, `thinking`: prefixed per-line, no buffering + * - `init`, `result`, `error`: delegated to parent callback (no prefix) + */ + createStreamHandler(subStepName: string, index: number): StreamCallback { + const prefix = this.buildPrefix(subStepName, index); + + return (event: StreamEvent) => { + switch (event.type) { + case 'text': + this.handleTextEvent(subStepName, prefix, event.data.text); + break; + + case 'tool_use': + case 'tool_result': + case 'tool_output': + case 'thinking': + this.handleBlockEvent(prefix, event); + break; + + case 'init': + case 'result': + case 'error': + // Delegate to parent without prefix + this.parentOnStream?.(event); + break; + } + }; + } + + /** + * Handle text event with line buffering. + * Buffer until newline, then output prefixed complete lines. + * Empty lines get no prefix per spec. + */ + private handleTextEvent(name: string, prefix: string, text: string): void { + const buffer = this.lineBuffers.get(name) ?? ''; + const combined = buffer + text; + const parts = combined.split('\n'); + + // Last part is incomplete (no trailing newline) — keep in buffer + this.lineBuffers.set(name, parts.pop()!); + + // Output all complete lines + for (const line of parts) { + if (line === '') { + this.writeFn('\n'); + } else { + this.writeFn(`${prefix}${line}\n`); + } + } + } + + /** + * Handle block events (tool_use, tool_result, tool_output, thinking). + * Output with prefix, splitting multi-line content. + */ + private handleBlockEvent(prefix: string, event: StreamEvent): void { + let text: string; + switch (event.type) { + case 'tool_use': + text = `[tool] ${event.data.tool}`; + break; + case 'tool_result': + text = event.data.content; + break; + case 'tool_output': + text = event.data.output; + break; + case 'thinking': + text = event.data.thinking; + break; + default: + return; + } + + for (const line of text.split('\n')) { + if (line === '') { + this.writeFn('\n'); + } else { + this.writeFn(`${prefix}${line}\n`); + } + } + } + + /** + * Flush remaining line buffers for all sub-steps. + * Call after all sub-steps complete to output any trailing partial lines. + */ + flush(): void { + // Build prefixes for flush — need index mapping + // Since we don't store index, iterate lineBuffers in insertion order + // (Map preserves insertion order, matching subStepNames order) + let index = 0; + for (const [name, buffer] of this.lineBuffers) { + if (buffer !== '') { + const prefix = this.buildPrefix(name, index); + this.writeFn(`${prefix}${buffer}\n`); + this.lineBuffers.set(name, ''); + } + index++; + } + } + + /** + * Print completion summary after all sub-steps finish. + * + * Format: + * ``` + * ── parallel-review results ── + * arch-review: approved + * security-review: rejected + * ────────────────────────────── + * ``` + */ + printSummary( + parentStepName: string, + results: Array<{ name: string; condition: string | undefined }>, + ): void { + this.flush(); + + const maxResultNameLength = Math.max(...results.map((r) => r.name.length)); + + const resultLines = results.map((r) => { + const padding = ' '.repeat(maxResultNameLength - r.name.length); + const condition = r.condition ?? '(no result)'; + return ` ${r.name}:${padding} ${condition}`; + }); + + // Header line: ── name results ── + const headerText = ` ${parentStepName} results `; + const maxLineLength = Math.max( + headerText.length + 4, // 4 for "── " + " ──" + ...resultLines.map((l) => l.length), + ); + const sideWidth = Math.max(1, Math.floor((maxLineLength - headerText.length) / 2)); + const headerLine = `${'─'.repeat(sideWidth)}${headerText}${'─'.repeat(sideWidth)}`; + const footerLine = '─'.repeat(headerLine.length); + + this.writeFn(`${headerLine}\n`); + for (const line of resultLines) { + this.writeFn(`${line}\n`); + } + this.writeFn(`${footerLine}\n`); + } +} From 213e293c066009e586b5d0571aa519885372e882 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:38:49 +0900 Subject: [PATCH 6/6] =?UTF-8?q?Phase=201=E3=83=97=E3=83=AD=E3=83=B3?= =?UTF-8?q?=E3=83=97=E3=83=88=E3=81=AB=E3=82=82=E3=82=B9=E3=83=86=E3=83=BC?= =?UTF-8?q?=E3=82=BF=E3=82=B9=E3=83=AB=E3=83=BC=E3=83=AB=E3=82=92=E6=B3=A8?= =?UTF-8?q?=E5=85=A5=EF=BC=88Phase=203=E3=81=A8=E3=81=AE=E4=BD=B5=E7=94=A8?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildInstruction()にセクション7を追加し、タグベースのルールがある場合に 判定基準と出力フォーマットをPhase 1のプロンプトに注入する。 ai()/aggregate条件のみの場合はスキップ。 --- src/__tests__/instructionBuilder.test.ts | 46 +++++++++++++----------- src/workflow/instruction-builder.ts | 19 +++++++--- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index 285ad56..5c54322 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -362,8 +362,8 @@ describe('instruction-builder', () => { }); }); - describe('buildInstruction with rules (Phase 1 — no status tags)', () => { - it('should NOT include status rules even when rules exist (phase separation)', () => { + describe('buildInstruction with rules (Phase 1 — status rules injection)', () => { + it('should include status rules when tag-based rules exist', () => { const step = createMinimalStep('Do work'); step.name = 'plan'; step.rules = [ @@ -374,10 +374,9 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Phase 1 should NOT contain status header or criteria - expect(result).not.toContain('Status Output Rules'); - expect(result).not.toContain('Decision Criteria'); - expect(result).not.toContain('[PLAN:'); + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[PLAN:1]'); + expect(result).toContain('[PLAN:2]'); }); it('should not add status rules when rules do not exist', () => { @@ -386,7 +385,6 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); expect(result).not.toContain('Decision Criteria'); }); @@ -397,7 +395,6 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); expect(result).not.toContain('Decision Criteria'); }); }); @@ -859,8 +856,8 @@ describe('instruction-builder', () => { }); }); - describe('phase separation — buildInstruction never includes status rules', () => { - it('should NOT include status rules even with ai() conditions', () => { + describe('status rules injection — skip when all rules are ai()/aggregate', () => { + it('should NOT include status rules when all rules are ai() conditions', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'ai("No issues")', next: 'COMPLETE', isAiCondition: true, aiConditionText: 'No issues' }, @@ -870,12 +867,13 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('Decision Criteria'); expect(result).not.toContain('[TEST-STEP:'); }); - it('should NOT include status rules with mixed regular and ai() conditions', () => { + it('should include status rules with mixed regular and ai() conditions', () => { const step = createMinimalStep('Do work'); + step.name = 'review'; step.rules = [ { condition: 'Error occurred', next: 'ABORT' }, { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, @@ -884,11 +882,13 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[REVIEW:1]'); }); - it('should NOT include status rules with regular conditions only', () => { + it('should include status rules with regular conditions only', () => { const step = createMinimalStep('Do work'); + step.name = 'plan'; step.rules = [ { condition: 'Done', next: 'COMPLETE' }, { condition: 'Blocked', next: 'ABORT' }, @@ -897,10 +897,12 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[PLAN:1]'); + expect(result).toContain('[PLAN:2]'); }); - it('should NOT include status rules with aggregate conditions', () => { + it('should NOT include status rules when all rules are aggregate conditions', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, @@ -910,10 +912,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('Decision Criteria'); }); - it('should NOT include status rules with mixed ai() and aggregate conditions', () => { + it('should NOT include status rules when all rules are ai() + aggregate', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, @@ -924,11 +926,12 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('Decision Criteria'); }); - it('should NOT include status rules with mixed aggregate and regular conditions', () => { + it('should include status rules with mixed aggregate and regular conditions', () => { const step = createMinimalStep('Do work'); + step.name = 'supervise'; step.rules = [ { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, { condition: 'Error occurred', next: 'ABORT' }, @@ -937,7 +940,8 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('Status Output Rules'); + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[SUPERVISE:1]'); }); }); diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index 487a991..acdd061 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -3,10 +3,12 @@ * * Builds the instruction string for agent execution by: * 1. Auto-injecting standard sections (Execution Context, Workflow Context, - * User Request, Previous Response, Additional User Inputs, Instructions header) + * User Request, Previous Response, Additional User Inputs, Instructions header, + * Status Output Rules) * 2. Replacing template placeholders with actual values * - * Status judgment is handled separately in Phase 3 (buildStatusJudgmentInstruction). + * Status rules are injected into Phase 1 for tag-based detection, + * and also used in Phase 3 (buildStatusJudgmentInstruction) as a dedicated follow-up. */ import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js'; @@ -406,8 +408,7 @@ function replaceTemplatePlaceholders( * 4. Previous Response — if passPreviousResponse and has content, unless template contains {previous_response} * 5. Additional User Inputs — unless template contains {user_inputs} * 6. Instructions header + instruction_template content — always - * - * Status judgment is handled separately in Phase 3 (buildStatusJudgmentInstruction). + * 7. Status Output Rules — when step has tag-based rules (not all ai()/aggregate) * * Template placeholders ({task}, {previous_response}, etc.) are still replaced * within the instruction_template body for backward compatibility. @@ -462,6 +463,16 @@ export function buildInstruction( ); sections.push(`${s.instructions}\n${processedTemplate}`); + // 7. Status Output Rules (for tag-based detection in Phase 1) + // Skip if all rules are ai() or aggregate conditions (no tags needed) + if (step.rules && step.rules.length > 0) { + const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); + if (!allNonTagConditions) { + const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(statusRulesPrompt); + } + } + return sections.join('\n\n'); }