From 70651f8dd86f7931ae77d222ad248fa7ae26f8ae Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 14:53:25 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20ai()=20=E6=9D=A1=E4=BB=B6=E5=BC=8F?= =?UTF-8?q?=E3=81=AB=E3=82=88=E3=82=8BAI=E9=81=B7=E7=A7=BB=E5=88=A4?= =?UTF-8?q?=E6=96=AD=E3=81=A8=E3=83=91=E3=83=A9=E3=83=AC=E3=83=AB=E3=82=B9?= =?UTF-8?q?=E3=83=86=E3=83=83=E3=83=97=E5=AE=9F=E8=A1=8C=E3=82=92=E5=AE=9F?= =?UTF-8?q?=E8=A3=85=20(#9,=20#20)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - rules の condition に ai("...") 式を追加し、別AIが遷移先を判断する仕組みを導入 - ワークフローステップに parallel フィールドを追加し、サブステップの並列実行を実装 - all()/any() 集約条件の仕様書を追加 --- docs/spec-aggregate-conditions.md | 104 ++++++++ src/__tests__/ai-judge.test.ts | 53 ++++ src/__tests__/instructionBuilder.test.ts | 42 +++ src/__tests__/parallel-and-loader.test.ts | 300 ++++++++++++++++++++++ src/claude/client.ts | 74 ++++++ src/config/workflowLoader.ts | 84 ++++-- src/models/schemas.ts | 27 +- src/models/types.ts | 6 + src/workflow/engine.ts | 191 ++++++++++++-- src/workflow/instruction-builder.ts | 10 +- 10 files changed, 837 insertions(+), 54 deletions(-) create mode 100644 docs/spec-aggregate-conditions.md create mode 100644 src/__tests__/ai-judge.test.ts create mode 100644 src/__tests__/parallel-and-loader.test.ts diff --git a/docs/spec-aggregate-conditions.md b/docs/spec-aggregate-conditions.md new file mode 100644 index 0000000..dcfbbde --- /dev/null +++ b/docs/spec-aggregate-conditions.md @@ -0,0 +1,104 @@ +# 集約条件 `all()` / `any()` 仕様 + +## 背景 + +パラレルステップでは複数のサブステップが並列実行される。各サブステップは自身のルールで結果(approved, rejected 等)を判定するが、親ステップが「全体としてどう遷移するか」を決定する必要がある。 + +現状、親ステップの遷移判定は結合テキストに対する `ai()` 評価かタグ検出しかない。しかし、「全員が承認したら次へ」「1人でも却下したらやり直し」といった集約判定はルールベースで十分であり、AI呼び出しは不要。 + +## 目的 + +パラレルステップの親ルールに `all("condition")` / `any("condition")` 構文を追加し、サブステップの判定結果をルールベースで集約する。 + +## YAML 構文 + +```yaml +- name: parallel-review + parallel: + - name: arch-review + agent: ~/.takt/agents/default/architect.md + rules: + - condition: approved + next: _ + - condition: rejected + next: _ + - name: security-review + agent: ~/.takt/agents/default/security-reviewer.md + rules: + - condition: approved + next: _ + - condition: rejected + next: _ + rules: + - condition: all("approved") + next: COMPLETE + - condition: any("rejected") + next: implement +``` + +## 式のセマンティクス + +| 式 | 意味 | +|---|------| +| `all("X")` | 全サブステップの判定結果が `X` のとき真 | +| `any("X")` | 1つ以上のサブステップの判定結果が `X` のとき真 | + +「判定結果」とは、サブステップのルール評価でマッチしたルールの `condition` 値を指す。 + +## エッジケースの定義 + +| ケース | `all("X")` | `any("X")` | +|--------|-----------|-----------| +| 全サブステップが X | true | true | +| 一部が X | false | true | +| いずれも X でない | false | false | +| 判定結果なし(ルール未定義 or マッチなし) | false | そのサブステップは判定対象外 | +| サブステップ 0 件 | false | false | +| 非パラレルステップで使用 | false | false | + +`all()` は「全員が確実に X」を要求するため、判定不能なサブステップがあれば false。 +`any()` は「誰か1人でも X」を探すため、判定不能なサブステップは無視する。 + +## 評価の優先順位 + +親ステップの `rules` 配列は先頭から順に評価される。各ルールの種類に応じた評価方式が適用される。 + +| 順位 | 種類 | 評価方式 | コスト | +|------|------|---------|--------| +| 1 | `all()` / `any()` | サブステップの判定結果を集計 | なし | +| 2 | 通常条件(`done` 等) | 結合テキストで `[STEP:N]` タグ検出 | なし | +| 3 | `ai("...")` | AI judge 呼び出し | API 1回 | + +最初にマッチしたルールで遷移が確定する。`all()` / `any()` を先に書けば、マッチした時点で `ai()` は呼ばれない。 + +## 他の条件式との混在 + +同一の `rules` 配列内で自由に混在できる。 + +```yaml +rules: + - condition: all("approved") # 集約(高速) + next: COMPLETE + - condition: any("rejected") # 集約(高速) + next: implement + - condition: ai("判断が難しい場合") # AI フォールバック + next: manual-review +``` + +## サブステップのルール + +サブステップの `rules` はサブステップ自身の判定結果を決めるために使う。`next` フィールドはパラレル文脈では使用されない(親の `rules` が遷移を決定する)。スキーマ互換性のため `next` は必須のまま残し、値は任意とする。 + +## ステータスタグの注入 + +親ステップの全ルールが `all()` / `any()` / `ai()` のいずれかである場合、ステータスタグ(`[STEP:N]` 系)の注入をスキップする。タグ検出が不要なため。 + +## 変更対象 + +| ファイル | 変更内容 | +|---------|---------| +| `src/models/types.ts` | `WorkflowRule` に集約条件フラグを追加 | +| `src/config/workflowLoader.ts` | `all()` / `any()` パターンの検出と正規化 | +| `src/workflow/engine.ts` | 集約条件の評価ロジックを追加 | +| `src/workflow/instruction-builder.ts` | ステータスタグスキップ条件を拡張 | +| テスト | パース、評価、エッジケース、混在ルール | diff --git a/src/__tests__/ai-judge.test.ts b/src/__tests__/ai-judge.test.ts new file mode 100644 index 0000000..d86e68b --- /dev/null +++ b/src/__tests__/ai-judge.test.ts @@ -0,0 +1,53 @@ +/** + * Tests for AI judge (ai() condition evaluation) + */ + +import { describe, it, expect } from 'vitest'; +import { detectJudgeIndex, buildJudgePrompt } from '../claude/client.js'; + +describe('detectJudgeIndex', () => { + it('should detect [JUDGE:1] as index 0', () => { + expect(detectJudgeIndex('[JUDGE:1]')).toBe(0); + }); + + it('should detect [JUDGE:3] as index 2', () => { + expect(detectJudgeIndex('Some output [JUDGE:3] more text')).toBe(2); + }); + + it('should return -1 for no match', () => { + expect(detectJudgeIndex('No judge tag here')).toBe(-1); + }); + + it('should return -1 for [JUDGE:0]', () => { + expect(detectJudgeIndex('[JUDGE:0]')).toBe(-1); + }); + + it('should be case-insensitive', () => { + expect(detectJudgeIndex('[judge:2]')).toBe(1); + }); +}); + +describe('buildJudgePrompt', () => { + it('should build a well-structured judge prompt', () => { + const agentOutput = 'Code implementation complete.\n\nAll tests pass.'; + const conditions = [ + { index: 0, text: 'No issues found' }, + { index: 1, text: 'Issues detected that need fixing' }, + ]; + + const prompt = buildJudgePrompt(agentOutput, conditions); + + expect(prompt).toContain('# Judge Task'); + expect(prompt).toContain('Code implementation complete.'); + expect(prompt).toContain('All tests pass.'); + expect(prompt).toContain('| 1 | No issues found |'); + expect(prompt).toContain('| 2 | Issues detected that need fixing |'); + expect(prompt).toContain('[JUDGE:N]'); + }); + + it('should handle single condition', () => { + const prompt = buildJudgePrompt('output', [{ index: 0, text: 'Always true' }]); + + expect(prompt).toContain('| 1 | Always true |'); + }); +}); diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index 77e9eaa..c7a5ecc 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -910,6 +910,48 @@ describe('instruction-builder', () => { }); }); + describe('ai() condition status tag skip', () => { + it('should skip status rules when ALL rules are ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'ai("No issues")', next: 'COMPLETE', isAiCondition: true, aiConditionText: 'No issues' }, + { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('[TEST-STEP:'); + }); + + it('should include status rules when some rules are NOT ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'Error occurred', next: 'ABORT' }, + { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Status Output Rules'); + }); + + it('should include status rules when no rules are ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + { condition: 'Blocked', next: 'ABORT' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Status Output Rules'); + }); + }); + describe('isReportObjectConfig', () => { it('should return true for ReportObjectConfig', () => { expect(isReportObjectConfig({ name: '00-plan.md' })).toBe(true); diff --git a/src/__tests__/parallel-and-loader.test.ts b/src/__tests__/parallel-and-loader.test.ts new file mode 100644 index 0000000..b6dfde7 --- /dev/null +++ b/src/__tests__/parallel-and-loader.test.ts @@ -0,0 +1,300 @@ +/** + * Tests for parallel step execution and ai() condition loader + * + * Covers: + * - Schema validation for parallel sub-steps + * - Workflow loader normalization of ai() conditions and parallel steps + * - Engine parallel step aggregation logic + */ + +import { describe, it, expect } from 'vitest'; +import { WorkflowConfigRawSchema, ParallelSubStepRawSchema, WorkflowStepRawSchema } from '../models/schemas.js'; + +describe('ParallelSubStepRawSchema', () => { + it('should validate a valid parallel sub-step', () => { + const raw = { + name: 'arch-review', + agent: '~/.takt/agents/default/reviewer.md', + instruction_template: 'Review architecture', + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a sub-step without agent', () => { + const raw = { + name: 'no-agent-step', + instruction_template: 'Do something', + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); + + it('should accept optional fields', () => { + const raw = { + name: 'full-sub-step', + agent: '~/.takt/agents/default/coder.md', + agent_name: 'Coder', + allowed_tools: ['Read', 'Grep'], + model: 'haiku', + edit: false, + instruction_template: 'Do work', + report: '01-report.md', + pass_previous_response: false, + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.agent_name).toBe('Coder'); + expect(result.data.allowed_tools).toEqual(['Read', 'Grep']); + expect(result.data.edit).toBe(false); + } + }); + + it('should accept rules on sub-steps', () => { + const raw = { + name: 'reviewed', + agent: '~/.takt/agents/default/reviewer.md', + instruction_template: 'Review', + rules: [ + { condition: 'No issues', next: 'COMPLETE' }, + { condition: 'Issues found', next: 'fix' }, + ], + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules).toHaveLength(2); + } + }); +}); + +describe('WorkflowStepRawSchema with parallel', () => { + it('should accept a step with parallel sub-steps (no agent)', () => { + const raw = { + name: 'parallel-review', + parallel: [ + { name: 'arch-review', agent: 'reviewer.md', instruction_template: 'Review arch' }, + { name: 'sec-review', agent: 'security.md', instruction_template: 'Review security' }, + ], + rules: [ + { condition: 'All pass', next: 'COMPLETE' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a step with neither agent nor parallel', () => { + const raw = { + name: 'orphan-step', + instruction_template: 'Do something', + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); + + it('should accept a step with agent (no parallel)', () => { + const raw = { + name: 'normal-step', + agent: 'coder.md', + instruction_template: 'Code something', + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a step with empty parallel array', () => { + const raw = { + name: 'empty-parallel', + parallel: [], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); +}); + +describe('WorkflowConfigRawSchema with parallel steps', () => { + it('should validate a workflow with parallel step', () => { + const raw = { + name: 'test-parallel-workflow', + steps: [ + { + name: 'plan', + agent: 'planner.md', + rules: [{ condition: 'Plan complete', next: 'review' }], + }, + { + name: 'review', + parallel: [ + { name: 'arch-review', agent: 'arch-reviewer.md', instruction_template: 'Review architecture' }, + { name: 'sec-review', agent: 'sec-reviewer.md', instruction_template: 'Review security' }, + ], + rules: [ + { condition: 'All approved', next: 'COMPLETE' }, + { condition: 'Issues found', next: 'plan' }, + ], + }, + ], + initial_step: 'plan', + max_iterations: 10, + }; + + const result = WorkflowConfigRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.steps).toHaveLength(2); + expect(result.data.steps[1].parallel).toHaveLength(2); + } + }); + + it('should validate a workflow mixing normal and parallel steps', () => { + const raw = { + name: 'mixed-workflow', + steps: [ + { name: 'plan', agent: 'planner.md', rules: [{ condition: 'Done', next: 'implement' }] }, + { name: 'implement', agent: 'coder.md', rules: [{ condition: 'Done', next: 'review' }] }, + { + name: 'review', + parallel: [ + { name: 'arch', agent: 'arch.md' }, + { name: 'sec', agent: 'sec.md' }, + ], + rules: [{ condition: 'All pass', next: 'COMPLETE' }], + }, + ], + initial_step: 'plan', + }; + + const result = WorkflowConfigRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.steps[0].agent).toBe('planner.md'); + expect(result.data.steps[2].parallel).toHaveLength(2); + } + }); +}); + +describe('ai() condition in WorkflowRuleSchema', () => { + it('should accept ai() condition as a string', () => { + const raw = { + name: 'test-step', + agent: 'agent.md', + rules: [ + { condition: 'ai("All reviews approved")', next: 'COMPLETE' }, + { condition: 'ai("Issues detected")', next: 'fix' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules?.[0].condition).toBe('ai("All reviews approved")'); + expect(result.data.rules?.[1].condition).toBe('ai("Issues detected")'); + } + }); + + it('should accept mixed regular and ai() conditions', () => { + const raw = { + name: 'mixed-rules', + agent: 'agent.md', + rules: [ + { condition: 'Regular condition', next: 'step-a' }, + { condition: 'ai("AI evaluated condition")', next: 'step-b' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); +}); + +describe('ai() condition regex parsing', () => { + // Test the regex pattern used in workflowLoader.ts + const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; + + it('should match simple ai() condition', () => { + const match = 'ai("No issues found")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('No issues found'); + }); + + it('should match ai() with Japanese text', () => { + const match = 'ai("全てのレビューが承認している場合")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('全てのレビューが承認している場合'); + }); + + it('should not match regular condition text', () => { + const match = 'No issues found'.match(AI_CONDITION_REGEX); + expect(match).toBeNull(); + }); + + it('should not match partial ai() pattern', () => { + expect('ai(missing quotes)'.match(AI_CONDITION_REGEX)).toBeNull(); + expect('ai("")'.match(AI_CONDITION_REGEX)).toBeNull(); // .+ requires at least 1 char + expect('not ai("text")'.match(AI_CONDITION_REGEX)).toBeNull(); // must start with ai( + expect('ai("text") extra'.match(AI_CONDITION_REGEX)).toBeNull(); // must end with ) + }); + + it('should match ai() with special characters in text', () => { + const match = 'ai("Issues found (critical/high severity)")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('Issues found (critical/high severity)'); + }); +}); + +describe('parallel step aggregation format', () => { + it('should aggregate sub-step outputs in the expected format', () => { + // Mirror the aggregation logic from engine.ts + const subResults = [ + { name: 'arch-review', content: 'Architecture looks good.\n## Result: APPROVE' }, + { name: 'sec-review', content: 'No security issues.\n## Result: APPROVE' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toContain('## arch-review'); + expect(aggregatedContent).toContain('Architecture looks good.'); + expect(aggregatedContent).toContain('---'); + expect(aggregatedContent).toContain('## sec-review'); + expect(aggregatedContent).toContain('No security issues.'); + }); + + it('should handle single sub-step', () => { + const subResults = [ + { name: 'only-step', content: 'Single result' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toBe('## only-step\nSingle result'); + expect(aggregatedContent).not.toContain('---'); + }); + + it('should handle empty content from sub-steps', () => { + const subResults = [ + { name: 'step-a', content: '' }, + { name: 'step-b', content: 'Has content' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toContain('## step-a\n'); + expect(aggregatedContent).toContain('## step-b\nHas content'); + }); +}); diff --git a/src/claude/client.ts b/src/claude/client.ts index 75849f3..5aa6bbc 100644 --- a/src/claude/client.ts +++ b/src/claude/client.ts @@ -165,6 +165,80 @@ export async function callClaudeCustom( }; } +/** + * Detect judge rule index from [JUDGE:N] tag pattern. + * Returns 0-based rule index, or -1 if no match. + */ +export function detectJudgeIndex(content: string): number { + const regex = /\[JUDGE:(\d+)\]/i; + const match = content.match(regex); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +/** + * Build the prompt for the AI judge that evaluates agent output against ai() conditions. + */ +export function buildJudgePrompt( + agentOutput: string, + aiConditions: { index: number; text: string }[], +): string { + const conditionList = aiConditions + .map((c) => `| ${c.index + 1} | ${c.text} |`) + .join('\n'); + + return [ + '# Judge Task', + '', + 'You are a judge evaluating an agent\'s output against a set of conditions.', + 'Read the agent output below, then determine which condition best matches.', + '', + '## Agent Output', + '```', + agentOutput, + '```', + '', + '## Conditions', + '| # | Condition |', + '|---|-----------|', + conditionList, + '', + '## Instructions', + 'Output ONLY the tag `[JUDGE:N]` where N is the number of the best matching condition.', + 'Do not output anything else.', + ].join('\n'); +} + +/** + * Call AI judge to evaluate agent output against ai() conditions. + * Uses a lightweight model (haiku) for cost efficiency. + * Returns 0-based index of the matched ai() condition, or -1 if no match. + */ +export async function callAiJudge( + agentOutput: string, + aiConditions: { index: number; text: string }[], + options: { cwd: string }, +): Promise { + const prompt = buildJudgePrompt(agentOutput, aiConditions); + + const spawnOptions: ClaudeSpawnOptions = { + cwd: options.cwd, + model: 'haiku', + maxTurns: 1, + }; + + const result = await executeClaudeCli(prompt, spawnOptions); + if (!result.success) { + log.error('AI judge call failed', { error: result.error }); + return -1; + } + + return detectJudgeIndex(result.content); +} + /** Call a Claude Code built-in agent (using claude --agent flag if available) */ export async function callClaudeAgent( claudeAgentName: string, diff --git a/src/config/workflowLoader.ts b/src/config/workflowLoader.ts index 93c9f9a..0688512 100644 --- a/src/config/workflowLoader.ts +++ b/src/config/workflowLoader.ts @@ -120,6 +120,64 @@ function normalizeReport( ); } +/** Regex to detect ai("...") condition expressions */ +const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; + +/** + * Parse a rule's condition for ai() expressions. + * If condition is `ai("some text")`, sets isAiCondition and aiConditionText. + */ +function normalizeRule(r: { condition: string; next: string; appendix?: string }): WorkflowRule { + const match = r.condition.match(AI_CONDITION_REGEX); + if (match?.[1]) { + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + isAiCondition: true, + aiConditionText: match[1], + }; + } + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + }; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type RawStep = any; + +/** + * Normalize a raw step into internal WorkflowStep format. + */ +function normalizeStepFromRaw(step: RawStep, workflowDir: string): WorkflowStep { + const rules: WorkflowRule[] | undefined = step.rules?.map(normalizeRule); + const agentSpec: string = step.agent ?? ''; + + const result: WorkflowStep = { + name: step.name, + agent: agentSpec, + agentDisplayName: step.agent_name || (agentSpec ? extractAgentDisplayName(agentSpec) : step.name), + agentPath: agentSpec ? resolveAgentPathForWorkflow(agentSpec, workflowDir) : undefined, + allowedTools: step.allowed_tools, + provider: step.provider, + model: step.model, + permissionMode: step.permission_mode, + edit: step.edit, + instructionTemplate: resolveContentPath(step.instruction_template, workflowDir) || step.instruction || '{task}', + rules, + report: normalizeReport(step.report, workflowDir), + passPreviousResponse: step.pass_previous_response ?? true, + }; + + if (step.parallel && step.parallel.length > 0) { + result.parallel = step.parallel.map((sub: RawStep) => normalizeStepFromRaw(sub, workflowDir)); + } + + return result; +} + /** * Convert raw YAML workflow config to internal format. * Agent paths are resolved relative to the workflow directory. @@ -127,29 +185,9 @@ function normalizeReport( function normalizeWorkflowConfig(raw: unknown, workflowDir: string): WorkflowConfig { const parsed = WorkflowConfigRawSchema.parse(raw); - const steps: WorkflowStep[] = parsed.steps.map((step) => { - const rules: WorkflowRule[] | undefined = step.rules?.map((r) => ({ - condition: r.condition, - next: r.next, - appendix: r.appendix, - })); - - return { - name: step.name, - agent: step.agent, - agentDisplayName: step.agent_name || extractAgentDisplayName(step.agent), - agentPath: resolveAgentPathForWorkflow(step.agent, workflowDir), - allowedTools: step.allowed_tools, - provider: step.provider, - model: step.model, - permissionMode: step.permission_mode, - edit: step.edit, - instructionTemplate: resolveContentPath(step.instruction_template, workflowDir) || step.instruction || '{task}', - rules, - report: normalizeReport(step.report, workflowDir), - passPreviousResponse: step.pass_previous_response, - }; - }); + const steps: WorkflowStep[] = parsed.steps.map((step) => + normalizeStepFromRaw(step, workflowDir), + ); return { name: parsed.name, diff --git a/src/models/schemas.ts b/src/models/schemas.ts index e1d7aab..4ec6ef8 100644 --- a/src/models/schemas.ts +++ b/src/models/schemas.ts @@ -81,10 +81,28 @@ export const WorkflowRuleSchema = z.object({ appendix: z.string().optional(), }); +/** Sub-step schema for parallel execution (agent is required) */ +export const ParallelSubStepRawSchema = z.object({ + name: z.string().min(1), + agent: z.string().min(1), + agent_name: z.string().optional(), + allowed_tools: z.array(z.string()).optional(), + provider: z.enum(['claude', 'codex', 'mock']).optional(), + model: z.string().optional(), + permission_mode: PermissionModeSchema.optional(), + edit: z.boolean().optional(), + instruction: z.string().optional(), + instruction_template: z.string().optional(), + rules: z.array(WorkflowRuleSchema).optional(), + report: ReportFieldSchema.optional(), + pass_previous_response: z.boolean().optional().default(true), +}); + /** Workflow step schema - raw YAML format */ export const WorkflowStepRawSchema = z.object({ name: z.string().min(1), - agent: z.string().min(1), + /** Agent is required for normal steps, optional for parallel container steps */ + agent: z.string().optional(), /** Display name for the agent (shown in output). Falls back to agent basename if not specified */ agent_name: z.string().optional(), allowed_tools: z.array(z.string()).optional(), @@ -101,7 +119,12 @@ export const WorkflowStepRawSchema = z.object({ /** Report file(s) for this step */ report: ReportFieldSchema.optional(), pass_previous_response: z.boolean().optional().default(true), -}); + /** Sub-steps to execute in parallel */ + parallel: z.array(ParallelSubStepRawSchema).optional(), +}).refine( + (data) => data.agent || (data.parallel && data.parallel.length > 0), + { message: 'Step must have either an agent or parallel sub-steps' }, +); /** Workflow configuration schema - raw YAML format */ export const WorkflowConfigRawSchema = z.object({ diff --git a/src/models/types.ts b/src/models/types.ts index 8bce74c..88efc90 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -48,6 +48,10 @@ export interface WorkflowRule { next: string; /** Template for additional AI output */ appendix?: string; + /** Whether this condition uses ai() expression (set by loader) */ + isAiCondition?: boolean; + /** The condition text inside ai("...") for AI judge evaluation (set by loader) */ + aiConditionText?: string; } /** Report file configuration for a workflow step (label: path pair) */ @@ -96,6 +100,8 @@ export interface WorkflowStep { /** Report file configuration. Single string, array of label:path, or object with order/format. */ report?: string | ReportConfig[] | ReportObjectConfig; passPreviousResponse: boolean; + /** Sub-steps to execute in parallel. When set, this step runs all sub-steps concurrently. */ + parallel?: WorkflowStep[]; } /** Loop detection configuration */ diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index 6e311e8..ae11961 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -15,7 +15,7 @@ import { runAgent, type RunAgentOptions } from '../agents/runner.js'; import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; -import { detectRuleIndex } from '../claude/client.js'; +import { detectRuleIndex, callAiJudge } from '../claude/client.js'; import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; @@ -196,22 +196,19 @@ export class WorkflowEngine extends EventEmitter { } } - /** Run a single step */ + /** Run a single step (delegates to runParallelStep if step has parallel sub-steps) */ private async runStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { - const stepIteration = incrementStepIteration(this.state, step.name); - const instruction = this.buildInstruction(step, stepIteration); - const sessionId = this.state.agentSessions.get(step.agent); - log.debug('Running step', { - step: step.name, - agent: step.agent, - stepIteration, - iteration: this.state.iteration, - sessionId: sessionId ?? 'new', - }); + if (step.parallel && step.parallel.length > 0) { + return this.runParallelStep(step); + } + return this.runNormalStep(step); + } - const agentOptions: RunAgentOptions = { + /** Build RunAgentOptions from a step's configuration */ + private buildAgentOptions(step: WorkflowStep): RunAgentOptions { + return { cwd: this.cwd, - sessionId, + sessionId: this.state.agentSessions.get(step.agent), agentPath: step.agentPath, allowedTools: step.allowedTools, provider: step.provider, @@ -222,23 +219,61 @@ export class WorkflowEngine extends EventEmitter { onAskUserQuestion: this.options.onAskUserQuestion, bypassPermissions: this.options.bypassPermissions, }; + } - let response = await runAgent(step.agent, instruction, agentOptions); + /** Update agent session and notify via callback if session changed */ + private updateAgentSession(agent: string, sessionId: string | undefined): void { + if (!sessionId) return; - if (response.sessionId) { - const previousSessionId = this.state.agentSessions.get(step.agent); - this.state.agentSessions.set(step.agent, response.sessionId); + const previousSessionId = this.state.agentSessions.get(agent); + this.state.agentSessions.set(agent, sessionId); - if (this.options.onSessionUpdate && response.sessionId !== previousSessionId) { - this.options.onSessionUpdate(step.agent, response.sessionId); - } + if (this.options.onSessionUpdate && sessionId !== previousSessionId) { + this.options.onSessionUpdate(agent, sessionId); + } + } + + /** + * Detect matched rule for a step's response. + * 1. Try standard [STEP:N] tag detection + * 2. Fallback to ai() condition evaluation via AI judge + */ + private async detectMatchedRule(step: WorkflowStep, content: string): Promise { + if (!step.rules || step.rules.length === 0) return undefined; + + const ruleIndex = detectRuleIndex(content, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return ruleIndex; } - if (step.rules && step.rules.length > 0) { - const ruleIndex = detectRuleIndex(response.content, step.name); - if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - response = { ...response, matchedRuleIndex: ruleIndex }; - } + const aiRuleIndex = await this.evaluateAiConditions(step, content); + if (aiRuleIndex >= 0) { + return aiRuleIndex; + } + + return undefined; + } + + /** Run a normal (non-parallel) step */ + private async runNormalStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { + const stepIteration = incrementStepIteration(this.state, step.name); + const instruction = this.buildInstruction(step, stepIteration); + log.debug('Running step', { + step: step.name, + agent: step.agent, + stepIteration, + iteration: this.state.iteration, + sessionId: this.state.agentSessions.get(step.agent) ?? 'new', + }); + + const agentOptions = this.buildAgentOptions(step); + let response = await runAgent(step.agent, instruction, agentOptions); + + this.updateAgentSession(step.agent, response.sessionId); + + const matchedRuleIndex = await this.detectMatchedRule(step, response.content); + if (matchedRuleIndex != null) { + response = { ...response, matchedRuleIndex }; } this.state.stepOutputs.set(step.name, response); @@ -246,6 +281,110 @@ export class WorkflowEngine extends EventEmitter { return { response, instruction }; } + /** + * Run a parallel step: execute all sub-steps concurrently, then aggregate results. + * The aggregated output becomes the parent step's response for rules evaluation. + */ + private async runParallelStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { + const subSteps = step.parallel!; + const stepIteration = incrementStepIteration(this.state, step.name); + log.debug('Running parallel step', { + step: step.name, + subSteps: subSteps.map(s => s.name), + stepIteration, + }); + + // Run all sub-steps concurrently + const subResults = await Promise.all( + subSteps.map(async (subStep) => { + const subIteration = incrementStepIteration(this.state, subStep.name); + const subInstruction = this.buildInstruction(subStep, subIteration); + + const agentOptions = this.buildAgentOptions(subStep); + const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); + + this.updateAgentSession(subStep.agent, subResponse.sessionId); + + // Detect sub-step rule matches (tag detection + ai() fallback) + const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content); + const finalResponse = matchedRuleIndex != null + ? { ...subResponse, matchedRuleIndex } + : subResponse; + + this.state.stepOutputs.set(subStep.name, finalResponse); + this.emitStepReports(subStep); + + return { subStep, response: finalResponse, instruction: subInstruction }; + }), + ); + + // Aggregate sub-step outputs into parent step's response + const aggregatedContent = subResults + .map((r) => `## ${r.subStep.name}\n${r.response.content}`) + .join('\n\n---\n\n'); + + const aggregatedInstruction = subResults + .map((r) => r.instruction) + .join('\n\n'); + + // Evaluate parent step's rules against aggregated output + const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent); + + const aggregatedResponse: AgentResponse = { + agent: step.name, + status: 'done', + content: aggregatedContent, + timestamp: new Date(), + ...(matchedRuleIndex != null && { matchedRuleIndex }), + }; + + this.state.stepOutputs.set(step.name, aggregatedResponse); + this.emitStepReports(step); + return { response: aggregatedResponse, instruction: aggregatedInstruction }; + } + + /** + * Evaluate ai() conditions via AI judge. + * Collects all ai() rules, calls the judge, and maps the result back to the original rule index. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + */ + private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise { + if (!step.rules) return -1; + + const aiConditions: { index: number; text: string }[] = []; + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (rule.isAiCondition && rule.aiConditionText) { + aiConditions.push({ index: i, text: rule.aiConditionText }); + } + } + + if (aiConditions.length === 0) return -1; + + log.debug('Evaluating ai() conditions via judge', { + step: step.name, + conditionCount: aiConditions.length, + }); + + // Remap: judge returns 0-based index within aiConditions array + const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text })); + const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd }); + + if (judgeResult >= 0 && judgeResult < aiConditions.length) { + const matched = aiConditions[judgeResult]!; + log.debug('AI judge matched condition', { + step: step.name, + judgeResult, + originalRuleIndex: matched.index, + condition: matched.text, + }); + return matched.index; + } + + log.debug('AI judge did not match any condition', { step: step.name }); + return -1; + } + /** * Determine next step for a completed step using rules-based routing. */ diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index cb952c5..71abf8d 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -494,10 +494,14 @@ export function buildInstruction( } // 7. Status rules (auto-generated from rules) + // Skip when ALL rules are ai() conditions — agent doesn't need to output status tags if (step.rules && step.rules.length > 0) { - const statusHeader = renderStatusRulesHeader(language); - const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); - sections.push(`${statusHeader}\n${generatedPrompt}`); + const allAiConditions = step.rules.every((r) => r.isAiCondition); + if (!allAiConditions) { + const statusHeader = renderStatusRulesHeader(language); + const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(`${statusHeader}\n${generatedPrompt}`); + } } return sections.join('\n\n');