From b10773d310667c133984f00d2a8fddc743019e54 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:29:54 +0900 Subject: [PATCH] =?UTF-8?q?=E3=82=B9=E3=83=86=E3=83=BC=E3=82=BF=E3=82=B9?= =?UTF-8?q?=E5=88=A4=E5=AE=9A=E3=82=92Phase=203=E3=81=AB=E5=88=86=E9=9B=A2?= =?UTF-8?q?=E3=81=97=E3=80=81=E3=83=87=E3=83=83=E3=83=89=E3=82=B3=E3=83=BC?= =?UTF-8?q?=E3=83=89=E3=82=92=E6=95=B4=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - buildInstruction からステータスルール注入を除去(Phase 1はステータスタグなし) - buildStatusJudgmentInstruction を新設(Phase 3: セッション再開でステータスタグ出力) - detectMatchedRule のシグネチャを (agentContent, tagContent) に変更 - ルール存在時にマッチなしなら即座にthrow(Fail Fast) - runReportPhase / runStatusJudgmentPhase の共通部分を buildResumeOptions に抽出 - sessionId 欠落時のサイレントフォールバックをエラーに変更 - renderStatusRulesHeader / STATUS_RULES_HEADER_STRINGS を削除(デッドコード) - StatusJudgmentContext から未使用の cwd を削除 - Status 型および StatusSchema から未使用の in_progress を削除 --- src/__tests__/instructionBuilder.test.ts | 210 ++++++++++++++++---- src/__tests__/parallel-and-loader.test.ts | 221 ++++++++++++++++++++++ src/config/workflowLoader.ts | 27 ++- src/models/schemas.ts | 1 - src/models/types.ts | 7 +- src/workflow/engine.ts | 193 ++++++++++++++++--- src/workflow/instruction-builder.ts | 90 +++++---- 7 files changed, 636 insertions(+), 113 deletions(-) diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index d168624..285ad56 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -6,13 +6,14 @@ import { describe, it, expect } from 'vitest'; import { buildInstruction, buildReportInstruction, + buildStatusJudgmentInstruction, buildExecutionMetadata, renderExecutionMetadata, - renderStatusRulesHeader, generateStatusRulesFromRules, isReportObjectConfig, type InstructionContext, type ReportInstructionContext, + type StatusJudgmentContext, } from '../workflow/instruction-builder.js'; import type { WorkflowStep, WorkflowRule } from '../models/types.js'; @@ -296,30 +297,6 @@ describe('instruction-builder', () => { }); }); - describe('renderStatusRulesHeader', () => { - it('should render Japanese header when language is ja', () => { - const header = renderStatusRulesHeader('ja'); - - expect(header).toContain('# ⚠️ 必須: ステータス出力ルール ⚠️'); - expect(header).toContain('このタグがないとワークフローが停止します'); - expect(header).toContain('最終出力には必ず以下のルールに従ったステータスタグを含めてください'); - }); - - it('should render English header when language is en', () => { - const header = renderStatusRulesHeader('en'); - - expect(header).toContain('# ⚠️ Required: Status Output Rules ⚠️'); - expect(header).toContain('The workflow will stop without this tag'); - expect(header).toContain('Your final output MUST include a status tag'); - }); - - it('should end with trailing empty line', () => { - const header = renderStatusRulesHeader('en'); - - expect(header).toMatch(/\n$/); - }); - }); - describe('generateStatusRulesFromRules', () => { const rules: WorkflowRule[] = [ { condition: '要件が明確で実装可能', next: 'implement' }, @@ -385,8 +362,8 @@ describe('instruction-builder', () => { }); }); - describe('buildInstruction with rules', () => { - it('should auto-generate status rules from rules', () => { + describe('buildInstruction with rules (Phase 1 — no status tags)', () => { + it('should NOT include status rules even when rules exist (phase separation)', () => { const step = createMinimalStep('Do work'); step.name = 'plan'; step.rules = [ @@ -397,12 +374,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Should contain status header - expect(result).toContain('⚠️ Required: Status Output Rules ⚠️'); - // Should contain auto-generated criteria table - expect(result).toContain('## Decision Criteria'); - expect(result).toContain('`[PLAN:1]`'); - expect(result).toContain('`[PLAN:2]`'); + // Phase 1 should NOT contain status header or criteria + expect(result).not.toContain('Status Output Rules'); + expect(result).not.toContain('Decision Criteria'); + expect(result).not.toContain('[PLAN:'); }); it('should not add status rules when rules do not exist', () => { @@ -411,7 +386,7 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('⚠️ Required'); + expect(result).not.toContain('Status Output Rules'); expect(result).not.toContain('Decision Criteria'); }); @@ -422,7 +397,7 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('⚠️ Required'); + expect(result).not.toContain('Status Output Rules'); expect(result).not.toContain('Decision Criteria'); }); }); @@ -884,8 +859,8 @@ describe('instruction-builder', () => { }); }); - describe('ai() condition status tag skip', () => { - it('should skip status rules when ALL rules are ai() conditions', () => { + describe('phase separation — buildInstruction never includes status rules', () => { + it('should NOT include status rules even with ai() conditions', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'ai("No issues")', next: 'COMPLETE', isAiCondition: true, aiConditionText: 'No issues' }, @@ -899,7 +874,7 @@ describe('instruction-builder', () => { expect(result).not.toContain('[TEST-STEP:'); }); - it('should include status rules when some rules are NOT ai() conditions', () => { + it('should NOT include status rules with mixed regular and ai() conditions', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'Error occurred', next: 'ABORT' }, @@ -909,10 +884,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('Status Output Rules'); + expect(result).not.toContain('Status Output Rules'); }); - it('should include status rules when no rules are ai() conditions', () => { + it('should NOT include status rules with regular conditions only', () => { const step = createMinimalStep('Do work'); step.rules = [ { condition: 'Done', next: 'COMPLETE' }, @@ -922,7 +897,47 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('Status Output Rules'); + expect(result).not.toContain('Status Output Rules'); + }); + + it('should NOT include status rules with aggregate conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'any("rejected")', next: 'fix', isAggregateCondition: true, aggregateType: 'any' as const, aggregateConditionText: 'rejected' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); + }); + + it('should NOT include status rules with mixed ai() and aggregate conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'any("rejected")', next: 'fix', isAggregateCondition: true, aggregateType: 'any' as const, aggregateConditionText: 'rejected' }, + { condition: 'ai("Judgment needed")', next: 'manual', isAiCondition: true, aiConditionText: 'Judgment needed' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); + }); + + it('should NOT include status rules with mixed aggregate and regular conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'Error occurred', next: 'ABORT' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Status Output Rules'); }); }); @@ -943,4 +958,117 @@ describe('instruction-builder', () => { expect(isReportObjectConfig([{ label: 'Scope', path: '01-scope.md' }])).toBe(false); }); }); + + describe('buildStatusJudgmentInstruction (Phase 3)', () => { + function createJudgmentContext(overrides: Partial = {}): StatusJudgmentContext { + return { + language: 'en', + ...overrides, + }; + } + + it('should include header instruction (en)', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Clear requirements', next: 'implement' }, + { condition: 'Unclear', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Review your work results and determine the status'); + expect(result).toContain('Do NOT perform any additional work'); + }); + + it('should include header instruction (ja)', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: '要件が明確', next: 'implement' }, + { condition: '不明確', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext({ language: 'ja' }); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('作業結果を振り返り、ステータスを判定してください'); + expect(result).toContain('追加の作業は行わないでください'); + }); + + it('should include criteria table with tags', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Clear requirements', next: 'implement' }, + { condition: 'Unclear', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('## Decision Criteria'); + expect(result).toContain('`[PLAN:1]`'); + expect(result).toContain('`[PLAN:2]`'); + }); + + it('should include output format section', () => { + const step = createMinimalStep('Do work'); + step.name = 'review'; + step.rules = [ + { condition: 'Approved', next: 'COMPLETE' }, + { condition: 'Rejected', next: 'fix' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('## Output Format'); + expect(result).toContain('`[REVIEW:1]` — Approved'); + expect(result).toContain('`[REVIEW:2]` — Rejected'); + }); + + it('should throw error when step has no rules', () => { + const step = createMinimalStep('Do work'); + const ctx = createJudgmentContext(); + + expect(() => buildStatusJudgmentInstruction(step, ctx)).toThrow('no rules'); + }); + + it('should throw error when step has empty rules', () => { + const step = createMinimalStep('Do work'); + step.rules = []; + const ctx = createJudgmentContext(); + + expect(() => buildStatusJudgmentInstruction(step, ctx)).toThrow('no rules'); + }); + + it('should default language to en', () => { + const step = createMinimalStep('Do work'); + step.name = 'test'; + step.rules = [{ condition: 'Done', next: 'COMPLETE' }]; + const ctx: StatusJudgmentContext = {}; + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Review your work results'); + expect(result).toContain('## Decision Criteria'); + }); + + it('should include appendix template when rules have appendix', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + { condition: 'Blocked', next: 'ABORT', appendix: '確認事項:\n- {質問1}' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Appendix Template'); + expect(result).toContain('確認事項:'); + }); + }); }); diff --git a/src/__tests__/parallel-and-loader.test.ts b/src/__tests__/parallel-and-loader.test.ts index b6dfde7..b8a53e3 100644 --- a/src/__tests__/parallel-and-loader.test.ts +++ b/src/__tests__/parallel-and-loader.test.ts @@ -252,6 +252,227 @@ describe('ai() condition regex parsing', () => { }); }); +describe('all()/any() aggregate condition regex parsing', () => { + const AGGREGATE_CONDITION_REGEX = /^(all|any)\("(.+)"\)$/; + + it('should match all() condition', () => { + const match = 'all("approved")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('all'); + expect(match![2]).toBe('approved'); + }); + + it('should match any() condition', () => { + const match = 'any("rejected")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('any'); + expect(match![2]).toBe('rejected'); + }); + + it('should match with Japanese text', () => { + const match = 'all("承認済み")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('all'); + expect(match![2]).toBe('承認済み'); + }); + + it('should not match regular condition text', () => { + expect('approved'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should not match ai() condition', () => { + expect('ai("something")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should not match invalid patterns', () => { + expect('all(missing quotes)'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('all("")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('not all("text")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('all("text") extra'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('ALL("text")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should match with special characters in text', () => { + const match = 'any("issues found (critical)")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![2]).toBe('issues found (critical)'); + }); +}); + +describe('all()/any() condition in WorkflowStepRawSchema', () => { + it('should accept all() condition as a string', () => { + const raw = { + name: 'parallel-review', + parallel: [ + { name: 'arch-review', agent: 'reviewer.md', instruction_template: 'Review' }, + ], + rules: [ + { condition: 'all("approved")', next: 'COMPLETE' }, + { condition: 'any("rejected")', next: 'fix' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules?.[0].condition).toBe('all("approved")'); + expect(result.data.rules?.[1].condition).toBe('any("rejected")'); + } + }); + + it('should accept mixed regular, ai(), and all()/any() conditions', () => { + const raw = { + name: 'mixed-rules', + parallel: [ + { name: 'sub', agent: 'agent.md' }, + ], + rules: [ + { condition: 'all("approved")', next: 'COMPLETE' }, + { condition: 'any("rejected")', next: 'fix' }, + { condition: 'ai("Difficult judgment")', next: 'manual-review' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); +}); + +describe('aggregate condition evaluation logic', () => { + // Simulate the evaluation logic from engine.ts + type SubResult = { name: string; matchedRuleIndex?: number; rules?: { condition: string }[] }; + + function evaluateAggregate( + aggregateType: 'all' | 'any', + targetCondition: string, + subSteps: SubResult[], + ): boolean { + if (subSteps.length === 0) return false; + + if (aggregateType === 'all') { + return subSteps.every((sub) => { + if (sub.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[sub.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + } + // 'any' + return subSteps.some((sub) => { + if (sub.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[sub.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + } + + const rules = [ + { condition: 'approved' }, + { condition: 'rejected' }, + ]; + + it('all(): true when all sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(true); + }); + + it('all(): false when some sub-steps do not match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false when sub-step has no matched rule', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: undefined, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false when sub-step has no rules', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules: undefined }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false with zero sub-steps', () => { + expect(evaluateAggregate('all', 'approved', [])).toBe(false); + }); + + it('any(): true when one sub-step matches', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): true when all sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 1, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): false when no sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(false); + }); + + it('any(): false with zero sub-steps', () => { + expect(evaluateAggregate('any', 'rejected', [])).toBe(false); + }); + + it('any(): skips sub-steps without matched rule (does not count as match)', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: undefined, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): false when only unmatched sub-steps exist', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: undefined, rules }, + { name: 'b', matchedRuleIndex: undefined, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(false); + }); + + it('evaluation priority: first matching aggregate rule wins', () => { + const parentRules = [ + { type: 'all' as const, condition: 'approved' }, + { type: 'any' as const, condition: 'rejected' }, + ]; + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + + // Find the first matching rule + let matchedIndex = -1; + for (let i = 0; i < parentRules.length; i++) { + const r = parentRules[i]!; + if (evaluateAggregate(r.type, r.condition, subs)) { + matchedIndex = i; + break; + } + } + + expect(matchedIndex).toBe(0); // all("approved") matches first + }); +}); + describe('parallel step aggregation format', () => { it('should aggregate sub-step outputs in the expected format', () => { // Mirror the aggregation logic from engine.ts diff --git a/src/config/workflowLoader.ts b/src/config/workflowLoader.ts index 0688512..a7bc5c1 100644 --- a/src/config/workflowLoader.ts +++ b/src/config/workflowLoader.ts @@ -123,21 +123,38 @@ function normalizeReport( /** Regex to detect ai("...") condition expressions */ const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; +/** Regex to detect all("...")/any("...") aggregate condition expressions */ +const AGGREGATE_CONDITION_REGEX = /^(all|any)\("(.+)"\)$/; + /** - * Parse a rule's condition for ai() expressions. - * If condition is `ai("some text")`, sets isAiCondition and aiConditionText. + * Parse a rule's condition for ai() and all()/any() expressions. + * - `ai("text")` → sets isAiCondition and aiConditionText + * - `all("text")` / `any("text")` → sets isAggregateCondition, aggregateType, aggregateConditionText */ function normalizeRule(r: { condition: string; next: string; appendix?: string }): WorkflowRule { - const match = r.condition.match(AI_CONDITION_REGEX); - if (match?.[1]) { + const aiMatch = r.condition.match(AI_CONDITION_REGEX); + if (aiMatch?.[1]) { return { condition: r.condition, next: r.next, appendix: r.appendix, isAiCondition: true, - aiConditionText: match[1], + aiConditionText: aiMatch[1], }; } + + const aggMatch = r.condition.match(AGGREGATE_CONDITION_REGEX); + if (aggMatch?.[1] && aggMatch[2]) { + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + isAggregateCondition: true, + aggregateType: aggMatch[1] as 'all' | 'any', + aggregateConditionText: aggMatch[2], + }; + } + return { condition: r.condition, next: r.next, diff --git a/src/models/schemas.ts b/src/models/schemas.ts index 4ec6ef8..502adc5 100644 --- a/src/models/schemas.ts +++ b/src/models/schemas.ts @@ -13,7 +13,6 @@ export const AgentTypeSchema = z.enum(['coder', 'architect', 'supervisor', 'cust /** Status schema */ export const StatusSchema = z.enum([ 'pending', - 'in_progress', 'done', 'blocked', 'approved', diff --git a/src/models/types.ts b/src/models/types.ts index 88efc90..7db34e1 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -8,7 +8,6 @@ export type AgentType = 'coder' | 'architect' | 'supervisor' | 'custom'; /** Execution status for agents and workflows */ export type Status = | 'pending' - | 'in_progress' | 'done' | 'blocked' | 'approved' @@ -52,6 +51,12 @@ export interface WorkflowRule { isAiCondition?: boolean; /** The condition text inside ai("...") for AI judge evaluation (set by loader) */ aiConditionText?: string; + /** Whether this condition uses all()/any() aggregate expression (set by loader) */ + isAggregateCondition?: boolean; + /** Aggregate type: 'all' requires all sub-steps match, 'any' requires at least one (set by loader) */ + aggregateType?: 'all' | 'any'; + /** The condition text inside all("...")/any("...") to match against sub-step results (set by loader) */ + aggregateConditionText?: string; } /** Report file configuration for a workflow step (label: path pair) */ diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index 34a5c43..612e0d4 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -16,7 +16,7 @@ import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; import { detectRuleIndex, callAiJudge } from '../claude/client.js'; -import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; +import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; import { @@ -226,6 +226,27 @@ export class WorkflowEngine extends EventEmitter { }; } + /** + * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3). + * Shares common fields with the original step's agent config. + */ + private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick): RunAgentOptions { + return { + cwd: this.cwd, + sessionId, + agentPath: step.agentPath, + allowedTools: overrides.allowedTools, + maxTurns: overrides.maxTurns, + provider: step.provider, + model: step.model, + permissionMode: step.permissionMode, + onStream: this.options.onStream, + onPermissionRequest: this.options.onPermissionRequest, + onAskUserQuestion: this.options.onAskUserQuestion, + bypassPermissions: this.options.bypassPermissions, + }; + } + /** Update agent session and notify via callback if session changed */ private updateAgentSession(agent: string, sessionId: string | undefined): void { if (!sessionId) return; @@ -240,23 +261,96 @@ export class WorkflowEngine extends EventEmitter { /** * Detect matched rule for a step's response. - * 1. Try standard [STEP:N] tag detection - * 2. Fallback to ai() condition evaluation via AI judge + * Evaluation order (first match wins): + * 1. Aggregate conditions: all()/any() — evaluate sub-step results + * 2. Standard [STEP:N] tag detection (from tagContent, i.e. Phase 3 output) + * 3. ai() condition evaluation via AI judge (from agentContent, i.e. Phase 1 output) + * + * Returns undefined for steps without rules. + * Throws if rules exist but no rule matched (Fail Fast). + * + * @param step - The workflow step + * @param agentContent - Phase 1 output (main execution) + * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection */ - private async detectMatchedRule(step: WorkflowStep, content: string): Promise { + private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise { if (!step.rules || step.rules.length === 0) return undefined; - const ruleIndex = detectRuleIndex(content, step.name); - if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - return ruleIndex; + // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps + const aggIndex = this.evaluateAggregateConditions(step); + if (aggIndex >= 0) { + return aggIndex; } - const aiRuleIndex = await this.evaluateAiConditions(step, content); + // 2. Standard tag detection (from Phase 3 output) + if (tagContent) { + const ruleIndex = detectRuleIndex(tagContent, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return ruleIndex; + } + } + + // 3. AI judge fallback (from Phase 1 output) + const aiRuleIndex = await this.evaluateAiConditions(step, agentContent); if (aiRuleIndex >= 0) { return aiRuleIndex; } - return undefined; + throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`); + } + + /** + * Evaluate aggregate conditions (all()/any()) against sub-step results. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + * + * For each aggregate rule, checks the matched condition text of sub-steps: + * - all("X"): true when ALL sub-steps have matched condition === X + * - any("X"): true when at least ONE sub-step has matched condition === X + * + * Edge cases per spec: + * - Sub-step with no matched rule: all() → false, any() → skip that sub-step + * - No sub-steps (0 件): both → false + * - Non-parallel step: both → false + */ + private evaluateAggregateConditions(step: WorkflowStep): number { + if (!step.rules || !step.parallel || step.parallel.length === 0) return -1; + + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) { + continue; + } + + const subSteps = step.parallel; + const targetCondition = rule.aggregateConditionText; + + if (rule.aggregateType === 'all') { + const allMatch = subSteps.every((sub) => { + const output = this.state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (allMatch) { + log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } else { + // 'any' + const anyMatch = subSteps.some((sub) => { + const output = this.state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (anyMatch) { + log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } + } + + return -1; } /** Run a normal (non-parallel) step */ @@ -281,8 +375,13 @@ export class WorkflowEngine extends EventEmitter { await this.runReportPhase(step, stepIteration); } - // Status detection uses phase 1 response - const matchedRuleIndex = await this.detectMatchedRule(step, response.content); + // Phase 3: status judgment (resume session, no tools, output status tag) + let tagContent = ''; + if (this.needsStatusJudgmentPhase(step)) { + tagContent = await this.runStatusJudgmentPhase(step); + } + + const matchedRuleIndex = await this.detectMatchedRule(step, response.content, tagContent); if (matchedRuleIndex != null) { response = { ...response, matchedRuleIndex }; } @@ -300,8 +399,7 @@ export class WorkflowEngine extends EventEmitter { private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise { const sessionId = this.state.agentSessions.get(step.agent); if (!sessionId) { - log.debug('Skipping report phase: no sessionId to resume', { step: step.name }); - return; + throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); } log.debug('Running report phase', { step: step.name, sessionId }); @@ -313,20 +411,10 @@ export class WorkflowEngine extends EventEmitter { language: this.language, }); - const reportOptions: RunAgentOptions = { - cwd: this.cwd, - sessionId, - agentPath: step.agentPath, + const reportOptions = this.buildResumeOptions(step, sessionId, { allowedTools: ['Write'], maxTurns: 3, - provider: step.provider, - model: step.model, - permissionMode: step.permissionMode, - onStream: this.options.onStream, - onPermissionRequest: this.options.onPermissionRequest, - onAskUserQuestion: this.options.onAskUserQuestion, - bypassPermissions: this.options.bypassPermissions, - }; + }); const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions); @@ -336,6 +424,48 @@ export class WorkflowEngine extends EventEmitter { log.debug('Report phase complete', { step: step.name, status: reportResponse.status }); } + /** + * Check if a step needs Phase 3 (status judgment). + * Returns true when at least one rule requires tag-based detection + * (i.e., not all rules are ai() or aggregate conditions). + */ + private needsStatusJudgmentPhase(step: WorkflowStep): boolean { + if (!step.rules || step.rules.length === 0) return false; + const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); + return !allNonTagConditions; + } + + /** + * Phase 3: Status judgment. + * Resumes the agent session with no tools to ask the agent to output a status tag. + * Returns the Phase 3 response content (containing the status tag). + */ + private async runStatusJudgmentPhase(step: WorkflowStep): Promise { + const sessionId = this.state.agentSessions.get(step.agent); + if (!sessionId) { + throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); + } + + log.debug('Running status judgment phase', { step: step.name, sessionId }); + + const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, { + language: this.language, + }); + + const judgmentOptions = this.buildResumeOptions(step, sessionId, { + allowedTools: [], + maxTurns: 1, + }); + + const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); + + // Update session (phase 3 may update it) + this.updateAgentSession(step.agent, judgmentResponse.sessionId); + + log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status }); + return judgmentResponse.content; + } + /** * Run a parallel step: execute all sub-steps concurrently, then aggregate results. * The aggregated output becomes the parent step's response for rules evaluation. @@ -365,8 +495,13 @@ export class WorkflowEngine extends EventEmitter { await this.runReportPhase(subStep, subIteration); } - // Detect sub-step rule matches (tag detection + ai() fallback) - const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content); + // Phase 3: status judgment for sub-step + let subTagContent = ''; + if (this.needsStatusJudgmentPhase(subStep)) { + subTagContent = await this.runStatusJudgmentPhase(subStep); + } + + const matchedRuleIndex = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); const finalResponse = matchedRuleIndex != null ? { ...subResponse, matchedRuleIndex } : subResponse; @@ -387,8 +522,8 @@ export class WorkflowEngine extends EventEmitter { .map((r) => r.instruction) .join('\n\n'); - // Evaluate parent step's rules against aggregated output - const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent); + // Parent step uses aggregate conditions, so tagContent is empty + const matchedRuleIndex = await this.detectMatchedRule(step, aggregatedContent, ''); const aggregatedResponse: AgentResponse = { agent: step.name, diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index 1dabef0..487a991 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -5,7 +5,8 @@ * 1. Auto-injecting standard sections (Execution Context, Workflow Context, * User Request, Previous Response, Additional User Inputs, Instructions header) * 2. Replacing template placeholders with actual values - * 3. Appending auto-generated status rules from workflow rules + * + * Status judgment is handled separately in Phase 3 (buildStatusJudgmentInstruction). */ import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js'; @@ -60,29 +61,6 @@ export function buildExecutionMetadata(context: InstructionContext, edit?: boole }; } -/** Localized strings for status rules header */ -const STATUS_RULES_HEADER_STRINGS = { - en: { - heading: '# ⚠️ Required: Status Output Rules ⚠️', - warning: '**The workflow will stop without this tag.**', - instruction: 'Your final output MUST include a status tag following the rules below.', - }, - ja: { - heading: '# ⚠️ 必須: ステータス出力ルール ⚠️', - warning: '**このタグがないとワークフローが停止します。**', - instruction: '最終出力には必ず以下のルールに従ったステータスタグを含めてください。', - }, -} as const; - -/** - * Render status rules header. - * Prepended to auto-generated status rules from workflow rules. - */ -export function renderStatusRulesHeader(language: Language): string { - const strings = STATUS_RULES_HEADER_STRINGS[language]; - return [strings.heading, '', strings.warning, strings.instruction, ''].join('\n'); -} - /** Localized strings for rules-based status prompt */ const RULES_PROMPT_STRINGS = { en: { @@ -428,7 +406,8 @@ function replaceTemplatePlaceholders( * 4. Previous Response — if passPreviousResponse and has content, unless template contains {previous_response} * 5. Additional User Inputs — unless template contains {user_inputs} * 6. Instructions header + instruction_template content — always - * 7. Status Output Rules — if rules exist + * + * Status judgment is handled separately in Phase 3 (buildStatusJudgmentInstruction). * * Template placeholders ({task}, {previous_response}, etc.) are still replaced * within the instruction_template body for backward compatibility. @@ -483,17 +462,6 @@ export function buildInstruction( ); sections.push(`${s.instructions}\n${processedTemplate}`); - // 7. Status rules (auto-generated from rules) - // Skip when ALL rules are ai() conditions — agent doesn't need to output status tags - if (step.rules && step.rules.length > 0) { - const allAiConditions = step.rules.every((r) => r.isAiCondition); - if (!allAiConditions) { - const statusHeader = renderStatusRulesHeader(language); - const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); - sections.push(`${statusHeader}\n${generatedPrompt}`); - } - } - return sections.join('\n\n'); } @@ -613,3 +581,53 @@ export function buildReportInstruction( return sections.join('\n\n'); } + +/** Localized strings for status judgment phase (Phase 3) */ +const STATUS_JUDGMENT_STRINGS = { + en: { + header: 'Review your work results and determine the status. Do NOT perform any additional work.', + }, + ja: { + header: '作業結果を振り返り、ステータスを判定してください。追加の作業は行わないでください。', + }, +} as const; + +/** + * Context for building status judgment instruction (Phase 3). + */ +export interface StatusJudgmentContext { + /** Language */ + language?: Language; +} + +/** + * Build instruction for Phase 3 (status judgment). + * + * Resumes the agent session and asks it to evaluate its work + * and output the appropriate status tag. No tools are allowed. + * + * Includes: + * - Header instruction (review and determine status) + * - Status rules (criteria table + output format) from generateStatusRulesFromRules() + */ +export function buildStatusJudgmentInstruction( + step: WorkflowStep, + context: StatusJudgmentContext, +): string { + if (!step.rules || step.rules.length === 0) { + throw new Error(`buildStatusJudgmentInstruction called for step "${step.name}" which has no rules`); + } + + const language = context.language ?? 'en'; + const s = STATUS_JUDGMENT_STRINGS[language]; + const sections: string[] = []; + + // Header + sections.push(s.header); + + // Status rules (criteria table + output format) + const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(generatedPrompt); + + return sections.join('\n\n'); +}