From ed367f27df8f281e19662c8409b5908f30c1f572 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Thu, 5 Feb 2026 11:34:23 +0900 Subject: [PATCH] =?UTF-8?q?Phase=203=E5=88=A4=E5=AE=9A=E3=83=AD=E3=82=B8?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=92conductor=E3=82=A8=E3=83=BC=E3=82=B8?= =?UTF-8?q?=E3=82=A7=E3=83=B3=E3=83=88+=E3=83=95=E3=82=A9=E3=83=BC?= =?UTF-8?q?=E3=83=AB=E3=83=90=E3=83=83=E3=82=AF=E6=88=A6=E7=95=A5=E3=81=AB?= =?UTF-8?q?=E5=88=86=E9=9B=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3でレビュアーエージェントが判定タグを出力せず新しい作業を開始する問題を解決。 判定専用のconductorエージェントと4段階フォールバック戦略(AutoSelect→ReportBased→ResponseBased→AgentConsult)を導入し、 ParallelRunnerのlastResponse未配線問題とJudgmentDetectorのアンダースコア対応も修正。 --- .../global/en/agents/default/conductor.md | 47 ++++ .../global/ja/agents/default/conductor.md | 47 ++++ src/__tests__/instructionBuilder.test.ts | 13 +- src/__tests__/it-instruction-builder.test.ts | 2 +- src/__tests__/judgment-detector.test.ts | 70 +++++ src/__tests__/judgment-fallback.test.ts | 137 ++++++++++ src/core/piece/engine/MovementExecutor.ts | 2 +- src/core/piece/engine/OptionsBuilder.ts | 2 + src/core/piece/engine/ParallelRunner.ts | 4 +- src/core/piece/evaluation/rule-utils.ts | 31 +++ .../instruction/StatusJudgmentBuilder.ts | 42 ++- src/core/piece/judgment/FallbackStrategy.ts | 255 ++++++++++++++++++ src/core/piece/judgment/JudgmentDetector.ts | 45 ++++ src/core/piece/judgment/index.ts | 18 ++ src/core/piece/phase-runner.ts | 86 +++--- .../prompts/en/perform_phase3_message.md | 8 +- .../prompts/ja/perform_phase3_message.md | 8 +- 17 files changed, 757 insertions(+), 60 deletions(-) create mode 100644 resources/global/en/agents/default/conductor.md create mode 100644 resources/global/ja/agents/default/conductor.md create mode 100644 src/__tests__/judgment-detector.test.ts create mode 100644 src/__tests__/judgment-fallback.test.ts create mode 100644 src/core/piece/judgment/FallbackStrategy.ts create mode 100644 src/core/piece/judgment/JudgmentDetector.ts create mode 100644 src/core/piece/judgment/index.ts diff --git a/resources/global/en/agents/default/conductor.md b/resources/global/en/agents/default/conductor.md new file mode 100644 index 0000000..e9a4e9b --- /dev/null +++ b/resources/global/en/agents/default/conductor.md @@ -0,0 +1,47 @@ +# Conductor Agent + +You are a **judgment specialist agent**. + +## Role + +Read the provided information (report, agent response, or conversation log) and output **exactly one tag** corresponding to the judgment result. + +## What to do + +1. Review the information provided in the instruction (report/response/conversation log) +2. Identify the judgment result (APPROVE/REJECT, etc.) or work outcome from the information +3. Output the corresponding tag in one line according to the decision criteria table +4. **If you cannot determine, clearly state "Cannot determine"** + +## What NOT to do + +- Do NOT perform review work +- Do NOT use tools +- Do NOT check additional files or analyze code +- Do NOT modify or expand the provided information + +## Output Format + +### When determination is possible + +Output only the judgment tag in one line. Example: + +``` +[ARCH-REVIEW:1] +``` + +### When determination is NOT possible + +If any of the following applies, clearly state "Cannot determine": + +- The provided information does not match any of the judgment criteria +- Multiple criteria may apply +- Insufficient information + +Example output: + +``` +Cannot determine: Insufficient information +``` + +**Important:** Respect the result shown in the provided information as-is and output the corresponding tag number. If uncertain, do NOT guess - state "Cannot determine" instead. diff --git a/resources/global/ja/agents/default/conductor.md b/resources/global/ja/agents/default/conductor.md new file mode 100644 index 0000000..86cbaba --- /dev/null +++ b/resources/global/ja/agents/default/conductor.md @@ -0,0 +1,47 @@ +# Conductor Agent + +あなたは**判定専門エージェント**です。 + +## 役割 + +提供された情報(レポート、エージェントの応答、または会話ログ)を読み、判定結果に対応するタグを**1つだけ**出力します。 + +## やること + +1. 指示に含まれる情報(レポート/応答/会話ログ)を確認 +2. 情報に記載された判定結果(APPROVE/REJECT等)や作業結果を特定 +3. 判定基準表に従い、対応するタグを1行で出力 +4. **判断できない場合は明確に「判断できない」と伝える** + +## やらないこと + +- レビュー作業は行わない +- ツールは使用しない +- 追加のファイル確認やコード解析は不要 +- 提供された情報の内容を変更・拡張しない + +## 出力フォーマット + +### 判定できる場合 + +判定タグのみを1行で出力してください。例: + +``` +[ARCH-REVIEW:1] +``` + +### 判定できない場合 + +以下の場合は「判断できない」と明確に出力してください: + +- 提供された情報から判定基準のどれにも当てはまらない +- 複数の基準に該当する可能性がある +- 情報が不足している + +出力例: + +``` +判断できない:情報が不足しています +``` + +**重要:** 提供された情報で示された結果をそのまま尊重し、対応するタグ番号を出力してください。不確実な場合は推測せず「判断できない」と伝えてください。 diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index c19b809..51a8756 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -1047,6 +1047,7 @@ describe('instruction-builder', () => { function createJudgmentContext(overrides: Partial = {}): StatusJudgmentContext { return { language: 'en', + reportContent: '# Test Report\n\nReport content for testing.', ...overrides, }; } @@ -1062,8 +1063,8 @@ describe('instruction-builder', () => { const result = buildStatusJudgmentInstruction(step, ctx); - expect(result).toContain('Review your work results and determine the status'); - expect(result).toContain('Do NOT perform any additional work'); + expect(result).toContain('Review is already complete'); + expect(result).toContain('Output exactly one tag corresponding to the judgment result'); }); it('should include header instruction (ja)', () => { @@ -1077,8 +1078,8 @@ describe('instruction-builder', () => { const result = buildStatusJudgmentInstruction(step, ctx); - expect(result).toContain('作業結果を振り返り、ステータスを判定してください'); - expect(result).toContain('追加の作業は行わないでください'); + expect(result).toContain('既にレビューは完了しています'); + expect(result).toContain('レポートで示された判定結果に対応するタグを1つだけ出力してください'); }); it('should include criteria table with tags', () => { @@ -1132,11 +1133,11 @@ describe('instruction-builder', () => { const step = createMinimalStep('Do work'); step.name = 'test'; step.rules = [{ condition: 'Done', next: 'COMPLETE' }]; - const ctx: StatusJudgmentContext = {}; + const ctx: StatusJudgmentContext = { reportContent: 'Test report content' }; const result = buildStatusJudgmentInstruction(step, ctx); - expect(result).toContain('Review your work results'); + expect(result).toContain('Review is already complete'); expect(result).toContain('## Decision Criteria'); }); diff --git a/src/__tests__/it-instruction-builder.test.ts b/src/__tests__/it-instruction-builder.test.ts index 662d7f6..79d3bb6 100644 --- a/src/__tests__/it-instruction-builder.test.ts +++ b/src/__tests__/it-instruction-builder.test.ts @@ -322,7 +322,7 @@ describe('Instruction Builder IT: buildStatusJudgmentInstruction', () => { ], }); - const result = buildStatusJudgmentInstruction(step, { language: 'en' }); + const result = buildStatusJudgmentInstruction(step, { language: 'en', reportContent: 'Test report content' }); expect(result).toContain('[PLAN:'); expect(result).toContain('Clear'); diff --git a/src/__tests__/judgment-detector.test.ts b/src/__tests__/judgment-detector.test.ts new file mode 100644 index 0000000..1bd198c --- /dev/null +++ b/src/__tests__/judgment-detector.test.ts @@ -0,0 +1,70 @@ +/** + * Test for JudgmentDetector + */ + +import { describe, it, expect } from 'vitest'; +import { JudgmentDetector } from '../core/piece/judgment/JudgmentDetector.js'; + +describe('JudgmentDetector', () => { + describe('detect', () => { + it('should detect tag in simple response', () => { + const result = JudgmentDetector.detect('[ARCH-REVIEW:1]'); + expect(result.success).toBe(true); + expect(result.tag).toBe('[ARCH-REVIEW:1]'); + }); + + it('should detect tag with surrounding text', () => { + const result = JudgmentDetector.detect('Based on the review, I choose [MOVEMENT:2] because...'); + expect(result.success).toBe(true); + expect(result.tag).toBe('[MOVEMENT:2]'); + }); + + it('should detect tag with hyphenated movement name', () => { + const result = JudgmentDetector.detect('[AI-ANTIPATTERN-REVIEW:1]'); + expect(result.success).toBe(true); + expect(result.tag).toBe('[AI-ANTIPATTERN-REVIEW:1]'); + }); + + it('should detect tag with underscored movement name', () => { + const result = JudgmentDetector.detect('[AI_REVIEW:1]'); + expect(result.success).toBe(true); + expect(result.tag).toBe('[AI_REVIEW:1]'); + }); + + it('should detect "判断できない" (Japanese)', () => { + const result = JudgmentDetector.detect('判断できない:情報が不足しています'); + expect(result.success).toBe(false); + expect(result.reason).toBe('Conductor explicitly stated it cannot judge'); + }); + + it('should detect "Cannot determine" (English)', () => { + const result = JudgmentDetector.detect('Cannot determine: Insufficient information'); + expect(result.success).toBe(false); + expect(result.reason).toBe('Conductor explicitly stated it cannot judge'); + }); + + it('should detect "unable to judge"', () => { + const result = JudgmentDetector.detect('I am unable to judge based on the provided information.'); + expect(result.success).toBe(false); + expect(result.reason).toBe('Conductor explicitly stated it cannot judge'); + }); + + it('should fail when no tag and no explicit "cannot judge"', () => { + const result = JudgmentDetector.detect('This is a response without a tag or explicit statement.'); + expect(result.success).toBe(false); + expect(result.reason).toBe('No tag found and no explicit "cannot judge" statement'); + }); + + it('should fail on empty response', () => { + const result = JudgmentDetector.detect(''); + expect(result.success).toBe(false); + expect(result.reason).toBe('No tag found and no explicit "cannot judge" statement'); + }); + + it('should detect first tag when multiple tags exist', () => { + const result = JudgmentDetector.detect('[MOVEMENT:1] or [MOVEMENT:2]'); + expect(result.success).toBe(true); + expect(result.tag).toBe('[MOVEMENT:1]'); + }); + }); +}); diff --git a/src/__tests__/judgment-fallback.test.ts b/src/__tests__/judgment-fallback.test.ts new file mode 100644 index 0000000..d96133c --- /dev/null +++ b/src/__tests__/judgment-fallback.test.ts @@ -0,0 +1,137 @@ +/** + * Test for Fallback Strategies + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { PieceMovement } from '../core/models/types.js'; +import type { JudgmentContext } from '../core/piece/judgment/FallbackStrategy.js'; +import { + AutoSelectStrategy, + ReportBasedStrategy, + ResponseBasedStrategy, + AgentConsultStrategy, + JudgmentStrategyFactory, +} from '../core/piece/judgment/FallbackStrategy.js'; + +// Mock runAgent +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +describe('JudgmentStrategies', () => { + const mockStep: PieceMovement = { + name: 'test-movement', + agent: 'test-agent', + rules: [ + { description: 'Rule 1', condition: 'approved' }, + { description: 'Rule 2', condition: 'rejected' }, + ], + }; + + const mockContext: JudgmentContext = { + step: mockStep, + cwd: '/test/cwd', + language: 'en', + reportDir: '/test/reports', + lastResponse: 'Last response content', + sessionId: 'session-123', + }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('AutoSelectStrategy', () => { + it('should apply when step has only one rule', () => { + const singleRuleStep: PieceMovement = { + name: 'single-rule', + rules: [{ description: 'Only rule', condition: 'always' }], + }; + const strategy = new AutoSelectStrategy(); + expect(strategy.canApply({ ...mockContext, step: singleRuleStep })).toBe(true); + }); + + it('should not apply when step has multiple rules', () => { + const strategy = new AutoSelectStrategy(); + expect(strategy.canApply(mockContext)).toBe(false); + }); + + it('should return auto-selected tag', async () => { + const singleRuleStep: PieceMovement = { + name: 'single-rule', + rules: [{ description: 'Only rule', condition: 'always' }], + }; + const strategy = new AutoSelectStrategy(); + const result = await strategy.execute({ ...mockContext, step: singleRuleStep }); + expect(result.success).toBe(true); + expect(result.tag).toBe('[SINGLE-RULE:1]'); + }); + }); + + describe('ReportBasedStrategy', () => { + it('should apply when reportDir and report files are configured', () => { + const strategy = new ReportBasedStrategy(); + const stepWithReport: PieceMovement = { + ...mockStep, + report: 'review-report.md', + }; + expect(strategy.canApply({ ...mockContext, step: stepWithReport })).toBe(true); + }); + + it('should not apply when reportDir is missing', () => { + const strategy = new ReportBasedStrategy(); + expect(strategy.canApply({ ...mockContext, reportDir: undefined })).toBe(false); + }); + + it('should not apply when step has no report files configured', () => { + const strategy = new ReportBasedStrategy(); + // mockStep has no report field → getReportFiles returns [] + expect(strategy.canApply(mockContext)).toBe(false); + }); + }); + + describe('ResponseBasedStrategy', () => { + it('should apply when lastResponse is provided', () => { + const strategy = new ResponseBasedStrategy(); + expect(strategy.canApply(mockContext)).toBe(true); + }); + + it('should not apply when lastResponse is missing', () => { + const strategy = new ResponseBasedStrategy(); + expect(strategy.canApply({ ...mockContext, lastResponse: undefined })).toBe(false); + }); + + it('should not apply when lastResponse is empty', () => { + const strategy = new ResponseBasedStrategy(); + expect(strategy.canApply({ ...mockContext, lastResponse: '' })).toBe(false); + }); + }); + + describe('AgentConsultStrategy', () => { + it('should apply when sessionId is provided', () => { + const strategy = new AgentConsultStrategy(); + expect(strategy.canApply(mockContext)).toBe(true); + }); + + it('should not apply when sessionId is missing', () => { + const strategy = new AgentConsultStrategy(); + expect(strategy.canApply({ ...mockContext, sessionId: undefined })).toBe(false); + }); + + it('should not apply when sessionId is empty', () => { + const strategy = new AgentConsultStrategy(); + expect(strategy.canApply({ ...mockContext, sessionId: '' })).toBe(false); + }); + }); + + describe('JudgmentStrategyFactory', () => { + it('should create strategies in correct order', () => { + const strategies = JudgmentStrategyFactory.createStrategies(); + expect(strategies).toHaveLength(4); + expect(strategies[0]).toBeInstanceOf(AutoSelectStrategy); + expect(strategies[1]).toBeInstanceOf(ReportBasedStrategy); + expect(strategies[2]).toBeInstanceOf(ResponseBasedStrategy); + expect(strategies[3]).toBeInstanceOf(AgentConsultStrategy); + }); + }); +}); diff --git a/src/core/piece/engine/MovementExecutor.ts b/src/core/piece/engine/MovementExecutor.ts index f42c54b..83db492 100644 --- a/src/core/piece/engine/MovementExecutor.ts +++ b/src/core/piece/engine/MovementExecutor.ts @@ -112,7 +112,7 @@ export class MovementExecutor { updateAgentSession(sessionKey, response.sessionId); this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error); - const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, updateAgentSession, this.deps.onPhaseStart, this.deps.onPhaseComplete); + const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, response.content, updateAgentSession, this.deps.onPhaseStart, this.deps.onPhaseComplete); // Phase 2: report output (resume same session, Write only) if (step.report) { diff --git a/src/core/piece/engine/OptionsBuilder.ts b/src/core/piece/engine/OptionsBuilder.ts index 2ae892a..829923a 100644 --- a/src/core/piece/engine/OptionsBuilder.ts +++ b/src/core/piece/engine/OptionsBuilder.ts @@ -89,6 +89,7 @@ export class OptionsBuilder { /** Build PhaseRunnerContext for Phase 2/3 execution */ buildPhaseRunnerContext( state: PieceState, + lastResponse: string | undefined, updateAgentSession: (agent: string, sessionId: string | undefined) => void, onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void, onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void, @@ -98,6 +99,7 @@ export class OptionsBuilder { reportDir: join(this.getProjectCwd(), this.getReportDir()), language: this.getLanguage(), interactive: this.engineOptions.interactive, + lastResponse, getSessionId: (agent: string) => state.agentSessions.get(agent), buildResumeOptions: this.buildResumeOptions.bind(this), updateAgentSession, diff --git a/src/core/piece/engine/ParallelRunner.ts b/src/core/piece/engine/ParallelRunner.ts index 70c34e8..ff2ff20 100644 --- a/src/core/piece/engine/ParallelRunner.ts +++ b/src/core/piece/engine/ParallelRunner.ts @@ -74,7 +74,6 @@ export class ParallelRunner { }) : undefined; - const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, updateAgentSession, this.deps.onPhaseStart, this.deps.onPhaseComplete); const ruleCtx = { state, cwd: this.deps.getCwd(), @@ -103,6 +102,9 @@ export class ParallelRunner { updateAgentSession(subSessionKey, subResponse.sessionId); this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error); + // Build phase context for this sub-movement with its lastResponse + const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, subResponse.content, updateAgentSession, this.deps.onPhaseStart, this.deps.onPhaseComplete); + // Phase 2: report output for sub-movement if (subMovement.report) { await runReportPhase(subMovement, subIteration, phaseCtx); diff --git a/src/core/piece/evaluation/rule-utils.ts b/src/core/piece/evaluation/rule-utils.ts index 2eae770..d9f76d8 100644 --- a/src/core/piece/evaluation/rule-utils.ts +++ b/src/core/piece/evaluation/rule-utils.ts @@ -3,6 +3,7 @@ */ import type { PieceMovement } from '../../models/types.js'; +import { isReportObjectConfig } from '../instruction/InstructionBuilder.js'; /** * Check whether a movement has tag-based rules (i.e., rules that require @@ -16,3 +17,33 @@ export function hasTagBasedRules(step: PieceMovement): boolean { const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); return !allNonTagConditions; } + +/** + * Check if a movement has only one branch (automatic selection possible). + * Returns true when rules.length === 1, meaning no actual choice is needed. + */ +export function hasOnlyOneBranch(step: PieceMovement): boolean { + return step.rules !== undefined && step.rules.length === 1; +} + +/** + * Get the auto-selected tag when there's only one branch. + * Returns the tag for the first rule (e.g., "[MOVEMENT:1]"). + */ +export function getAutoSelectedTag(step: PieceMovement): string { + if (!hasOnlyOneBranch(step)) { + throw new Error('Cannot auto-select tag when multiple branches exist'); + } + return `[${step.name.toUpperCase()}:1]`; +} + +/** + * Get report file names from a movement's report configuration. + * Handles all three report config formats: string, ReportObjectConfig, and ReportConfig[]. + */ +export function getReportFiles(report: PieceMovement['report']): string[] { + if (!report) return []; + if (typeof report === 'string') return [report]; + if (isReportObjectConfig(report)) return [report.name]; + return report.map((rc) => rc.path); +} diff --git a/src/core/piece/instruction/StatusJudgmentBuilder.ts b/src/core/piece/instruction/StatusJudgmentBuilder.ts index 7260d85..85a9354 100644 --- a/src/core/piece/instruction/StatusJudgmentBuilder.ts +++ b/src/core/piece/instruction/StatusJudgmentBuilder.ts @@ -1,8 +1,9 @@ /** * Phase 3 instruction builder (status judgment) * - * Resumes the agent session and asks it to evaluate its work - * and output the appropriate status tag. No tools are allowed. + * Builds instructions for the conductor agent to evaluate work results + * and output the appropriate status tag. Supports report-based and + * response-based input sources. * * Renders a single complete template combining the judgment header * and status rules (criteria table + output format). @@ -20,6 +21,12 @@ export interface StatusJudgmentContext { language?: Language; /** Whether interactive-only rules are enabled */ interactive?: boolean; + /** Pre-read report content (from gatherInput) */ + reportContent?: string; + /** Last response from Phase 1 (from gatherInput) */ + lastResponse?: string; + /** Input source type for fallback strategies */ + inputSource?: 'report' | 'response'; } /** @@ -47,11 +54,42 @@ export class StatusJudgmentBuilder { { interactive: this.context.interactive }, ); + // 情報源に応じた内容を構築 + const inputSource = this.context.inputSource || 'report'; + let contentToJudge = ''; + + if (inputSource === 'report') { + contentToJudge = this.buildFromReport(); + } else if (inputSource === 'response') { + contentToJudge = this.buildFromResponse(); + } + return loadTemplate('perform_phase3_message', language, { + reportContent: contentToJudge, criteriaTable: components.criteriaTable, outputList: components.outputList, hasAppendix: components.hasAppendix, appendixContent: components.appendixContent, }); } + + /** + * Build judgment content from pre-read report content. + */ + private buildFromReport(): string { + if (!this.context.reportContent) { + throw new Error('reportContent is required for report-based judgment'); + } + return this.context.reportContent; + } + + /** + * Build judgment content from last response. + */ + private buildFromResponse(): string { + if (!this.context.lastResponse) { + throw new Error('lastResponse is required for response-based judgment'); + } + return `\n## Agent Response\n\n${this.context.lastResponse}`; + } } diff --git a/src/core/piece/judgment/FallbackStrategy.ts b/src/core/piece/judgment/FallbackStrategy.ts new file mode 100644 index 0000000..b2855c3 --- /dev/null +++ b/src/core/piece/judgment/FallbackStrategy.ts @@ -0,0 +1,255 @@ +/** + * Fallback strategies for Phase 3 judgment. + * + * Implements Chain of Responsibility pattern to try multiple judgment methods + * when conductor cannot determine the status from report alone. + */ + +import { readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; +import type { PieceMovement, Language } from '../../models/types.js'; +import { runAgent } from '../../../agents/runner.js'; +import { StatusJudgmentBuilder } from '../instruction/StatusJudgmentBuilder.js'; +import { JudgmentDetector, type JudgmentResult } from './JudgmentDetector.js'; +import { hasOnlyOneBranch, getAutoSelectedTag, getReportFiles } from '../evaluation/rule-utils.js'; +import { createLogger } from '../../../shared/utils/index.js'; + +const log = createLogger('fallback-strategy'); + +export interface JudgmentContext { + step: PieceMovement; + cwd: string; + language?: Language; + reportDir?: string; + lastResponse?: string; // Phase 1の最終応答 + sessionId?: string; +} + +export interface JudgmentStrategy { + readonly name: string; + canApply(context: JudgmentContext): boolean; + execute(context: JudgmentContext): Promise; +} + +/** + * Base class for judgment strategies using Template Method Pattern. + */ +abstract class JudgmentStrategyBase implements JudgmentStrategy { + abstract readonly name: string; + + abstract canApply(context: JudgmentContext): boolean; + + async execute(context: JudgmentContext): Promise { + try { + // 1. 情報収集(サブクラスで実装) + const input = await this.gatherInput(context); + + // 2. 指示生成(サブクラスで実装) + const instruction = this.buildInstruction(input, context); + + // 3. conductor実行(共通) + const response = await this.runConductor(instruction, context); + + // 4. 結果検出(共通) + return JudgmentDetector.detect(response); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + log.debug(`Strategy ${this.name} threw error`, { error: errorMsg }); + return { + success: false, + reason: `Strategy failed with error: ${errorMsg}`, + }; + } + } + + protected abstract gatherInput(context: JudgmentContext): Promise; + + protected abstract buildInstruction(input: string, context: JudgmentContext): string; + + protected async runConductor(instruction: string, context: JudgmentContext): Promise { + const response = await runAgent('conductor', instruction, { + cwd: context.cwd, + allowedTools: [], + maxTurns: 3, + language: context.language, + }); + + if (response.status !== 'done') { + throw new Error(`Conductor failed: ${response.error || response.content || 'Unknown error'}`); + } + + return response.content; + } +} + +/** + * Strategy 1: Auto-select when there's only one branch. + * This strategy doesn't use conductor - just returns the single tag. + */ +export class AutoSelectStrategy implements JudgmentStrategy { + readonly name = 'AutoSelect'; + + canApply(context: JudgmentContext): boolean { + return hasOnlyOneBranch(context.step); + } + + async execute(context: JudgmentContext): Promise { + const tag = getAutoSelectedTag(context.step); + log.debug('Auto-selected tag (single branch)', { tag }); + return { + success: true, + tag, + }; + } +} + +/** + * Strategy 2: Report-based judgment. + * Read report files and ask conductor to judge. + */ +export class ReportBasedStrategy extends JudgmentStrategyBase { + readonly name = 'ReportBased'; + + canApply(context: JudgmentContext): boolean { + return context.reportDir !== undefined && getReportFiles(context.step.report).length > 0; + } + + protected async gatherInput(context: JudgmentContext): Promise { + if (!context.reportDir) { + throw new Error('Report directory not provided'); + } + + const reportFiles = getReportFiles(context.step.report); + if (reportFiles.length === 0) { + throw new Error('No report files configured'); + } + + const reportContents: string[] = []; + for (const fileName of reportFiles) { + const filePath = resolve(context.reportDir, fileName); + try { + const content = readFileSync(filePath, 'utf-8'); + reportContents.push(`# ${fileName}\n\n${content}`); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + throw new Error(`Failed to read report file ${fileName}: ${errorMsg}`); + } + } + + return reportContents.join('\n\n---\n\n'); + } + + protected buildInstruction(input: string, context: JudgmentContext): string { + return new StatusJudgmentBuilder(context.step, { + language: context.language, + reportContent: input, + inputSource: 'report', + }).build(); + } +} + +/** + * Strategy 3: Response-based judgment. + * Use the last response from Phase 1 to judge. + */ +export class ResponseBasedStrategy extends JudgmentStrategyBase { + readonly name = 'ResponseBased'; + + canApply(context: JudgmentContext): boolean { + return context.lastResponse !== undefined && context.lastResponse.length > 0; + } + + protected async gatherInput(context: JudgmentContext): Promise { + if (!context.lastResponse) { + throw new Error('Last response not provided'); + } + return context.lastResponse; + } + + protected buildInstruction(input: string, context: JudgmentContext): string { + return new StatusJudgmentBuilder(context.step, { + language: context.language, + lastResponse: input, + inputSource: 'response', + }).build(); + } +} + +/** + * Strategy 4: Agent consult. + * Resume the Phase 1 agent session and ask which tag is appropriate. + */ +export class AgentConsultStrategy implements JudgmentStrategy { + readonly name = 'AgentConsult'; + + canApply(context: JudgmentContext): boolean { + return context.sessionId !== undefined && context.sessionId.length > 0; + } + + async execute(context: JudgmentContext): Promise { + if (!context.sessionId) { + return { + success: false, + reason: 'Session ID not provided', + }; + } + + try { + const question = this.buildQuestion(context); + + const response = await runAgent(context.step.agent ?? context.step.name, question, { + cwd: context.cwd, + sessionId: context.sessionId, + maxTurns: 3, + language: context.language, + }); + + if (response.status !== 'done') { + return { + success: false, + reason: `Agent consultation failed: ${response.error || 'Unknown error'}`, + }; + } + + return JudgmentDetector.detect(response.content); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + log.debug('Agent consult strategy failed', { error: errorMsg }); + return { + success: false, + reason: `Agent consultation error: ${errorMsg}`, + }; + } + } + + private buildQuestion(context: JudgmentContext): string { + const rules = context.step.rules || []; + const ruleDescriptions = rules.map((rule, idx) => { + const tag = `[${context.step.name.toUpperCase()}:${idx + 1}]`; + const desc = rule.condition || `Rule ${idx + 1}`; + return `- ${tag}: ${desc}`; + }).join('\n'); + + const lang = context.language || 'en'; + + if (lang === 'ja') { + return `あなたの作業結果に基づいて、以下の判定タグのうちどれが適切か教えてください:\n\n${ruleDescriptions}\n\n該当するタグを1つだけ出力してください(例: [${context.step.name.toUpperCase()}:1])。`; + } else { + return `Based on your work, which of the following judgment tags is appropriate?\n\n${ruleDescriptions}\n\nPlease output only one tag (e.g., [${context.step.name.toUpperCase()}:1]).`; + } + } +} + +/** + * Factory for creating judgment strategies in order of priority. + */ +export class JudgmentStrategyFactory { + static createStrategies(): JudgmentStrategy[] { + return [ + new AutoSelectStrategy(), + new ReportBasedStrategy(), + new ResponseBasedStrategy(), + new AgentConsultStrategy(), + ]; + } +} diff --git a/src/core/piece/judgment/JudgmentDetector.ts b/src/core/piece/judgment/JudgmentDetector.ts new file mode 100644 index 0000000..a00a5da --- /dev/null +++ b/src/core/piece/judgment/JudgmentDetector.ts @@ -0,0 +1,45 @@ +/** + * Detect judgment result from conductor's response. + */ +export interface JudgmentResult { + success: boolean; + tag?: string; // e.g., "[ARCH-REVIEW:1]" + reason?: string; +} + +export class JudgmentDetector { + private static readonly TAG_PATTERN = /\[([A-Z_-]+):(\d+)\]/; + private static readonly CANNOT_JUDGE_PATTERNS = [ + /判断できない/i, + /cannot\s+determine/i, + /unable\s+to\s+judge/i, + /insufficient\s+information/i, + ]; + + static detect(response: string): JudgmentResult { + // 1. タグ検出 + const tagMatch = response.match(this.TAG_PATTERN); + if (tagMatch) { + return { + success: true, + tag: tagMatch[0], // e.g., "[ARCH-REVIEW:1]" + }; + } + + // 2. 「判断できない」検出 + for (const pattern of this.CANNOT_JUDGE_PATTERNS) { + if (pattern.test(response)) { + return { + success: false, + reason: 'Conductor explicitly stated it cannot judge', + }; + } + } + + // 3. タグも「判断できない」もない → 失敗 + return { + success: false, + reason: 'No tag found and no explicit "cannot judge" statement', + }; + } +} diff --git a/src/core/piece/judgment/index.ts b/src/core/piece/judgment/index.ts new file mode 100644 index 0000000..58f3cae --- /dev/null +++ b/src/core/piece/judgment/index.ts @@ -0,0 +1,18 @@ +/** + * Judgment module exports + */ + +export { + JudgmentDetector, + type JudgmentResult, +} from './JudgmentDetector.js'; + +export { + AutoSelectStrategy, + ReportBasedStrategy, + ResponseBasedStrategy, + AgentConsultStrategy, + JudgmentStrategyFactory, + type JudgmentContext, + type JudgmentStrategy, +} from './FallbackStrategy.js'; diff --git a/src/core/piece/phase-runner.ts b/src/core/piece/phase-runner.ts index 138a99b..a11e6b4 100644 --- a/src/core/piece/phase-runner.ts +++ b/src/core/piece/phase-runner.ts @@ -11,9 +11,8 @@ import type { PieceMovement, Language } from '../models/types.js'; import type { PhaseName } from './types.js'; import { runAgent, type RunAgentOptions } from '../../agents/runner.js'; import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js'; -import { StatusJudgmentBuilder } from './instruction/StatusJudgmentBuilder.js'; -import { hasTagBasedRules } from './evaluation/rule-utils.js'; -import { isReportObjectConfig } from './instruction/InstructionBuilder.js'; +import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js'; +import { JudgmentStrategyFactory, type JudgmentContext } from './judgment/index.js'; import { createLogger } from '../../shared/utils/index.js'; const log = createLogger('phase-runner'); @@ -27,6 +26,8 @@ export interface PhaseRunnerContext { language?: Language; /** Whether interactive-only rules are enabled */ interactive?: boolean; + /** Last response from Phase 1 */ + lastResponse?: string; /** Get agent session ID */ getSessionId: (agent: string) => string | undefined; /** Build resume options for a movement */ @@ -47,12 +48,6 @@ export function needsStatusJudgmentPhase(step: PieceMovement): boolean { return hasTagBasedRules(step); } -function getReportFiles(report: PieceMovement['report']): string[] { - if (!report) return []; - if (typeof report === 'string') return [report]; - if (isReportObjectConfig(report)) return [report.name]; - return report.map((rc) => rc.path); -} function writeReportFile(reportDir: string, fileName: string, content: string): void { const baseDir = resolve(reportDir); @@ -152,53 +147,54 @@ export async function runReportPhase( /** * Phase 3: Status judgment. - * Resumes the agent session with no tools to ask the agent to output a status tag. + * Uses the 'conductor' agent in a new session to output a status tag. + * Implements multi-stage fallback logic to ensure judgment succeeds. * Returns the Phase 3 response content (containing the status tag). */ export async function runStatusJudgmentPhase( step: PieceMovement, ctx: PhaseRunnerContext, ): Promise { + log.debug('Running status judgment phase', { movement: step.name }); + + // フォールバック戦略を順次試行(AutoSelectStrategy含む) + const strategies = JudgmentStrategyFactory.createStrategies(); const sessionKey = step.agent ?? step.name; - const sessionId = ctx.getSessionId(sessionKey); - if (!sessionId) { - throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${sessionKey}" in movement "${step.name}"`); - } - - log.debug('Running status judgment phase', { movement: step.name, sessionId }); - - const judgmentInstruction = new StatusJudgmentBuilder(step, { + const judgmentContext: JudgmentContext = { + step, + cwd: ctx.cwd, language: ctx.language, - interactive: ctx.interactive, - }).build(); + reportDir: ctx.reportDir, + lastResponse: ctx.lastResponse, + sessionId: ctx.getSessionId(sessionKey), + }; - ctx.onPhaseStart?.(step, 3, 'judge', judgmentInstruction); + for (const strategy of strategies) { + if (!strategy.canApply(judgmentContext)) { + log.debug(`Strategy ${strategy.name} not applicable, skipping`); + continue; + } - const judgmentOptions = ctx.buildResumeOptions(step, sessionId, { - allowedTools: [], - maxTurns: 3, - }); + log.debug(`Trying strategy: ${strategy.name}`); + ctx.onPhaseStart?.(step, 3, 'judge', `Strategy: ${strategy.name}`); - let judgmentResponse; - try { - judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); - throw error; + try { + const result = await strategy.execute(judgmentContext); + if (result.success) { + log.debug(`Strategy ${strategy.name} succeeded`, { tag: result.tag }); + ctx.onPhaseComplete?.(step, 3, 'judge', result.tag!, 'done'); + return result.tag!; + } + + log.debug(`Strategy ${strategy.name} failed`, { reason: result.reason }); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + log.debug(`Strategy ${strategy.name} threw error`, { error: errorMsg }); + } } - // Check for errors in status judgment phase - if (judgmentResponse.status !== 'done') { - const errorMsg = judgmentResponse.error || judgmentResponse.content || 'Unknown error'; - ctx.onPhaseComplete?.(step, 3, 'judge', judgmentResponse.content, judgmentResponse.status, errorMsg); - throw new Error(`Status judgment phase failed: ${errorMsg}`); - } - - // Update session (phase 3 may update it) - ctx.updateAgentSession(sessionKey, judgmentResponse.sessionId); - - ctx.onPhaseComplete?.(step, 3, 'judge', judgmentResponse.content, judgmentResponse.status); - log.debug('Status judgment phase complete', { movement: step.name, status: judgmentResponse.status }); - return judgmentResponse.content; + // 全戦略失敗 + const errorMsg = 'All judgment strategies failed'; + ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); + throw new Error(errorMsg); } diff --git a/src/shared/prompts/en/perform_phase3_message.md b/src/shared/prompts/en/perform_phase3_message.md index f40a080..a3aa41b 100644 --- a/src/shared/prompts/en/perform_phase3_message.md +++ b/src/shared/prompts/en/perform_phase3_message.md @@ -1,10 +1,12 @@ -Review your work results and determine the status. Do NOT perform any additional work. +**Review is already complete. Output exactly one tag corresponding to the judgment result shown in the report below.** + +{{reportContent}} ## Decision Criteria @@ -12,6 +14,8 @@ Review your work results and determine the status. Do NOT perform any additional ## Output Format +**Output the tag corresponding to the judgment shown in the report in one line:** + {{outputList}} {{#if hasAppendix}} diff --git a/src/shared/prompts/ja/perform_phase3_message.md b/src/shared/prompts/ja/perform_phase3_message.md index 35feb80..becfa29 100644 --- a/src/shared/prompts/ja/perform_phase3_message.md +++ b/src/shared/prompts/ja/perform_phase3_message.md @@ -1,10 +1,12 @@ -作業結果を振り返り、ステータスを判定してください。追加の作業は行わないでください。 +**既にレビューは完了しています。以下のレポートで示された判定結果に対応するタグを1つだけ出力してください。** + +{{reportContent}} ## 判定基準 @@ -12,6 +14,8 @@ ## 出力フォーマット +**レポートで示した判定に対応するタグを1行で出力してください:** + {{outputList}} {{#if hasAppendix}}