takt: github-issue-257

This commit is contained in:
nrslib 2026-02-12 13:32:28 +09:00
parent be0f299727
commit bf4196d3b3
50 changed files with 891 additions and 801 deletions

View File

@ -0,0 +1,33 @@
{
"type": "object",
"properties": {
"parts": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique part identifier"
},
"title": {
"type": "string",
"description": "Human-readable part title"
},
"instruction": {
"type": "string",
"description": "Instruction for the part agent"
},
"timeout_ms": {
"type": "integer",
"description": "Optional timeout in ms"
}
},
"required": ["id", "title", "instruction"],
"additionalProperties": false
}
}
},
"required": ["parts"],
"additionalProperties": false
}

View File

@ -0,0 +1,15 @@
{
"type": "object",
"properties": {
"matched_index": {
"type": "integer",
"description": "Matched condition number (1-based)"
},
"reason": {
"type": "string",
"description": "Why this condition was matched"
}
},
"required": ["matched_index"],
"additionalProperties": false
}

View File

@ -0,0 +1,15 @@
{
"type": "object",
"properties": {
"step": {
"type": "integer",
"description": "Matched rule number (1-based)"
},
"reason": {
"type": "string",
"description": "Brief justification for the decision"
}
},
"required": ["step"],
"additionalProperties": false
}

View File

@ -0,0 +1,185 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { runAgent } from '../agents/runner.js';
import { parseParts } from '../core/piece/engine/task-decomposer.js';
import { detectJudgeIndex } from '../agents/judge-utils.js';
import {
executeAgent,
generateReport,
executePart,
evaluateCondition,
judgeStatus,
decomposeTask,
} from '../core/piece/agent-usecases.js';
vi.mock('../agents/runner.js', () => ({
runAgent: vi.fn(),
}));
vi.mock('../core/piece/schema-loader.js', () => ({
loadJudgmentSchema: vi.fn(() => ({ type: 'judgment' })),
loadEvaluationSchema: vi.fn(() => ({ type: 'evaluation' })),
loadDecompositionSchema: vi.fn((maxParts: number) => ({ type: 'decomposition', maxParts })),
}));
vi.mock('../core/piece/engine/task-decomposer.js', () => ({
parseParts: vi.fn(),
}));
vi.mock('../agents/judge-utils.js', () => ({
buildJudgePrompt: vi.fn(() => 'judge prompt'),
detectJudgeIndex: vi.fn(() => -1),
}));
function doneResponse(content: string, structuredOutput?: Record<string, unknown>) {
return {
persona: 'tester',
status: 'done' as const,
content,
timestamp: new Date('2026-02-12T00:00:00Z'),
structuredOutput,
};
}
describe('agent-usecases', () => {
beforeEach(() => {
vi.clearAllMocks();
});
it('executeAgent/generateReport/executePart は runAgent に委譲する', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('ok'));
await executeAgent('coder', 'do work', { cwd: '/tmp' });
await generateReport('coder', 'write report', { cwd: '/tmp' });
await executePart('coder', 'part work', { cwd: '/tmp' });
expect(runAgent).toHaveBeenCalledTimes(3);
expect(runAgent).toHaveBeenNthCalledWith(1, 'coder', 'do work', { cwd: '/tmp' });
expect(runAgent).toHaveBeenNthCalledWith(2, 'coder', 'write report', { cwd: '/tmp' });
expect(runAgent).toHaveBeenNthCalledWith(3, 'coder', 'part work', { cwd: '/tmp' });
});
it('evaluateCondition は構造化出力の matched_index を優先する', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('ignored', { matched_index: 2 }));
const result = await evaluateCondition('agent output', [
{ index: 0, text: 'first' },
{ index: 1, text: 'second' },
], { cwd: '/repo' });
expect(result).toBe(1);
expect(runAgent).toHaveBeenCalledWith(undefined, 'judge prompt', expect.objectContaining({
cwd: '/repo',
outputSchema: { type: 'evaluation' },
}));
});
it('evaluateCondition は構造化出力が使えない場合にタグ検出へフォールバックする', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('[JUDGE:2]'));
vi.mocked(detectJudgeIndex).mockReturnValue(1);
const result = await evaluateCondition('agent output', [
{ index: 0, text: 'first' },
{ index: 1, text: 'second' },
], { cwd: '/repo' });
expect(result).toBe(1);
expect(detectJudgeIndex).toHaveBeenCalledWith('[JUDGE:2]');
});
it('judgeStatus は単一ルール時に auto_select を返す', async () => {
const result = await judgeStatus('instruction', [{ condition: 'always', next: 'done' }], {
cwd: '/repo',
movementName: 'review',
});
expect(result).toEqual({ ruleIndex: 0, method: 'auto_select' });
expect(runAgent).not.toHaveBeenCalled();
});
it('judgeStatus は構造化出力 step を採用する', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { step: 2 }));
const result = await judgeStatus('instruction', [
{ condition: 'a', next: 'one' },
{ condition: 'b', next: 'two' },
], {
cwd: '/repo',
movementName: 'review',
});
expect(result).toEqual({ ruleIndex: 1, method: 'structured_output' });
});
it('judgeStatus はタグフォールバックを使う', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('[REVIEW:2]'));
const result = await judgeStatus('instruction', [
{ condition: 'a', next: 'one' },
{ condition: 'b', next: 'two' },
], {
cwd: '/repo',
movementName: 'review',
});
expect(result).toEqual({ ruleIndex: 1, method: 'phase3_tag' });
});
it('judgeStatus は最終手段として AI Judge を使う', async () => {
vi.mocked(runAgent)
.mockResolvedValueOnce(doneResponse('no match'))
.mockResolvedValueOnce(doneResponse('ignored', { matched_index: 2 }));
const result = await judgeStatus('instruction', [
{ condition: 'a', next: 'one' },
{ condition: 'b', next: 'two' },
], {
cwd: '/repo',
movementName: 'review',
});
expect(result).toEqual({ ruleIndex: 1, method: 'ai_judge' });
expect(runAgent).toHaveBeenCalledTimes(2);
});
it('decomposeTask は構造化出力 parts を返す', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
parts: [
{ id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: 1000 },
],
}));
const result = await decomposeTask('instruction', 3, { cwd: '/repo', persona: 'team-leader' });
expect(result).toEqual([
{ id: 'p1', title: 'Part 1', instruction: 'Do 1', timeoutMs: 1000 },
]);
expect(parseParts).not.toHaveBeenCalled();
});
it('decomposeTask は構造化出力がない場合 parseParts にフォールバックする', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('```json [] ```'));
vi.mocked(parseParts).mockReturnValue([
{ id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined },
]);
const result = await decomposeTask('instruction', 2, { cwd: '/repo' });
expect(parseParts).toHaveBeenCalledWith('```json [] ```', 2);
expect(result).toEqual([
{ id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined },
]);
});
it('decomposeTask は done 以外をエラーにする', async () => {
vi.mocked(runAgent).mockResolvedValue({
persona: 'team-leader',
status: 'error',
content: 'failure',
error: 'bad output',
timestamp: new Date('2026-02-12T00:00:00Z'),
});
await expect(decomposeTask('instruction', 2, { cwd: '/repo' }))
.rejects.toThrow('Team leader failed: bad output');
});
});

View File

@ -36,7 +36,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
import { PieceEngine } from '../core/piece/index.js';
import { runAgent } from '../agents/runner.js';
import { detectMatchedRule } from '../core/piece/index.js';
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
import {
makeResponse,
makeMovement,

View File

@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
import { PieceEngine } from '../core/piece/index.js';
import { runAgent } from '../agents/runner.js';
import { detectMatchedRule } from '../core/piece/index.js';
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
import {
makeResponse,
makeMovement,

View File

@ -16,9 +16,9 @@ import { makeRule } from './test-helpers.js';
// --- Mock imports (consumers must call vi.mock before importing this) ---
import { runAgent } from '../agents/runner.js';
import { detectMatchedRule } from '../core/piece/index.js';
import type { RuleMatch } from '../core/piece/index.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/index.js';
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
import type { RuleMatch } from '../core/piece/evaluation/index.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/phase-runner.js';
import { generateReportDir } from '../shared/utils/index.js';
// --- Factory functions ---

View File

@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
// --- Imports (after mocks) ---
import { PieceEngine } from '../core/piece/index.js';
import { runReportPhase } from '../core/piece/index.js';
import { runReportPhase } from '../core/piece/phase-runner.js';
import {
makeResponse,
makeMovement,

View File

@ -34,7 +34,8 @@ vi.mock('../infra/config/project/projectConfig.js', () => ({
// --- Imports (after mocks) ---
import { detectMatchedRule, evaluateAggregateConditions } from '../core/piece/index.js';
import { evaluateAggregateConditions } from '../core/piece/index.js';
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import type { RuleMatch, RuleEvaluatorContext } from '../core/piece/index.js';

View File

@ -1,183 +0,0 @@
/**
* Test for Fallback Strategies
*/
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import type { PieceMovement } from '../core/models/types.js';
import type { JudgmentContext } from '../core/piece/judgment/FallbackStrategy.js';
import { runAgent } from '../agents/runner.js';
import {
AutoSelectStrategy,
ReportBasedStrategy,
ResponseBasedStrategy,
AgentConsultStrategy,
JudgmentStrategyFactory,
} from '../core/piece/judgment/FallbackStrategy.js';
// Mock runAgent
vi.mock('../agents/runner.js', () => ({
runAgent: vi.fn(),
}));
describe('JudgmentStrategies', () => {
const mockStep: PieceMovement = {
name: 'test-movement',
persona: 'test-agent',
rules: [
{ description: 'Rule 1', condition: 'approved' },
{ description: 'Rule 2', condition: 'rejected' },
],
};
const mockContext: JudgmentContext = {
step: mockStep,
cwd: '/test/cwd',
language: 'en',
reportDir: '/test/reports',
lastResponse: 'Last response content',
sessionId: 'session-123',
};
beforeEach(() => {
vi.clearAllMocks();
});
describe('AutoSelectStrategy', () => {
it('should apply when step has only one rule', () => {
const singleRuleStep: PieceMovement = {
name: 'single-rule',
rules: [{ description: 'Only rule', condition: 'always' }],
};
const strategy = new AutoSelectStrategy();
expect(strategy.canApply({ ...mockContext, step: singleRuleStep })).toBe(true);
});
it('should not apply when step has multiple rules', () => {
const strategy = new AutoSelectStrategy();
expect(strategy.canApply(mockContext)).toBe(false);
});
it('should return auto-selected tag', async () => {
const singleRuleStep: PieceMovement = {
name: 'single-rule',
rules: [{ description: 'Only rule', condition: 'always' }],
};
const strategy = new AutoSelectStrategy();
const result = await strategy.execute({ ...mockContext, step: singleRuleStep });
expect(result.success).toBe(true);
expect(result.tag).toBe('[SINGLE-RULE:1]');
});
});
describe('ReportBasedStrategy', () => {
it('should apply when reportDir and output contracts are configured', () => {
const strategy = new ReportBasedStrategy();
const stepWithOutputContracts: PieceMovement = {
...mockStep,
outputContracts: [{ label: 'review', path: 'review-report.md' }],
};
expect(strategy.canApply({ ...mockContext, step: stepWithOutputContracts })).toBe(true);
});
it('should not apply when reportDir is missing', () => {
const strategy = new ReportBasedStrategy();
expect(strategy.canApply({ ...mockContext, reportDir: undefined })).toBe(false);
});
it('should not apply when step has no output contracts configured', () => {
const strategy = new ReportBasedStrategy();
// mockStep has no outputContracts field → getReportFiles returns []
expect(strategy.canApply(mockContext)).toBe(false);
});
it('should use only latest report file from reports directory', async () => {
const tmpRoot = mkdtempSync(join(tmpdir(), 'takt-judgment-report-'));
try {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const historyDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'logs', 'reports-history');
mkdirSync(reportDir, { recursive: true });
mkdirSync(historyDir, { recursive: true });
const latestFile = '05-architect-review.md';
writeFileSync(join(reportDir, latestFile), 'LATEST-ONLY-CONTENT');
writeFileSync(join(historyDir, '05-architect-review.20260210T061143Z.md'), 'OLD-HISTORY-CONTENT');
const stepWithOutputContracts: PieceMovement = {
...mockStep,
outputContracts: [{ name: latestFile }],
};
const runAgentMock = vi.mocked(runAgent);
runAgentMock.mockResolvedValue({
persona: 'conductor',
status: 'done',
content: '[TEST-MOVEMENT:1]',
timestamp: new Date('2026-02-10T07:11:43Z'),
});
const strategy = new ReportBasedStrategy();
const result = await strategy.execute({
...mockContext,
step: stepWithOutputContracts,
reportDir,
});
expect(result.success).toBe(true);
expect(runAgentMock).toHaveBeenCalledTimes(1);
const instruction = runAgentMock.mock.calls[0]?.[1];
expect(instruction).toContain('LATEST-ONLY-CONTENT');
expect(instruction).not.toContain('OLD-HISTORY-CONTENT');
} finally {
rmSync(tmpRoot, { recursive: true, force: true });
}
});
});
describe('ResponseBasedStrategy', () => {
it('should apply when lastResponse is provided', () => {
const strategy = new ResponseBasedStrategy();
expect(strategy.canApply(mockContext)).toBe(true);
});
it('should not apply when lastResponse is missing', () => {
const strategy = new ResponseBasedStrategy();
expect(strategy.canApply({ ...mockContext, lastResponse: undefined })).toBe(false);
});
it('should not apply when lastResponse is empty', () => {
const strategy = new ResponseBasedStrategy();
expect(strategy.canApply({ ...mockContext, lastResponse: '' })).toBe(false);
});
});
describe('AgentConsultStrategy', () => {
it('should apply when sessionId is provided', () => {
const strategy = new AgentConsultStrategy();
expect(strategy.canApply(mockContext)).toBe(true);
});
it('should not apply when sessionId is missing', () => {
const strategy = new AgentConsultStrategy();
expect(strategy.canApply({ ...mockContext, sessionId: undefined })).toBe(false);
});
it('should not apply when sessionId is empty', () => {
const strategy = new AgentConsultStrategy();
expect(strategy.canApply({ ...mockContext, sessionId: '' })).toBe(false);
});
});
describe('JudgmentStrategyFactory', () => {
it('should create strategies in correct order', () => {
const strategies = JudgmentStrategyFactory.createStrategies();
expect(strategies).toHaveLength(4);
expect(strategies[0]).toBeInstanceOf(AutoSelectStrategy);
expect(strategies[1]).toBeInstanceOf(ReportBasedStrategy);
expect(strategies[2]).toBeInstanceOf(ResponseBasedStrategy);
expect(strategies[3]).toBeInstanceOf(AgentConsultStrategy);
});
});
});

View File

@ -1,194 +0,0 @@
/**
* Unit tests for FallbackStrategy judgment strategies
*
* Tests AutoSelectStrategy and canApply logic for all strategies.
* Strategies requiring external agent calls (ReportBased, ResponseBased,
* AgentConsult) are tested for canApply and input validation only.
*/
import { describe, it, expect } from 'vitest';
import {
AutoSelectStrategy,
ReportBasedStrategy,
ResponseBasedStrategy,
AgentConsultStrategy,
JudgmentStrategyFactory,
type JudgmentContext,
} from '../core/piece/judgment/FallbackStrategy.js';
import { makeMovement } from './test-helpers.js';
function makeContext(overrides: Partial<JudgmentContext> = {}): JudgmentContext {
return {
step: makeMovement(),
cwd: '/tmp/test',
...overrides,
};
}
describe('AutoSelectStrategy', () => {
const strategy = new AutoSelectStrategy();
it('should have name "AutoSelect"', () => {
expect(strategy.name).toBe('AutoSelect');
});
describe('canApply', () => {
it('should return true when movement has exactly one rule', () => {
const ctx = makeContext({
step: makeMovement({
rules: [{ condition: 'done', next: 'COMPLETE' }],
}),
});
expect(strategy.canApply(ctx)).toBe(true);
});
it('should return false when movement has multiple rules', () => {
const ctx = makeContext({
step: makeMovement({
rules: [
{ condition: 'approved', next: 'implement' },
{ condition: 'rejected', next: 'review' },
],
}),
});
expect(strategy.canApply(ctx)).toBe(false);
});
it('should return false when movement has no rules', () => {
const ctx = makeContext({
step: makeMovement({ rules: undefined }),
});
expect(strategy.canApply(ctx)).toBe(false);
});
});
describe('execute', () => {
it('should return auto-selected tag for single-branch movement', async () => {
const ctx = makeContext({
step: makeMovement({
name: 'review',
rules: [{ condition: 'done', next: 'COMPLETE' }],
}),
});
const result = await strategy.execute(ctx);
expect(result.success).toBe(true);
expect(result.tag).toBe('[REVIEW:1]');
});
});
});
describe('ReportBasedStrategy', () => {
const strategy = new ReportBasedStrategy();
it('should have name "ReportBased"', () => {
expect(strategy.name).toBe('ReportBased');
});
describe('canApply', () => {
it('should return true when reportDir and outputContracts are present', () => {
const ctx = makeContext({
reportDir: '/tmp/reports',
step: makeMovement({
outputContracts: [{ name: 'report.md' }],
}),
});
expect(strategy.canApply(ctx)).toBe(true);
});
it('should return false when reportDir is missing', () => {
const ctx = makeContext({
step: makeMovement({
outputContracts: [{ name: 'report.md' }],
}),
});
expect(strategy.canApply(ctx)).toBe(false);
});
it('should return false when outputContracts is empty', () => {
const ctx = makeContext({
reportDir: '/tmp/reports',
step: makeMovement({ outputContracts: [] }),
});
expect(strategy.canApply(ctx)).toBe(false);
});
it('should return false when outputContracts is undefined', () => {
const ctx = makeContext({
reportDir: '/tmp/reports',
step: makeMovement(),
});
expect(strategy.canApply(ctx)).toBe(false);
});
});
});
describe('ResponseBasedStrategy', () => {
const strategy = new ResponseBasedStrategy();
it('should have name "ResponseBased"', () => {
expect(strategy.name).toBe('ResponseBased');
});
describe('canApply', () => {
it('should return true when lastResponse is non-empty', () => {
const ctx = makeContext({ lastResponse: 'some response' });
expect(strategy.canApply(ctx)).toBe(true);
});
it('should return false when lastResponse is undefined', () => {
const ctx = makeContext({ lastResponse: undefined });
expect(strategy.canApply(ctx)).toBe(false);
});
it('should return false when lastResponse is empty string', () => {
const ctx = makeContext({ lastResponse: '' });
expect(strategy.canApply(ctx)).toBe(false);
});
});
});
describe('AgentConsultStrategy', () => {
const strategy = new AgentConsultStrategy();
it('should have name "AgentConsult"', () => {
expect(strategy.name).toBe('AgentConsult');
});
describe('canApply', () => {
it('should return true when sessionId is non-empty', () => {
const ctx = makeContext({ sessionId: 'session-123' });
expect(strategy.canApply(ctx)).toBe(true);
});
it('should return false when sessionId is undefined', () => {
const ctx = makeContext({ sessionId: undefined });
expect(strategy.canApply(ctx)).toBe(false);
});
it('should return false when sessionId is empty string', () => {
const ctx = makeContext({ sessionId: '' });
expect(strategy.canApply(ctx)).toBe(false);
});
});
describe('execute', () => {
it('should return failure when sessionId is not provided', async () => {
const ctx = makeContext({ sessionId: undefined });
const result = await strategy.execute(ctx);
expect(result.success).toBe(false);
expect(result.reason).toBe('Session ID not provided');
});
});
});
describe('JudgmentStrategyFactory', () => {
it('should create strategies in correct priority order', () => {
const strategies = JudgmentStrategyFactory.createStrategies();
expect(strategies).toHaveLength(4);
expect(strategies[0]!.name).toBe('AutoSelect');
expect(strategies[1]!.name).toBe('ReportBased');
expect(strategies[2]!.name).toBe('ResponseBased');
expect(strategies[3]!.name).toBe('AgentConsult');
});
});

View File

@ -34,7 +34,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
// --- Imports (after mocks) ---
import { PieceEngine } from '../core/piece/index.js';
import { runReportPhase } from '../core/piece/index.js';
import { runReportPhase } from '../core/piece/phase-runner.js';
import {
makeResponse,
makeMovement,

View File

@ -0,0 +1,76 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
const readFileSyncMock = vi.fn((path: string) => {
if (path.endsWith('judgment.json')) {
return JSON.stringify({ type: 'object', properties: { step: { type: 'integer' } } });
}
if (path.endsWith('evaluation.json')) {
return JSON.stringify({ type: 'object', properties: { matched_index: { type: 'integer' } } });
}
if (path.endsWith('decomposition.json')) {
return JSON.stringify({
type: 'object',
properties: {
parts: {
type: 'array',
items: {
type: 'object',
properties: {
id: { type: 'string' },
title: { type: 'string' },
instruction: { type: 'string' },
},
},
},
},
});
}
throw new Error(`Unexpected schema path: ${path}`);
});
vi.mock('node:fs', () => ({
readFileSync: readFileSyncMock,
}));
vi.mock('../infra/resources/index.js', () => ({
getResourcesDir: vi.fn(() => '/mock/resources'),
}));
describe('schema-loader', () => {
beforeEach(() => {
vi.resetModules();
readFileSyncMock.mockClear();
});
it('同じスキーマを複数回ロードしても readFileSync は1回だけ', async () => {
const { loadJudgmentSchema } = await import('../core/piece/schema-loader.js');
const first = loadJudgmentSchema();
const second = loadJudgmentSchema();
expect(first).toEqual(second);
expect(readFileSyncMock).toHaveBeenCalledTimes(1);
expect(readFileSyncMock).toHaveBeenCalledWith('/mock/resources/schemas/judgment.json', 'utf-8');
});
it('loadDecompositionSchema は maxItems を注入し、呼び出しごとに独立したオブジェクトを返す', async () => {
const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js');
const first = loadDecompositionSchema(2);
const second = loadDecompositionSchema(5);
const firstParts = (first.properties as Record<string, unknown>).parts as Record<string, unknown>;
const secondParts = (second.properties as Record<string, unknown>).parts as Record<string, unknown>;
expect(firstParts.maxItems).toBe(2);
expect(secondParts.maxItems).toBe(5);
expect(readFileSyncMock).toHaveBeenCalledTimes(1);
});
it('loadDecompositionSchema は不正な maxParts を拒否する', async () => {
const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js');
expect(() => loadDecompositionSchema(0)).toThrow('maxParts must be a positive integer: 0');
expect(() => loadDecompositionSchema(-1)).toThrow('maxParts must be a positive integer: -1');
});
});

View File

@ -6,39 +6,12 @@
*/
import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
import { loadTemplate } from '../shared/prompts/index.js';
import { createLogger } from '../shared/utils/index.js';
import { runAgent } from './runner.js';
import { evaluateCondition } from '../core/piece/agent-usecases.js';
const log = createLogger('ai-judge');
/**
* Detect judge rule index from [JUDGE:N] tag pattern.
* Returns 0-based rule index, or -1 if no match.
*/
export function detectJudgeIndex(content: string): number {
const regex = /\[JUDGE:(\d+)\]/i;
const match = content.match(regex);
if (match?.[1]) {
const index = Number.parseInt(match[1], 10) - 1;
return index >= 0 ? index : -1;
}
return -1;
}
/**
* Build the prompt for the AI judge that evaluates agent output against ai() conditions.
*/
export function buildJudgePrompt(
agentOutput: string,
aiConditions: AiJudgeCondition[],
): string {
const conditionList = aiConditions
.map((c) => `| ${c.index + 1} | ${c.text} |`)
.join('\n');
return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}
export { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
/**
* Call AI judge to evaluate agent output against ai() conditions.
@ -50,18 +23,9 @@ export const callAiJudge: AiJudgeCaller = async (
conditions: AiJudgeCondition[],
options: { cwd: string },
): Promise<number> => {
const prompt = buildJudgePrompt(agentOutput, conditions);
const response = await runAgent(undefined, prompt, {
cwd: options.cwd,
maxTurns: 1,
permissionMode: 'readonly',
});
if (response.status !== 'done') {
log.error('AI judge call failed', { error: response.error });
return -1;
const result = await evaluateCondition(agentOutput, conditions, options);
if (result < 0) {
log.error('AI judge call failed to match a condition');
}
return detectJudgeIndex(response.content);
return result;
};

View File

@ -2,6 +2,5 @@
* Agents module - exports agent execution utilities
*/
export { AgentRunner, runAgent } from './runner.js';
export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js';
export { AgentRunner } from './runner.js';
export type { RunAgentOptions, StreamCallback } from './types.js';

22
src/agents/judge-utils.ts Normal file
View File

@ -0,0 +1,22 @@
import { loadTemplate } from '../shared/prompts/index.js';
export function detectJudgeIndex(content: string): number {
const regex = /\[JUDGE:(\d+)\]/i;
const match = content.match(regex);
if (match?.[1]) {
const index = Number.parseInt(match[1], 10) - 1;
return index >= 0 ? index : -1;
}
return -1;
}
export function buildJudgePrompt(
agentOutput: string,
aiConditions: Array<{ index: number; text: string }>,
): string {
const conditionList = aiConditions
.map((c) => `| ${c.index + 1} | ${c.text} |`)
.join('\n');
return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}

View File

@ -111,6 +111,7 @@ export class AgentRunner {
onPermissionRequest: options.onPermissionRequest,
onAskUserQuestion: options.onAskUserQuestion,
bypassPermissions: options.bypassPermissions,
outputSchema: options.outputSchema,
};
}

View File

@ -39,4 +39,6 @@ export interface RunAgentOptions {
movementsList: ReadonlyArray<{ name: string; description?: string }>;
currentPosition: string;
};
/** JSON Schema for structured output */
outputSchema?: Record<string, unknown>;
}

View File

@ -17,5 +17,6 @@ export interface AgentResponse {
matchedRuleIndex?: number;
/** How the rule match was detected */
matchedRuleMethod?: RuleMatchMethod;
/** Structured output returned by provider SDK (JSON Schema mode) */
structuredOutput?: Record<string, unknown>;
}

View File

@ -21,6 +21,8 @@ export type Status =
/** How a rule match was detected */
export type RuleMatchMethod =
| 'aggregate'
| 'auto_select'
| 'structured_output'
| 'phase3_tag'
| 'phase1_tag'
| 'ai_judge'

View File

@ -0,0 +1,235 @@
import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../models/types.js';
import { runAgent, type RunAgentOptions } from '../../agents/runner.js';
import { detectJudgeIndex, buildJudgePrompt } from '../../agents/judge-utils.js';
import { parseParts } from './engine/task-decomposer.js';
import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema } from './schema-loader.js';
export type UsecaseOptions = RunAgentOptions;
export interface JudgeStatusOptions {
cwd: string;
movementName: string;
language?: Language;
}
export interface JudgeStatusResult {
ruleIndex: number;
method: RuleMatchMethod;
}
export interface EvaluateConditionOptions {
cwd: string;
}
export interface DecomposeTaskOptions {
cwd: string;
persona?: string;
language?: Language;
model?: string;
provider?: 'claude' | 'codex' | 'opencode' | 'mock';
}
function detectRuleIndex(content: string, movementName: string): number {
const tag = movementName.toUpperCase();
const regex = new RegExp(`\\[${tag}:(\\d+)\\]`, 'gi');
const matches = [...content.matchAll(regex)];
const match = matches.at(-1);
if (match?.[1]) {
const index = Number.parseInt(match[1], 10) - 1;
return index >= 0 ? index : -1;
}
return -1;
}
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
if (!Array.isArray(raw)) {
throw new Error('Structured output "parts" must be an array');
}
if (raw.length === 0) {
throw new Error('Structured output "parts" must not be empty');
}
if (raw.length > maxParts) {
throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`);
}
const parts: PartDefinition[] = raw.map((entry, index) => {
if (typeof entry !== 'object' || entry == null || Array.isArray(entry)) {
throw new Error(`Part[${index}] must be an object`);
}
const row = entry as Record<string, unknown>;
const id = row.id;
const title = row.title;
const instruction = row.instruction;
const timeoutMs = row.timeout_ms;
if (typeof id !== 'string' || id.trim().length === 0) {
throw new Error(`Part[${index}] "id" must be a non-empty string`);
}
if (typeof title !== 'string' || title.trim().length === 0) {
throw new Error(`Part[${index}] "title" must be a non-empty string`);
}
if (typeof instruction !== 'string' || instruction.trim().length === 0) {
throw new Error(`Part[${index}] "instruction" must be a non-empty string`);
}
if (
timeoutMs != null
&& (typeof timeoutMs !== 'number' || !Number.isInteger(timeoutMs) || timeoutMs <= 0)
) {
throw new Error(`Part[${index}] "timeout_ms" must be a positive integer`);
}
return {
id,
title,
instruction,
timeoutMs: timeoutMs as number | undefined,
};
});
const seen = new Set<string>();
for (const part of parts) {
if (seen.has(part.id)) {
throw new Error(`Duplicate part id: ${part.id}`);
}
seen.add(part.id);
}
return parts;
}
export async function executeAgent(
persona: string | undefined,
instruction: string,
options: UsecaseOptions,
): Promise<AgentResponse> {
return runAgent(persona, instruction, options);
}
export async function generateReport(
persona: string | undefined,
instruction: string,
options: UsecaseOptions,
): Promise<AgentResponse> {
return runAgent(persona, instruction, options);
}
export async function executePart(
persona: string | undefined,
instruction: string,
options: UsecaseOptions,
): Promise<AgentResponse> {
return runAgent(persona, instruction, options);
}
export async function evaluateCondition(
agentOutput: string,
conditions: Array<{ index: number; text: string }>,
options: EvaluateConditionOptions,
): Promise<number> {
const prompt = buildJudgePrompt(agentOutput, conditions);
const response = await runAgent(undefined, prompt, {
cwd: options.cwd,
maxTurns: 1,
permissionMode: 'readonly',
outputSchema: loadEvaluationSchema(),
});
if (response.status !== 'done') {
return -1;
}
const matchedIndex = response.structuredOutput?.matched_index;
if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) {
const zeroBased = matchedIndex - 1;
if (zeroBased >= 0 && zeroBased < conditions.length) {
return zeroBased;
}
}
return detectJudgeIndex(response.content);
}
export async function judgeStatus(
instruction: string,
rules: PieceRule[],
options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
if (rules.length === 0) {
throw new Error('judgeStatus requires at least one rule');
}
if (rules.length === 1) {
return {
ruleIndex: 0,
method: 'auto_select',
};
}
const response = await runAgent('conductor', instruction, {
cwd: options.cwd,
maxTurns: 3,
permissionMode: 'readonly',
language: options.language,
outputSchema: loadJudgmentSchema(),
});
if (response.status === 'done') {
const stepNumber = response.structuredOutput?.step;
if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) {
const ruleIndex = stepNumber - 1;
if (ruleIndex >= 0 && ruleIndex < rules.length) {
return {
ruleIndex,
method: 'structured_output',
};
}
}
const tagRuleIndex = detectRuleIndex(response.content, options.movementName);
if (tagRuleIndex >= 0 && tagRuleIndex < rules.length) {
return {
ruleIndex: tagRuleIndex,
method: 'phase3_tag',
};
}
}
const conditions = rules.map((rule, index) => ({ index, text: rule.condition }));
const fallbackIndex = await evaluateCondition(instruction, conditions, { cwd: options.cwd });
if (fallbackIndex >= 0 && fallbackIndex < rules.length) {
return {
ruleIndex: fallbackIndex,
method: 'ai_judge',
};
}
throw new Error(`Status not found for movement "${options.movementName}"`);
}
export async function decomposeTask(
instruction: string,
maxParts: number,
options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
const response = await runAgent(options.persona, instruction, {
cwd: options.cwd,
language: options.language,
model: options.model,
provider: options.provider,
permissionMode: 'readonly',
maxTurns: 3,
outputSchema: loadDecompositionSchema(maxParts),
});
if (response.status !== 'done') {
const detail = response.error ?? response.content;
throw new Error(`Team leader failed: ${detail}`);
}
const parts = response.structuredOutput?.parts;
if (parts != null) {
return toPartDefinitions(parts, maxParts);
}
return parseParts(response.content, maxParts);
}

View File

@ -15,7 +15,8 @@ import type { ArpeggioMovementConfig, BatchResult, DataBatch } from '../arpeggio
import { createDataSource } from '../arpeggio/data-source-factory.js';
import { loadTemplate, expandTemplate } from '../arpeggio/template.js';
import { buildMergeFn, writeMergedOutput } from '../arpeggio/merge.js';
import { runAgent, type RunAgentOptions } from '../../../agents/runner.js';
import type { RunAgentOptions } from '../../../agents/runner.js';
import { executeAgent } from '../agent-usecases.js';
import { detectMatchedRule } from '../evaluation/index.js';
import { incrementMovementIteration } from './state-manager.js';
import { createLogger } from '../../../shared/utils/index.js';
@ -84,7 +85,7 @@ async function executeBatchWithRetry(
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const response = await runAgent(persona, prompt, agentOptions);
const response = await executeAgent(persona, prompt, agentOptions);
if (response.status === 'error') {
lastError = response.error ?? response.content ?? 'Agent returned error status';
log.info('Batch execution failed, retrying', {

View File

@ -15,7 +15,7 @@ import type {
Language,
} from '../../models/types.js';
import type { PhaseName } from '../types.js';
import { runAgent } from '../../../agents/runner.js';
import { executeAgent } from '../agent-usecases.js';
import { InstructionBuilder, isOutputContractItem } from '../instruction/InstructionBuilder.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
import { detectMatchedRule } from '../evaluation/index.js';
@ -202,7 +202,7 @@ export class MovementExecutor {
// Phase 1: main execution (Write excluded if movement has report)
this.deps.onPhaseStart?.(step, 1, 'execute', instruction);
const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
let response = await runAgent(step.persona, instruction, agentOptions);
let response = await executeAgent(step.persona, instruction, agentOptions);
updatePersonaSession(sessionKey, response.sessionId);
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error);

View File

@ -10,7 +10,7 @@ import type {
PieceState,
AgentResponse,
} from '../../models/types.js';
import { runAgent } from '../../../agents/runner.js';
import { executeAgent } from '../agent-usecases.js';
import { ParallelLogger } from './parallel-logger.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
import { detectMatchedRule } from '../evaluation/index.js';
@ -101,7 +101,7 @@ export class ParallelRunner {
: baseOptions;
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction);
const subResponse = await runAgent(subMovement.persona, subInstruction, agentOptions);
const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions);
updatePersonaSession(subSessionKey, subResponse.sessionId);
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error);

View File

@ -1,4 +1,3 @@
import { runAgent } from '../../../agents/runner.js';
import type {
PieceMovement,
PieceState,
@ -6,11 +5,11 @@ import type {
PartDefinition,
PartResult,
} from '../../models/types.js';
import { decomposeTask, executePart } from '../agent-usecases.js';
import { detectMatchedRule } from '../evaluation/index.js';
import { buildSessionKey } from '../session-key.js';
import { ParallelLogger } from './parallel-logger.js';
import { incrementMovementIteration } from './state-manager.js';
import { parseParts } from './task-decomposer.js';
import { buildAbortSignal } from './abort-signal.js';
import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
import type { OptionsBuilder } from './OptionsBuilder.js';
@ -99,26 +98,19 @@ export class TeamLeaderRunner {
);
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction);
const leaderResponse = await runAgent(
leaderStep.persona,
instruction,
this.deps.optionsBuilder.buildAgentOptions(leaderStep),
);
updatePersonaSession(buildSessionKey(leaderStep), leaderResponse.sessionId);
this.deps.onPhaseComplete?.(
leaderStep,
1,
'execute',
leaderResponse.content,
leaderResponse.status,
leaderResponse.error,
);
if (leaderResponse.status === 'error') {
const detail = leaderResponse.error ?? leaderResponse.content;
throw new Error(`Team leader failed: ${detail}`);
}
const parts = parseParts(leaderResponse.content, teamLeaderConfig.maxParts);
const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, {
cwd: this.deps.getCwd(),
persona: leaderStep.persona,
model: leaderStep.model,
provider: leaderStep.provider,
});
const leaderResponse: AgentResponse = {
persona: leaderStep.persona ?? leaderStep.name,
status: 'done',
content: JSON.stringify({ parts }, null, 2),
timestamp: new Date(),
};
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error);
log.debug('Team leader decomposed parts', {
movement: step.name,
partCount: parts.length,
@ -240,7 +232,7 @@ export class TeamLeaderRunner {
: { ...baseOptions, abortSignal: signal };
try {
const response = await runAgent(partMovement.persona, part.instruction, options);
const response = await executePart(partMovement.persona, part.instruction, options);
updatePersonaSession(buildSessionKey(partMovement), response.sessionId);
return {
part,

View File

@ -60,8 +60,19 @@ export { buildEditRule, type InstructionContext } from './instruction/instructio
export { generateStatusRulesComponents, type StatusRulesComponents } from './instruction/status-rules.js';
// Rule evaluation
export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, detectMatchedRule, evaluateAggregateConditions } from './evaluation/index.js';
export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, evaluateAggregateConditions } from './evaluation/index.js';
export { AggregateEvaluator } from './evaluation/AggregateEvaluator.js';
// Phase runner
export { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js';
export { needsStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js';
// Agent usecases
export {
executeAgent,
generateReport,
executePart,
judgeStatus,
evaluateCondition,
decomposeTask,
type JudgeStatusResult,
} from './agent-usecases.js';

View File

@ -27,6 +27,8 @@ export interface StatusJudgmentContext {
lastResponse?: string;
/** Input source type for fallback strategies */
inputSource?: 'report' | 'response';
/** Structured output mode omits tag-format instructions */
useStructuredOutput?: boolean;
}
/**
@ -67,7 +69,9 @@ export class StatusJudgmentBuilder {
return loadTemplate('perform_phase3_message', language, {
reportContent: contentToJudge,
criteriaTable: components.criteriaTable,
outputList: components.outputList,
outputList: this.context.useStructuredOutput
? ''
: components.outputList,
hasAppendix: components.hasAppendix,
appendixContent: components.appendixContent,
});

View File

@ -1,255 +0,0 @@
/**
* Fallback strategies for Phase 3 judgment.
*
* Implements Chain of Responsibility pattern to try multiple judgment methods
* when conductor cannot determine the status from report alone.
*/
import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import type { PieceMovement, Language } from '../../models/types.js';
import { runAgent } from '../../../agents/runner.js';
import { StatusJudgmentBuilder } from '../instruction/StatusJudgmentBuilder.js';
import { JudgmentDetector, type JudgmentResult } from './JudgmentDetector.js';
import { hasOnlyOneBranch, getAutoSelectedTag, getReportFiles } from '../evaluation/rule-utils.js';
import { createLogger } from '../../../shared/utils/index.js';
const log = createLogger('fallback-strategy');
export interface JudgmentContext {
step: PieceMovement;
cwd: string;
language?: Language;
reportDir?: string;
lastResponse?: string; // Phase 1の最終応答
sessionId?: string;
}
export interface JudgmentStrategy {
readonly name: string;
canApply(context: JudgmentContext): boolean;
execute(context: JudgmentContext): Promise<JudgmentResult>;
}
/**
* Base class for judgment strategies using Template Method Pattern.
*/
abstract class JudgmentStrategyBase implements JudgmentStrategy {
abstract readonly name: string;
abstract canApply(context: JudgmentContext): boolean;
async execute(context: JudgmentContext): Promise<JudgmentResult> {
try {
// 1. 情報収集(サブクラスで実装)
const input = await this.gatherInput(context);
// 2. 指示生成(サブクラスで実装)
const instruction = this.buildInstruction(input, context);
// 3. conductor実行共通
const response = await this.runConductor(instruction, context);
// 4. 結果検出(共通)
return JudgmentDetector.detect(response);
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
log.debug(`Strategy ${this.name} threw error`, { error: errorMsg });
return {
success: false,
reason: `Strategy failed with error: ${errorMsg}`,
};
}
}
protected abstract gatherInput(context: JudgmentContext): Promise<string>;
protected abstract buildInstruction(input: string, context: JudgmentContext): string;
protected async runConductor(instruction: string, context: JudgmentContext): Promise<string> {
const response = await runAgent('conductor', instruction, {
cwd: context.cwd,
maxTurns: 3,
permissionMode: 'readonly',
language: context.language,
});
if (response.status !== 'done') {
throw new Error(`Conductor failed: ${response.error || response.content || 'Unknown error'}`);
}
return response.content;
}
}
/**
* Strategy 1: Auto-select when there's only one branch.
* This strategy doesn't use conductor - just returns the single tag.
*/
export class AutoSelectStrategy implements JudgmentStrategy {
readonly name = 'AutoSelect';
canApply(context: JudgmentContext): boolean {
return hasOnlyOneBranch(context.step);
}
async execute(context: JudgmentContext): Promise<JudgmentResult> {
const tag = getAutoSelectedTag(context.step);
log.debug('Auto-selected tag (single branch)', { tag });
return {
success: true,
tag,
};
}
}
/**
* Strategy 2: Report-based judgment.
* Read report files and ask conductor to judge.
*/
export class ReportBasedStrategy extends JudgmentStrategyBase {
readonly name = 'ReportBased';
canApply(context: JudgmentContext): boolean {
return context.reportDir !== undefined && getReportFiles(context.step.outputContracts).length > 0;
}
protected async gatherInput(context: JudgmentContext): Promise<string> {
if (!context.reportDir) {
throw new Error('Report directory not provided');
}
const reportFiles = getReportFiles(context.step.outputContracts);
if (reportFiles.length === 0) {
throw new Error('No report files configured');
}
const reportContents: string[] = [];
for (const fileName of reportFiles) {
const filePath = resolve(context.reportDir, fileName);
try {
const content = readFileSync(filePath, 'utf-8');
reportContents.push(`# ${fileName}\n\n${content}`);
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
throw new Error(`Failed to read report file ${fileName}: ${errorMsg}`);
}
}
return reportContents.join('\n\n---\n\n');
}
protected buildInstruction(input: string, context: JudgmentContext): string {
return new StatusJudgmentBuilder(context.step, {
language: context.language,
reportContent: input,
inputSource: 'report',
}).build();
}
}
/**
* Strategy 3: Response-based judgment.
* Use the last response from Phase 1 to judge.
*/
export class ResponseBasedStrategy extends JudgmentStrategyBase {
readonly name = 'ResponseBased';
canApply(context: JudgmentContext): boolean {
return context.lastResponse !== undefined && context.lastResponse.length > 0;
}
protected async gatherInput(context: JudgmentContext): Promise<string> {
if (!context.lastResponse) {
throw new Error('Last response not provided');
}
return context.lastResponse;
}
protected buildInstruction(input: string, context: JudgmentContext): string {
return new StatusJudgmentBuilder(context.step, {
language: context.language,
lastResponse: input,
inputSource: 'response',
}).build();
}
}
/**
* Strategy 4: Agent consult.
* Resume the Phase 1 agent session and ask which tag is appropriate.
*/
export class AgentConsultStrategy implements JudgmentStrategy {
readonly name = 'AgentConsult';
canApply(context: JudgmentContext): boolean {
return context.sessionId !== undefined && context.sessionId.length > 0;
}
async execute(context: JudgmentContext): Promise<JudgmentResult> {
if (!context.sessionId) {
return {
success: false,
reason: 'Session ID not provided',
};
}
try {
const question = this.buildQuestion(context);
const response = await runAgent(context.step.persona ?? context.step.name, question, {
cwd: context.cwd,
sessionId: context.sessionId,
maxTurns: 3,
language: context.language,
});
if (response.status !== 'done') {
return {
success: false,
reason: `Agent consultation failed: ${response.error || 'Unknown error'}`,
};
}
return JudgmentDetector.detect(response.content);
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
log.debug('Agent consult strategy failed', { error: errorMsg });
return {
success: false,
reason: `Agent consultation error: ${errorMsg}`,
};
}
}
private buildQuestion(context: JudgmentContext): string {
const rules = context.step.rules || [];
const ruleDescriptions = rules.map((rule, idx) => {
const tag = `[${context.step.name.toUpperCase()}:${idx + 1}]`;
const desc = rule.condition || `Rule ${idx + 1}`;
return `- ${tag}: ${desc}`;
}).join('\n');
const lang = context.language || 'en';
if (lang === 'ja') {
return `あなたの作業結果に基づいて、以下の判定タグのうちどれが適切か教えてください:\n\n${ruleDescriptions}\n\n該当するタグを1つだけ出力してください例: [${context.step.name.toUpperCase()}:1])。`;
} else {
return `Based on your work, which of the following judgment tags is appropriate?\n\n${ruleDescriptions}\n\nPlease output only one tag (e.g., [${context.step.name.toUpperCase()}:1]).`;
}
}
}
/**
* Factory for creating judgment strategies in order of priority.
*/
export class JudgmentStrategyFactory {
static createStrategies(): JudgmentStrategy[] {
return [
new AutoSelectStrategy(),
new ReportBasedStrategy(),
new ResponseBasedStrategy(),
new AgentConsultStrategy(),
];
}
}

View File

@ -6,13 +6,3 @@ export {
JudgmentDetector,
type JudgmentResult,
} from './JudgmentDetector.js';
export {
AutoSelectStrategy,
ReportBasedStrategy,
ResponseBasedStrategy,
AgentConsultStrategy,
JudgmentStrategyFactory,
type JudgmentContext,
type JudgmentStrategy,
} from './FallbackStrategy.js';

View File

@ -9,12 +9,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname, parse, resolve, sep } from 'node:path';
import type { PieceMovement, Language, AgentResponse } from '../models/types.js';
import type { PhaseName } from './types.js';
import { runAgent, type RunAgentOptions } from '../../agents/runner.js';
import type { RunAgentOptions } from '../../agents/runner.js';
import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js';
import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js';
import { JudgmentStrategyFactory, type JudgmentContext } from './judgment/index.js';
import { generateReport } from './agent-usecases.js';
import { createLogger } from '../../shared/utils/index.js';
import { buildSessionKey } from './session-key.js';
export { runStatusJudgmentPhase } from './status-judgment-phase.js';
const log = createLogger('phase-runner');
@ -213,7 +214,7 @@ async function runSingleReportAttempt(
let response: AgentResponse;
try {
response = await runAgent(step.persona, instruction, options);
response = await generateReport(step.persona, instruction, options);
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg);
@ -241,55 +242,3 @@ async function runSingleReportAttempt(
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
return { kind: 'success', content: trimmedContent, response };
}
/**
* Phase 3: Status judgment.
* Uses the 'conductor' agent in a new session to output a status tag.
* Implements multi-stage fallback logic to ensure judgment succeeds.
* Returns the Phase 3 response content (containing the status tag).
*/
export async function runStatusJudgmentPhase(
step: PieceMovement,
ctx: PhaseRunnerContext,
): Promise<string> {
log.debug('Running status judgment phase', { movement: step.name });
const strategies = JudgmentStrategyFactory.createStrategies();
const sessionKey = buildSessionKey(step);
const judgmentContext: JudgmentContext = {
step,
cwd: ctx.cwd,
language: ctx.language,
reportDir: ctx.reportDir,
lastResponse: ctx.lastResponse,
sessionId: ctx.getSessionId(sessionKey),
};
for (const strategy of strategies) {
if (!strategy.canApply(judgmentContext)) {
log.debug(`Strategy ${strategy.name} not applicable, skipping`);
continue;
}
log.debug(`Trying strategy: ${strategy.name}`);
ctx.onPhaseStart?.(step, 3, 'judge', `Strategy: ${strategy.name}`);
try {
const result = await strategy.execute(judgmentContext);
if (result.success) {
log.debug(`Strategy ${strategy.name} succeeded`, { tag: result.tag });
ctx.onPhaseComplete?.(step, 3, 'judge', result.tag!, 'done');
return result.tag!;
}
log.debug(`Strategy ${strategy.name} failed`, { reason: result.reason });
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
log.debug(`Strategy ${strategy.name} threw error`, { error: errorMsg });
}
}
const errorMsg = 'All judgment strategies failed';
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
throw new Error(errorMsg);
}

View File

@ -0,0 +1,50 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { getResourcesDir } from '../../infra/resources/index.js';
type JsonSchema = Record<string, unknown>;
const schemaCache = new Map<string, JsonSchema>();
function loadSchema(name: string): JsonSchema {
const cached = schemaCache.get(name);
if (cached) {
return cached;
}
const schemaPath = join(getResourcesDir(), 'schemas', name);
const content = readFileSync(schemaPath, 'utf-8');
const parsed = JSON.parse(content) as JsonSchema;
schemaCache.set(name, parsed);
return parsed;
}
function cloneSchema(schema: JsonSchema): JsonSchema {
return JSON.parse(JSON.stringify(schema)) as JsonSchema;
}
export function loadJudgmentSchema(): JsonSchema {
return loadSchema('judgment.json');
}
export function loadEvaluationSchema(): JsonSchema {
return loadSchema('evaluation.json');
}
export function loadDecompositionSchema(maxParts: number): JsonSchema {
if (!Number.isInteger(maxParts) || maxParts <= 0) {
throw new Error(`maxParts must be a positive integer: ${maxParts}`);
}
const schema = cloneSchema(loadSchema('decomposition.json'));
const properties = schema.properties;
if (!properties || typeof properties !== 'object' || Array.isArray(properties)) {
throw new Error('decomposition schema is invalid: properties is missing');
}
const rawParts = (properties as Record<string, unknown>).parts;
if (!rawParts || typeof rawParts !== 'object' || Array.isArray(rawParts)) {
throw new Error('decomposition schema is invalid: parts is missing');
}
(rawParts as Record<string, unknown>).maxItems = maxParts;
return schema;
}

View File

@ -0,0 +1,78 @@
import { existsSync, readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import type { PieceMovement } from '../models/types.js';
import { judgeStatus } from './agent-usecases.js';
import { StatusJudgmentBuilder } from './instruction/StatusJudgmentBuilder.js';
import { getReportFiles } from './evaluation/rule-utils.js';
import { createLogger } from '../../shared/utils/index.js';
import type { PhaseRunnerContext } from './phase-runner.js';
const log = createLogger('phase-runner');
/**
* Phase 3: Status judgment.
* Uses the 'conductor' agent in a new session to output a status tag.
* Implements multi-stage fallback logic to ensure judgment succeeds.
* Returns the Phase 3 response content (containing the status tag).
*/
export async function runStatusJudgmentPhase(
step: PieceMovement,
ctx: PhaseRunnerContext,
): Promise<string> {
log.debug('Running status judgment phase', { movement: step.name });
if (!step.rules || step.rules.length === 0) {
throw new Error(`Status judgment requires rules for movement "${step.name}"`);
}
const reportFiles = getReportFiles(step.outputContracts);
let instruction: string | undefined;
if (reportFiles.length > 0) {
const reports: string[] = [];
for (const fileName of reportFiles) {
const filePath = resolve(ctx.reportDir, fileName);
if (!existsSync(filePath)) {
continue;
}
const content = readFileSync(filePath, 'utf-8');
reports.push(`# ${fileName}\n\n${content}`);
}
if (reports.length > 0) {
instruction = new StatusJudgmentBuilder(step, {
language: ctx.language,
reportContent: reports.join('\n\n---\n\n'),
inputSource: 'report',
useStructuredOutput: true,
}).build();
}
}
if (instruction == null) {
if (!ctx.lastResponse) {
throw new Error(`Status judgment requires report or lastResponse for movement "${step.name}"`);
}
instruction = new StatusJudgmentBuilder(step, {
language: ctx.language,
lastResponse: ctx.lastResponse,
inputSource: 'response',
useStructuredOutput: true,
}).build();
}
ctx.onPhaseStart?.(step, 3, 'judge', instruction);
try {
const result = await judgeStatus(instruction, step.rules, {
cwd: ctx.cwd,
movementName: step.name,
language: ctx.language,
});
const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`;
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done');
return tag;
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
throw error;
}
}

View File

@ -113,6 +113,16 @@ function assertTaskPrefixPair(
}
}
function toJudgmentMatchMethod(
matchedRuleMethod: string | undefined,
): string | undefined {
if (!matchedRuleMethod) return undefined;
if (matchedRuleMethod === 'structured_output') return 'structured_output';
if (matchedRuleMethod === 'ai_judge' || matchedRuleMethod === 'ai_judge_fallback') return 'ai_judge';
if (matchedRuleMethod === 'phase3_tag' || matchedRuleMethod === 'phase1_tag') return 'tag_fallback';
return undefined;
}
function createOutputFns(prefixWriter: TaskPrefixWriter | undefined): OutputFns {
if (!prefixWriter) {
return {
@ -587,6 +597,7 @@ export async function executePiece(
}
// Write step_complete record to NDJSON log
const matchMethod = toJudgmentMatchMethod(response.matchedRuleMethod);
const record: NdjsonStepComplete = {
type: 'step_complete',
step: step.name,
@ -596,6 +607,7 @@ export async function executePiece(
instruction,
...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}),
...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}),
...(matchMethod ? { matchMethod } : {}),
...(response.error ? { error: response.error } : {}),
timestamp: response.timestamp.toISOString(),
};

View File

@ -61,12 +61,6 @@ export {
createCanUseToolCallback,
createAskUserQuestionHooks,
buildSdkOptions,
callClaude,
callClaudeCustom,
callClaudeAgent,
callClaudeSkill,
detectRuleIndex,
isRegexSafe,
} from './infra/claude/index.js';
export type {
StreamEvent,
@ -90,7 +84,8 @@ export type {
} from './infra/claude/index.js';
// Codex integration
export * from './infra/codex/index.js';
export { CodexClient, mapToCodexSandboxMode } from './infra/codex/index.js';
export type { CodexCallOptions, CodexSandboxMode } from './infra/codex/index.js';
// Agent execution
export * from './agents/index.js';
@ -115,12 +110,15 @@ export {
StatusJudgmentBuilder,
buildEditRule,
RuleEvaluator,
detectMatchedRule,
evaluateAggregateConditions,
AggregateEvaluator,
needsStatusJudgmentPhase,
runReportPhase,
runStatusJudgmentPhase,
executeAgent,
generateReport,
executePart,
judgeStatus,
evaluateCondition,
decomposeTask,
} from './core/piece/index.js';
export type {
PieceEvents,
@ -133,6 +131,7 @@ export type {
ProviderType,
RuleMatch,
RuleEvaluatorContext,
JudgeStatusResult,
ReportInstructionContext,
StatusJudgmentContext,
InstructionContext,

View File

@ -52,6 +52,7 @@ export class ClaudeClient {
onAskUserQuestion: options.onAskUserQuestion,
bypassPermissions: options.bypassPermissions,
anthropicApiKey: options.anthropicApiKey,
outputSchema: options.outputSchema,
};
}
@ -76,6 +77,7 @@ export class ClaudeClient {
timestamp: new Date(),
sessionId: result.sessionId,
error: result.error,
structuredOutput: result.structuredOutput,
};
}
@ -104,6 +106,7 @@ export class ClaudeClient {
timestamp: new Date(),
sessionId: result.sessionId,
error: result.error,
structuredOutput: result.structuredOutput,
};
}
@ -153,6 +156,7 @@ export class ClaudeClient {
timestamp: new Date(),
sessionId: result.sessionId,
error: result.error,
structuredOutput: result.structuredOutput,
};
}

View File

@ -94,6 +94,7 @@ export class QueryExecutor {
let resultContent: string | undefined;
let hasResultMessage = false;
let accumulatedAssistantText = '';
let structuredOutput: Record<string, unknown> | undefined;
let onExternalAbort: (() => void) | undefined;
try {
@ -139,6 +140,17 @@ export class QueryExecutor {
const resultMsg = message as SDKResultMessage;
if (resultMsg.subtype === 'success') {
resultContent = resultMsg.result;
const rawStructuredOutput = (resultMsg as unknown as {
structured_output?: unknown;
structuredOutput?: unknown;
}).structured_output ?? (resultMsg as unknown as { structuredOutput?: unknown }).structuredOutput;
if (
rawStructuredOutput
&& typeof rawStructuredOutput === 'object'
&& !Array.isArray(rawStructuredOutput)
) {
structuredOutput = rawStructuredOutput as Record<string, unknown>;
}
success = true;
} else {
success = false;
@ -169,6 +181,7 @@ export class QueryExecutor {
content: finalContent.trim(),
sessionId,
fullContent: accumulatedAssistantText.trim(),
structuredOutput,
};
} catch (error) {
if (onExternalAbort && options.abortSignal) {

View File

@ -65,6 +65,12 @@ export class SdkOptionsBuilder {
if (this.options.agents) sdkOptions.agents = this.options.agents;
if (this.options.mcpServers) sdkOptions.mcpServers = this.options.mcpServers;
if (this.options.systemPrompt) sdkOptions.systemPrompt = this.options.systemPrompt;
if (this.options.outputSchema) {
(sdkOptions as Record<string, unknown>).outputFormat = {
type: 'json_schema',
schema: this.options.outputSchema,
};
}
if (canUseTool) sdkOptions.canUseTool = canUseTool;
if (hooks) sdkOptions.hooks = hooks;

View File

@ -109,6 +109,8 @@ export interface ClaudeResult {
interrupted?: boolean;
/** All assistant text accumulated during execution (for status detection) */
fullContent?: string;
/** Structured output returned by Claude SDK */
structuredOutput?: Record<string, unknown>;
}
/** Extended result with query ID for concurrent execution */
@ -141,6 +143,8 @@ export interface ClaudeCallOptions {
bypassPermissions?: boolean;
/** Anthropic API key to inject via env (bypasses CLI auth) */
anthropicApiKey?: string;
/** JSON Schema for structured output */
outputSchema?: Record<string, unknown>;
}
/** Options for spawning a Claude SDK query (low-level, used by executor/process) */
@ -168,6 +172,8 @@ export interface ClaudeSpawnOptions {
bypassPermissions?: boolean;
/** Anthropic API key to inject via env (bypasses CLI auth) */
anthropicApiKey?: string;
/** JSON Schema for structured output */
outputSchema?: Record<string, unknown>;
/** Callback for stderr output from the Claude Code process */
onStderr?: (data: string) => void;
}

View File

@ -150,13 +150,18 @@ export class CodexClient {
const diag = createStreamDiagnostics('codex-sdk', { agentType, model: options.model, attempt });
diagRef = diag;
const { events } = await thread.runStreamed(fullPrompt, {
const runOptions: Record<string, unknown> = {
signal: streamAbortController.signal,
});
};
if (options.outputSchema) {
runOptions.outputSchema = options.outputSchema;
}
const { events } = await thread.runStreamed(fullPrompt, runOptions as never);
resetIdleTimeout();
diag.onConnected();
let content = '';
let structuredOutput: Record<string, unknown> | undefined;
const contentOffsets = new Map<string, number>();
let success = true;
let failureMessage = '';
@ -189,6 +194,20 @@ export class CodexClient {
break;
}
if (event.type === 'turn.completed') {
const rawFinalResponse = (event as unknown as {
turn?: { finalResponse?: unknown };
}).turn?.finalResponse;
if (
rawFinalResponse
&& typeof rawFinalResponse === 'object'
&& !Array.isArray(rawFinalResponse)
) {
structuredOutput = rawFinalResponse as Record<string, unknown>;
}
continue;
}
if (event.type === 'item.started') {
const item = event.item as CodexItem | undefined;
if (item) {
@ -278,6 +297,7 @@ export class CodexClient {
content: trimmed,
timestamp: new Date(),
sessionId: currentThreadId,
structuredOutput,
};
} catch (error) {
const message = getErrorMessage(error);

View File

@ -31,4 +31,6 @@ export interface CodexCallOptions {
onStream?: StreamCallback;
/** OpenAI API key (bypasses CLI auth) */
openaiApiKey?: string;
/** JSON Schema for structured output */
outputSchema?: Record<string, unknown>;
}

View File

@ -113,6 +113,7 @@ export class SessionManager {
...(record.error ? { error: record.error } : {}),
...(record.matchedRuleIndex != null ? { matchedRuleIndex: record.matchedRuleIndex } : {}),
...(record.matchedRuleMethod ? { matchedRuleMethod: record.matchedRuleMethod } : {}),
...(record.matchMethod ? { matchMethod: record.matchMethod } : {}),
});
sessionLog.iterations++;
}

View File

@ -65,6 +65,7 @@ export async function callMock(
content,
timestamp: new Date(),
sessionId,
structuredOutput: options.structuredOutput,
};
}

View File

@ -13,6 +13,8 @@ export interface MockCallOptions {
mockResponse?: string;
/** Fixed status to return (optional, defaults to 'done') */
mockStatus?: 'done' | 'blocked' | 'error' | 'approved' | 'rejected' | 'improve';
/** Structured output payload returned as-is */
structuredOutput?: Record<string, unknown>;
}
/** A single entry in a mock scenario */

View File

@ -329,14 +329,22 @@ export class OpenCodeClient {
diag.onConnected();
const tools = mapToOpenCodeTools(options.allowedTools);
const promptPayload: Record<string, unknown> = {
sessionID: sessionId,
directory: options.cwd,
model: parsedModel,
...(tools ? { tools } : {}),
parts: [{ type: 'text' as const, text: fullPrompt }],
};
if (options.outputSchema) {
promptPayload.outputFormat = {
type: 'json_schema',
schema: options.outputSchema,
};
}
await client.session.promptAsync(
{
sessionID: sessionId,
directory: options.cwd,
model: parsedModel,
...(tools ? { tools } : {}),
parts: [{ type: 'text' as const, text: fullPrompt }],
},
promptPayload as never,
{ signal: streamAbortController.signal },
);
@ -571,6 +579,17 @@ export class OpenCodeClient {
}
const trimmed = content.trim();
let structuredOutput: Record<string, unknown> | undefined;
if (options.outputSchema && trimmed.startsWith('{')) {
try {
const parsed = JSON.parse(trimmed) as unknown;
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
structuredOutput = parsed as Record<string, unknown>;
}
} catch {
// Non-JSON response falls back to text path.
}
}
emitResult(options.onStream, true, trimmed, sessionId);
return {
@ -579,6 +598,7 @@ export class OpenCodeClient {
content: trimmed,
timestamp: new Date(),
sessionId,
structuredOutput,
};
} catch (error) {
const message = getErrorMessage(error);

View File

@ -170,4 +170,6 @@ export interface OpenCodeCallOptions {
onAskUserQuestion?: AskUserQuestionHandler;
/** OpenCode API key */
opencodeApiKey?: string;
/** JSON Schema for structured output */
outputSchema?: Record<string, unknown>;
}

View File

@ -22,6 +22,7 @@ function toClaudeOptions(options: ProviderCallOptions): ClaudeCallOptions {
onAskUserQuestion: options.onAskUserQuestion,
bypassPermissions: options.bypassPermissions,
anthropicApiKey: options.anthropicApiKey ?? resolveAnthropicApiKey(),
outputSchema: options.outputSchema,
};
}

View File

@ -33,6 +33,7 @@ function toCodexOptions(options: ProviderCallOptions): CodexCallOptions {
permissionMode: options.permissionMode,
onStream: options.onStream,
openaiApiKey: options.openaiApiKey ?? resolveOpenaiApiKey(),
outputSchema: options.outputSchema,
};
}

View File

@ -22,6 +22,7 @@ function toOpenCodeOptions(options: ProviderCallOptions): OpenCodeCallOptions {
onStream: options.onStream,
onAskUserQuestion: options.onAskUserQuestion,
opencodeApiKey: options.opencodeApiKey ?? resolveOpencodeApiKey(),
outputSchema: options.outputSchema,
};
}

View File

@ -40,6 +40,8 @@ export interface ProviderCallOptions {
openaiApiKey?: string;
/** OpenCode API key for OpenCode provider */
opencodeApiKey?: string;
/** JSON Schema for structured output */
outputSchema?: Record<string, unknown>;
}
/** A configured agent ready to be called */

View File

@ -26,6 +26,8 @@ export interface SessionLog {
matchedRuleIndex?: number;
/** How the rule match was detected */
matchedRuleMethod?: string;
/** Method used by status judgment phase */
matchMethod?: string;
}>;
}
@ -56,6 +58,7 @@ export interface NdjsonStepComplete {
instruction: string;
matchedRuleIndex?: number;
matchedRuleMethod?: string;
matchMethod?: string;
error?: string;
timestamp: string;
}