takt: github-issue-257
This commit is contained in:
parent
be0f299727
commit
bf4196d3b3
33
builtins/schemas/decomposition.json
Normal file
33
builtins/schemas/decomposition.json
Normal file
@ -0,0 +1,33 @@
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"parts": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Unique part identifier"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Human-readable part title"
|
||||
},
|
||||
"instruction": {
|
||||
"type": "string",
|
||||
"description": "Instruction for the part agent"
|
||||
},
|
||||
"timeout_ms": {
|
||||
"type": "integer",
|
||||
"description": "Optional timeout in ms"
|
||||
}
|
||||
},
|
||||
"required": ["id", "title", "instruction"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["parts"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
15
builtins/schemas/evaluation.json
Normal file
15
builtins/schemas/evaluation.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"matched_index": {
|
||||
"type": "integer",
|
||||
"description": "Matched condition number (1-based)"
|
||||
},
|
||||
"reason": {
|
||||
"type": "string",
|
||||
"description": "Why this condition was matched"
|
||||
}
|
||||
},
|
||||
"required": ["matched_index"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
15
builtins/schemas/judgment.json
Normal file
15
builtins/schemas/judgment.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"step": {
|
||||
"type": "integer",
|
||||
"description": "Matched rule number (1-based)"
|
||||
},
|
||||
"reason": {
|
||||
"type": "string",
|
||||
"description": "Brief justification for the decision"
|
||||
}
|
||||
},
|
||||
"required": ["step"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
185
src/__tests__/agent-usecases.test.ts
Normal file
185
src/__tests__/agent-usecases.test.ts
Normal file
@ -0,0 +1,185 @@
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import { parseParts } from '../core/piece/engine/task-decomposer.js';
|
||||
import { detectJudgeIndex } from '../agents/judge-utils.js';
|
||||
import {
|
||||
executeAgent,
|
||||
generateReport,
|
||||
executePart,
|
||||
evaluateCondition,
|
||||
judgeStatus,
|
||||
decomposeTask,
|
||||
} from '../core/piece/agent-usecases.js';
|
||||
|
||||
vi.mock('../agents/runner.js', () => ({
|
||||
runAgent: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../core/piece/schema-loader.js', () => ({
|
||||
loadJudgmentSchema: vi.fn(() => ({ type: 'judgment' })),
|
||||
loadEvaluationSchema: vi.fn(() => ({ type: 'evaluation' })),
|
||||
loadDecompositionSchema: vi.fn((maxParts: number) => ({ type: 'decomposition', maxParts })),
|
||||
}));
|
||||
|
||||
vi.mock('../core/piece/engine/task-decomposer.js', () => ({
|
||||
parseParts: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../agents/judge-utils.js', () => ({
|
||||
buildJudgePrompt: vi.fn(() => 'judge prompt'),
|
||||
detectJudgeIndex: vi.fn(() => -1),
|
||||
}));
|
||||
|
||||
function doneResponse(content: string, structuredOutput?: Record<string, unknown>) {
|
||||
return {
|
||||
persona: 'tester',
|
||||
status: 'done' as const,
|
||||
content,
|
||||
timestamp: new Date('2026-02-12T00:00:00Z'),
|
||||
structuredOutput,
|
||||
};
|
||||
}
|
||||
|
||||
describe('agent-usecases', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it('executeAgent/generateReport/executePart は runAgent に委譲する', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('ok'));
|
||||
|
||||
await executeAgent('coder', 'do work', { cwd: '/tmp' });
|
||||
await generateReport('coder', 'write report', { cwd: '/tmp' });
|
||||
await executePart('coder', 'part work', { cwd: '/tmp' });
|
||||
|
||||
expect(runAgent).toHaveBeenCalledTimes(3);
|
||||
expect(runAgent).toHaveBeenNthCalledWith(1, 'coder', 'do work', { cwd: '/tmp' });
|
||||
expect(runAgent).toHaveBeenNthCalledWith(2, 'coder', 'write report', { cwd: '/tmp' });
|
||||
expect(runAgent).toHaveBeenNthCalledWith(3, 'coder', 'part work', { cwd: '/tmp' });
|
||||
});
|
||||
|
||||
it('evaluateCondition は構造化出力の matched_index を優先する', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('ignored', { matched_index: 2 }));
|
||||
|
||||
const result = await evaluateCondition('agent output', [
|
||||
{ index: 0, text: 'first' },
|
||||
{ index: 1, text: 'second' },
|
||||
], { cwd: '/repo' });
|
||||
|
||||
expect(result).toBe(1);
|
||||
expect(runAgent).toHaveBeenCalledWith(undefined, 'judge prompt', expect.objectContaining({
|
||||
cwd: '/repo',
|
||||
outputSchema: { type: 'evaluation' },
|
||||
}));
|
||||
});
|
||||
|
||||
it('evaluateCondition は構造化出力が使えない場合にタグ検出へフォールバックする', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('[JUDGE:2]'));
|
||||
vi.mocked(detectJudgeIndex).mockReturnValue(1);
|
||||
|
||||
const result = await evaluateCondition('agent output', [
|
||||
{ index: 0, text: 'first' },
|
||||
{ index: 1, text: 'second' },
|
||||
], { cwd: '/repo' });
|
||||
|
||||
expect(result).toBe(1);
|
||||
expect(detectJudgeIndex).toHaveBeenCalledWith('[JUDGE:2]');
|
||||
});
|
||||
|
||||
it('judgeStatus は単一ルール時に auto_select を返す', async () => {
|
||||
const result = await judgeStatus('instruction', [{ condition: 'always', next: 'done' }], {
|
||||
cwd: '/repo',
|
||||
movementName: 'review',
|
||||
});
|
||||
|
||||
expect(result).toEqual({ ruleIndex: 0, method: 'auto_select' });
|
||||
expect(runAgent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('judgeStatus は構造化出力 step を採用する', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { step: 2 }));
|
||||
|
||||
const result = await judgeStatus('instruction', [
|
||||
{ condition: 'a', next: 'one' },
|
||||
{ condition: 'b', next: 'two' },
|
||||
], {
|
||||
cwd: '/repo',
|
||||
movementName: 'review',
|
||||
});
|
||||
|
||||
expect(result).toEqual({ ruleIndex: 1, method: 'structured_output' });
|
||||
});
|
||||
|
||||
it('judgeStatus はタグフォールバックを使う', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('[REVIEW:2]'));
|
||||
|
||||
const result = await judgeStatus('instruction', [
|
||||
{ condition: 'a', next: 'one' },
|
||||
{ condition: 'b', next: 'two' },
|
||||
], {
|
||||
cwd: '/repo',
|
||||
movementName: 'review',
|
||||
});
|
||||
|
||||
expect(result).toEqual({ ruleIndex: 1, method: 'phase3_tag' });
|
||||
});
|
||||
|
||||
it('judgeStatus は最終手段として AI Judge を使う', async () => {
|
||||
vi.mocked(runAgent)
|
||||
.mockResolvedValueOnce(doneResponse('no match'))
|
||||
.mockResolvedValueOnce(doneResponse('ignored', { matched_index: 2 }));
|
||||
|
||||
const result = await judgeStatus('instruction', [
|
||||
{ condition: 'a', next: 'one' },
|
||||
{ condition: 'b', next: 'two' },
|
||||
], {
|
||||
cwd: '/repo',
|
||||
movementName: 'review',
|
||||
});
|
||||
|
||||
expect(result).toEqual({ ruleIndex: 1, method: 'ai_judge' });
|
||||
expect(runAgent).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it('decomposeTask は構造化出力 parts を返す', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
|
||||
parts: [
|
||||
{ id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: 1000 },
|
||||
],
|
||||
}));
|
||||
|
||||
const result = await decomposeTask('instruction', 3, { cwd: '/repo', persona: 'team-leader' });
|
||||
|
||||
expect(result).toEqual([
|
||||
{ id: 'p1', title: 'Part 1', instruction: 'Do 1', timeoutMs: 1000 },
|
||||
]);
|
||||
expect(parseParts).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('decomposeTask は構造化出力がない場合 parseParts にフォールバックする', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('```json [] ```'));
|
||||
vi.mocked(parseParts).mockReturnValue([
|
||||
{ id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined },
|
||||
]);
|
||||
|
||||
const result = await decomposeTask('instruction', 2, { cwd: '/repo' });
|
||||
|
||||
expect(parseParts).toHaveBeenCalledWith('```json [] ```', 2);
|
||||
expect(result).toEqual([
|
||||
{ id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined },
|
||||
]);
|
||||
});
|
||||
|
||||
it('decomposeTask は done 以外をエラーにする', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue({
|
||||
persona: 'team-leader',
|
||||
status: 'error',
|
||||
content: 'failure',
|
||||
error: 'bad output',
|
||||
timestamp: new Date('2026-02-12T00:00:00Z'),
|
||||
});
|
||||
|
||||
await expect(decomposeTask('instruction', 2, { cwd: '/repo' }))
|
||||
.rejects.toThrow('Team leader failed: bad output');
|
||||
});
|
||||
});
|
||||
@ -36,7 +36,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
|
||||
|
||||
import { PieceEngine } from '../core/piece/index.js';
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import { detectMatchedRule } from '../core/piece/index.js';
|
||||
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
||||
import {
|
||||
makeResponse,
|
||||
makeMovement,
|
||||
|
||||
@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
|
||||
|
||||
import { PieceEngine } from '../core/piece/index.js';
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import { detectMatchedRule } from '../core/piece/index.js';
|
||||
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
||||
import {
|
||||
makeResponse,
|
||||
makeMovement,
|
||||
|
||||
@ -16,9 +16,9 @@ import { makeRule } from './test-helpers.js';
|
||||
// --- Mock imports (consumers must call vi.mock before importing this) ---
|
||||
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import { detectMatchedRule } from '../core/piece/index.js';
|
||||
import type { RuleMatch } from '../core/piece/index.js';
|
||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/index.js';
|
||||
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
||||
import type { RuleMatch } from '../core/piece/evaluation/index.js';
|
||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/phase-runner.js';
|
||||
import { generateReportDir } from '../shared/utils/index.js';
|
||||
|
||||
// --- Factory functions ---
|
||||
|
||||
@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
|
||||
// --- Imports (after mocks) ---
|
||||
|
||||
import { PieceEngine } from '../core/piece/index.js';
|
||||
import { runReportPhase } from '../core/piece/index.js';
|
||||
import { runReportPhase } from '../core/piece/phase-runner.js';
|
||||
import {
|
||||
makeResponse,
|
||||
makeMovement,
|
||||
|
||||
@ -34,7 +34,8 @@ vi.mock('../infra/config/project/projectConfig.js', () => ({
|
||||
|
||||
// --- Imports (after mocks) ---
|
||||
|
||||
import { detectMatchedRule, evaluateAggregateConditions } from '../core/piece/index.js';
|
||||
import { evaluateAggregateConditions } from '../core/piece/index.js';
|
||||
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
||||
import { detectRuleIndex } from '../infra/claude/index.js';
|
||||
import type { RuleMatch, RuleEvaluatorContext } from '../core/piece/index.js';
|
||||
|
||||
|
||||
@ -1,183 +0,0 @@
|
||||
/**
|
||||
* Test for Fallback Strategies
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import type { PieceMovement } from '../core/models/types.js';
|
||||
import type { JudgmentContext } from '../core/piece/judgment/FallbackStrategy.js';
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import {
|
||||
AutoSelectStrategy,
|
||||
ReportBasedStrategy,
|
||||
ResponseBasedStrategy,
|
||||
AgentConsultStrategy,
|
||||
JudgmentStrategyFactory,
|
||||
} from '../core/piece/judgment/FallbackStrategy.js';
|
||||
|
||||
// Mock runAgent
|
||||
vi.mock('../agents/runner.js', () => ({
|
||||
runAgent: vi.fn(),
|
||||
}));
|
||||
|
||||
describe('JudgmentStrategies', () => {
|
||||
const mockStep: PieceMovement = {
|
||||
name: 'test-movement',
|
||||
persona: 'test-agent',
|
||||
rules: [
|
||||
{ description: 'Rule 1', condition: 'approved' },
|
||||
{ description: 'Rule 2', condition: 'rejected' },
|
||||
],
|
||||
};
|
||||
|
||||
const mockContext: JudgmentContext = {
|
||||
step: mockStep,
|
||||
cwd: '/test/cwd',
|
||||
language: 'en',
|
||||
reportDir: '/test/reports',
|
||||
lastResponse: 'Last response content',
|
||||
sessionId: 'session-123',
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('AutoSelectStrategy', () => {
|
||||
it('should apply when step has only one rule', () => {
|
||||
const singleRuleStep: PieceMovement = {
|
||||
name: 'single-rule',
|
||||
rules: [{ description: 'Only rule', condition: 'always' }],
|
||||
};
|
||||
const strategy = new AutoSelectStrategy();
|
||||
expect(strategy.canApply({ ...mockContext, step: singleRuleStep })).toBe(true);
|
||||
});
|
||||
|
||||
it('should not apply when step has multiple rules', () => {
|
||||
const strategy = new AutoSelectStrategy();
|
||||
expect(strategy.canApply(mockContext)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return auto-selected tag', async () => {
|
||||
const singleRuleStep: PieceMovement = {
|
||||
name: 'single-rule',
|
||||
rules: [{ description: 'Only rule', condition: 'always' }],
|
||||
};
|
||||
const strategy = new AutoSelectStrategy();
|
||||
const result = await strategy.execute({ ...mockContext, step: singleRuleStep });
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.tag).toBe('[SINGLE-RULE:1]');
|
||||
});
|
||||
});
|
||||
|
||||
describe('ReportBasedStrategy', () => {
|
||||
it('should apply when reportDir and output contracts are configured', () => {
|
||||
const strategy = new ReportBasedStrategy();
|
||||
const stepWithOutputContracts: PieceMovement = {
|
||||
...mockStep,
|
||||
outputContracts: [{ label: 'review', path: 'review-report.md' }],
|
||||
};
|
||||
expect(strategy.canApply({ ...mockContext, step: stepWithOutputContracts })).toBe(true);
|
||||
});
|
||||
|
||||
it('should not apply when reportDir is missing', () => {
|
||||
const strategy = new ReportBasedStrategy();
|
||||
expect(strategy.canApply({ ...mockContext, reportDir: undefined })).toBe(false);
|
||||
});
|
||||
|
||||
it('should not apply when step has no output contracts configured', () => {
|
||||
const strategy = new ReportBasedStrategy();
|
||||
// mockStep has no outputContracts field → getReportFiles returns []
|
||||
expect(strategy.canApply(mockContext)).toBe(false);
|
||||
});
|
||||
|
||||
it('should use only latest report file from reports directory', async () => {
|
||||
const tmpRoot = mkdtempSync(join(tmpdir(), 'takt-judgment-report-'));
|
||||
try {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const historyDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'logs', 'reports-history');
|
||||
mkdirSync(reportDir, { recursive: true });
|
||||
mkdirSync(historyDir, { recursive: true });
|
||||
|
||||
const latestFile = '05-architect-review.md';
|
||||
writeFileSync(join(reportDir, latestFile), 'LATEST-ONLY-CONTENT');
|
||||
writeFileSync(join(historyDir, '05-architect-review.20260210T061143Z.md'), 'OLD-HISTORY-CONTENT');
|
||||
|
||||
const stepWithOutputContracts: PieceMovement = {
|
||||
...mockStep,
|
||||
outputContracts: [{ name: latestFile }],
|
||||
};
|
||||
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock.mockResolvedValue({
|
||||
persona: 'conductor',
|
||||
status: 'done',
|
||||
content: '[TEST-MOVEMENT:1]',
|
||||
timestamp: new Date('2026-02-10T07:11:43Z'),
|
||||
});
|
||||
|
||||
const strategy = new ReportBasedStrategy();
|
||||
const result = await strategy.execute({
|
||||
...mockContext,
|
||||
step: stepWithOutputContracts,
|
||||
reportDir,
|
||||
});
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(runAgentMock).toHaveBeenCalledTimes(1);
|
||||
const instruction = runAgentMock.mock.calls[0]?.[1];
|
||||
expect(instruction).toContain('LATEST-ONLY-CONTENT');
|
||||
expect(instruction).not.toContain('OLD-HISTORY-CONTENT');
|
||||
} finally {
|
||||
rmSync(tmpRoot, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('ResponseBasedStrategy', () => {
|
||||
it('should apply when lastResponse is provided', () => {
|
||||
const strategy = new ResponseBasedStrategy();
|
||||
expect(strategy.canApply(mockContext)).toBe(true);
|
||||
});
|
||||
|
||||
it('should not apply when lastResponse is missing', () => {
|
||||
const strategy = new ResponseBasedStrategy();
|
||||
expect(strategy.canApply({ ...mockContext, lastResponse: undefined })).toBe(false);
|
||||
});
|
||||
|
||||
it('should not apply when lastResponse is empty', () => {
|
||||
const strategy = new ResponseBasedStrategy();
|
||||
expect(strategy.canApply({ ...mockContext, lastResponse: '' })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentConsultStrategy', () => {
|
||||
it('should apply when sessionId is provided', () => {
|
||||
const strategy = new AgentConsultStrategy();
|
||||
expect(strategy.canApply(mockContext)).toBe(true);
|
||||
});
|
||||
|
||||
it('should not apply when sessionId is missing', () => {
|
||||
const strategy = new AgentConsultStrategy();
|
||||
expect(strategy.canApply({ ...mockContext, sessionId: undefined })).toBe(false);
|
||||
});
|
||||
|
||||
it('should not apply when sessionId is empty', () => {
|
||||
const strategy = new AgentConsultStrategy();
|
||||
expect(strategy.canApply({ ...mockContext, sessionId: '' })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('JudgmentStrategyFactory', () => {
|
||||
it('should create strategies in correct order', () => {
|
||||
const strategies = JudgmentStrategyFactory.createStrategies();
|
||||
expect(strategies).toHaveLength(4);
|
||||
expect(strategies[0]).toBeInstanceOf(AutoSelectStrategy);
|
||||
expect(strategies[1]).toBeInstanceOf(ReportBasedStrategy);
|
||||
expect(strategies[2]).toBeInstanceOf(ResponseBasedStrategy);
|
||||
expect(strategies[3]).toBeInstanceOf(AgentConsultStrategy);
|
||||
});
|
||||
});
|
||||
});
|
||||
@ -1,194 +0,0 @@
|
||||
/**
|
||||
* Unit tests for FallbackStrategy judgment strategies
|
||||
*
|
||||
* Tests AutoSelectStrategy and canApply logic for all strategies.
|
||||
* Strategies requiring external agent calls (ReportBased, ResponseBased,
|
||||
* AgentConsult) are tested for canApply and input validation only.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
AutoSelectStrategy,
|
||||
ReportBasedStrategy,
|
||||
ResponseBasedStrategy,
|
||||
AgentConsultStrategy,
|
||||
JudgmentStrategyFactory,
|
||||
type JudgmentContext,
|
||||
} from '../core/piece/judgment/FallbackStrategy.js';
|
||||
import { makeMovement } from './test-helpers.js';
|
||||
|
||||
function makeContext(overrides: Partial<JudgmentContext> = {}): JudgmentContext {
|
||||
return {
|
||||
step: makeMovement(),
|
||||
cwd: '/tmp/test',
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('AutoSelectStrategy', () => {
|
||||
const strategy = new AutoSelectStrategy();
|
||||
|
||||
it('should have name "AutoSelect"', () => {
|
||||
expect(strategy.name).toBe('AutoSelect');
|
||||
});
|
||||
|
||||
describe('canApply', () => {
|
||||
it('should return true when movement has exactly one rule', () => {
|
||||
const ctx = makeContext({
|
||||
step: makeMovement({
|
||||
rules: [{ condition: 'done', next: 'COMPLETE' }],
|
||||
}),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false when movement has multiple rules', () => {
|
||||
const ctx = makeContext({
|
||||
step: makeMovement({
|
||||
rules: [
|
||||
{ condition: 'approved', next: 'implement' },
|
||||
{ condition: 'rejected', next: 'review' },
|
||||
],
|
||||
}),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false when movement has no rules', () => {
|
||||
const ctx = makeContext({
|
||||
step: makeMovement({ rules: undefined }),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('execute', () => {
|
||||
it('should return auto-selected tag for single-branch movement', async () => {
|
||||
const ctx = makeContext({
|
||||
step: makeMovement({
|
||||
name: 'review',
|
||||
rules: [{ condition: 'done', next: 'COMPLETE' }],
|
||||
}),
|
||||
});
|
||||
|
||||
const result = await strategy.execute(ctx);
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.tag).toBe('[REVIEW:1]');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('ReportBasedStrategy', () => {
|
||||
const strategy = new ReportBasedStrategy();
|
||||
|
||||
it('should have name "ReportBased"', () => {
|
||||
expect(strategy.name).toBe('ReportBased');
|
||||
});
|
||||
|
||||
describe('canApply', () => {
|
||||
it('should return true when reportDir and outputContracts are present', () => {
|
||||
const ctx = makeContext({
|
||||
reportDir: '/tmp/reports',
|
||||
step: makeMovement({
|
||||
outputContracts: [{ name: 'report.md' }],
|
||||
}),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false when reportDir is missing', () => {
|
||||
const ctx = makeContext({
|
||||
step: makeMovement({
|
||||
outputContracts: [{ name: 'report.md' }],
|
||||
}),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false when outputContracts is empty', () => {
|
||||
const ctx = makeContext({
|
||||
reportDir: '/tmp/reports',
|
||||
step: makeMovement({ outputContracts: [] }),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false when outputContracts is undefined', () => {
|
||||
const ctx = makeContext({
|
||||
reportDir: '/tmp/reports',
|
||||
step: makeMovement(),
|
||||
});
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('ResponseBasedStrategy', () => {
|
||||
const strategy = new ResponseBasedStrategy();
|
||||
|
||||
it('should have name "ResponseBased"', () => {
|
||||
expect(strategy.name).toBe('ResponseBased');
|
||||
});
|
||||
|
||||
describe('canApply', () => {
|
||||
it('should return true when lastResponse is non-empty', () => {
|
||||
const ctx = makeContext({ lastResponse: 'some response' });
|
||||
expect(strategy.canApply(ctx)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false when lastResponse is undefined', () => {
|
||||
const ctx = makeContext({ lastResponse: undefined });
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false when lastResponse is empty string', () => {
|
||||
const ctx = makeContext({ lastResponse: '' });
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentConsultStrategy', () => {
|
||||
const strategy = new AgentConsultStrategy();
|
||||
|
||||
it('should have name "AgentConsult"', () => {
|
||||
expect(strategy.name).toBe('AgentConsult');
|
||||
});
|
||||
|
||||
describe('canApply', () => {
|
||||
it('should return true when sessionId is non-empty', () => {
|
||||
const ctx = makeContext({ sessionId: 'session-123' });
|
||||
expect(strategy.canApply(ctx)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false when sessionId is undefined', () => {
|
||||
const ctx = makeContext({ sessionId: undefined });
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false when sessionId is empty string', () => {
|
||||
const ctx = makeContext({ sessionId: '' });
|
||||
expect(strategy.canApply(ctx)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('execute', () => {
|
||||
it('should return failure when sessionId is not provided', async () => {
|
||||
const ctx = makeContext({ sessionId: undefined });
|
||||
const result = await strategy.execute(ctx);
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.reason).toBe('Session ID not provided');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('JudgmentStrategyFactory', () => {
|
||||
it('should create strategies in correct priority order', () => {
|
||||
const strategies = JudgmentStrategyFactory.createStrategies();
|
||||
expect(strategies).toHaveLength(4);
|
||||
expect(strategies[0]!.name).toBe('AutoSelect');
|
||||
expect(strategies[1]!.name).toBe('ReportBased');
|
||||
expect(strategies[2]!.name).toBe('ResponseBased');
|
||||
expect(strategies[3]!.name).toBe('AgentConsult');
|
||||
});
|
||||
});
|
||||
@ -34,7 +34,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
|
||||
// --- Imports (after mocks) ---
|
||||
|
||||
import { PieceEngine } from '../core/piece/index.js';
|
||||
import { runReportPhase } from '../core/piece/index.js';
|
||||
import { runReportPhase } from '../core/piece/phase-runner.js';
|
||||
import {
|
||||
makeResponse,
|
||||
makeMovement,
|
||||
|
||||
76
src/__tests__/schema-loader.test.ts
Normal file
76
src/__tests__/schema-loader.test.ts
Normal file
@ -0,0 +1,76 @@
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
const readFileSyncMock = vi.fn((path: string) => {
|
||||
if (path.endsWith('judgment.json')) {
|
||||
return JSON.stringify({ type: 'object', properties: { step: { type: 'integer' } } });
|
||||
}
|
||||
if (path.endsWith('evaluation.json')) {
|
||||
return JSON.stringify({ type: 'object', properties: { matched_index: { type: 'integer' } } });
|
||||
}
|
||||
if (path.endsWith('decomposition.json')) {
|
||||
return JSON.stringify({
|
||||
type: 'object',
|
||||
properties: {
|
||||
parts: {
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
id: { type: 'string' },
|
||||
title: { type: 'string' },
|
||||
instruction: { type: 'string' },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
throw new Error(`Unexpected schema path: ${path}`);
|
||||
});
|
||||
|
||||
vi.mock('node:fs', () => ({
|
||||
readFileSync: readFileSyncMock,
|
||||
}));
|
||||
|
||||
vi.mock('../infra/resources/index.js', () => ({
|
||||
getResourcesDir: vi.fn(() => '/mock/resources'),
|
||||
}));
|
||||
|
||||
describe('schema-loader', () => {
|
||||
beforeEach(() => {
|
||||
vi.resetModules();
|
||||
readFileSyncMock.mockClear();
|
||||
});
|
||||
|
||||
it('同じスキーマを複数回ロードしても readFileSync は1回だけ', async () => {
|
||||
const { loadJudgmentSchema } = await import('../core/piece/schema-loader.js');
|
||||
|
||||
const first = loadJudgmentSchema();
|
||||
const second = loadJudgmentSchema();
|
||||
|
||||
expect(first).toEqual(second);
|
||||
expect(readFileSyncMock).toHaveBeenCalledTimes(1);
|
||||
expect(readFileSyncMock).toHaveBeenCalledWith('/mock/resources/schemas/judgment.json', 'utf-8');
|
||||
});
|
||||
|
||||
it('loadDecompositionSchema は maxItems を注入し、呼び出しごとに独立したオブジェクトを返す', async () => {
|
||||
const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js');
|
||||
|
||||
const first = loadDecompositionSchema(2);
|
||||
const second = loadDecompositionSchema(5);
|
||||
|
||||
const firstParts = (first.properties as Record<string, unknown>).parts as Record<string, unknown>;
|
||||
const secondParts = (second.properties as Record<string, unknown>).parts as Record<string, unknown>;
|
||||
|
||||
expect(firstParts.maxItems).toBe(2);
|
||||
expect(secondParts.maxItems).toBe(5);
|
||||
expect(readFileSyncMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('loadDecompositionSchema は不正な maxParts を拒否する', async () => {
|
||||
const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js');
|
||||
|
||||
expect(() => loadDecompositionSchema(0)).toThrow('maxParts must be a positive integer: 0');
|
||||
expect(() => loadDecompositionSchema(-1)).toThrow('maxParts must be a positive integer: -1');
|
||||
});
|
||||
});
|
||||
@ -6,39 +6,12 @@
|
||||
*/
|
||||
|
||||
import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
|
||||
import { loadTemplate } from '../shared/prompts/index.js';
|
||||
import { createLogger } from '../shared/utils/index.js';
|
||||
import { runAgent } from './runner.js';
|
||||
import { evaluateCondition } from '../core/piece/agent-usecases.js';
|
||||
|
||||
const log = createLogger('ai-judge');
|
||||
|
||||
/**
|
||||
* Detect judge rule index from [JUDGE:N] tag pattern.
|
||||
* Returns 0-based rule index, or -1 if no match.
|
||||
*/
|
||||
export function detectJudgeIndex(content: string): number {
|
||||
const regex = /\[JUDGE:(\d+)\]/i;
|
||||
const match = content.match(regex);
|
||||
if (match?.[1]) {
|
||||
const index = Number.parseInt(match[1], 10) - 1;
|
||||
return index >= 0 ? index : -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the prompt for the AI judge that evaluates agent output against ai() conditions.
|
||||
*/
|
||||
export function buildJudgePrompt(
|
||||
agentOutput: string,
|
||||
aiConditions: AiJudgeCondition[],
|
||||
): string {
|
||||
const conditionList = aiConditions
|
||||
.map((c) => `| ${c.index + 1} | ${c.text} |`)
|
||||
.join('\n');
|
||||
|
||||
return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
|
||||
}
|
||||
export { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
|
||||
|
||||
/**
|
||||
* Call AI judge to evaluate agent output against ai() conditions.
|
||||
@ -50,18 +23,9 @@ export const callAiJudge: AiJudgeCaller = async (
|
||||
conditions: AiJudgeCondition[],
|
||||
options: { cwd: string },
|
||||
): Promise<number> => {
|
||||
const prompt = buildJudgePrompt(agentOutput, conditions);
|
||||
|
||||
const response = await runAgent(undefined, prompt, {
|
||||
cwd: options.cwd,
|
||||
maxTurns: 1,
|
||||
permissionMode: 'readonly',
|
||||
});
|
||||
|
||||
if (response.status !== 'done') {
|
||||
log.error('AI judge call failed', { error: response.error });
|
||||
return -1;
|
||||
const result = await evaluateCondition(agentOutput, conditions, options);
|
||||
if (result < 0) {
|
||||
log.error('AI judge call failed to match a condition');
|
||||
}
|
||||
|
||||
return detectJudgeIndex(response.content);
|
||||
return result;
|
||||
};
|
||||
|
||||
@ -2,6 +2,5 @@
|
||||
* Agents module - exports agent execution utilities
|
||||
*/
|
||||
|
||||
export { AgentRunner, runAgent } from './runner.js';
|
||||
export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js';
|
||||
export { AgentRunner } from './runner.js';
|
||||
export type { RunAgentOptions, StreamCallback } from './types.js';
|
||||
|
||||
22
src/agents/judge-utils.ts
Normal file
22
src/agents/judge-utils.ts
Normal file
@ -0,0 +1,22 @@
|
||||
import { loadTemplate } from '../shared/prompts/index.js';
|
||||
|
||||
export function detectJudgeIndex(content: string): number {
|
||||
const regex = /\[JUDGE:(\d+)\]/i;
|
||||
const match = content.match(regex);
|
||||
if (match?.[1]) {
|
||||
const index = Number.parseInt(match[1], 10) - 1;
|
||||
return index >= 0 ? index : -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
export function buildJudgePrompt(
|
||||
agentOutput: string,
|
||||
aiConditions: Array<{ index: number; text: string }>,
|
||||
): string {
|
||||
const conditionList = aiConditions
|
||||
.map((c) => `| ${c.index + 1} | ${c.text} |`)
|
||||
.join('\n');
|
||||
|
||||
return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
|
||||
}
|
||||
@ -111,6 +111,7 @@ export class AgentRunner {
|
||||
onPermissionRequest: options.onPermissionRequest,
|
||||
onAskUserQuestion: options.onAskUserQuestion,
|
||||
bypassPermissions: options.bypassPermissions,
|
||||
outputSchema: options.outputSchema,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -39,4 +39,6 @@ export interface RunAgentOptions {
|
||||
movementsList: ReadonlyArray<{ name: string; description?: string }>;
|
||||
currentPosition: string;
|
||||
};
|
||||
/** JSON Schema for structured output */
|
||||
outputSchema?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
@ -17,5 +17,6 @@ export interface AgentResponse {
|
||||
matchedRuleIndex?: number;
|
||||
/** How the rule match was detected */
|
||||
matchedRuleMethod?: RuleMatchMethod;
|
||||
/** Structured output returned by provider SDK (JSON Schema mode) */
|
||||
structuredOutput?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
|
||||
@ -21,6 +21,8 @@ export type Status =
|
||||
/** How a rule match was detected */
|
||||
export type RuleMatchMethod =
|
||||
| 'aggregate'
|
||||
| 'auto_select'
|
||||
| 'structured_output'
|
||||
| 'phase3_tag'
|
||||
| 'phase1_tag'
|
||||
| 'ai_judge'
|
||||
|
||||
235
src/core/piece/agent-usecases.ts
Normal file
235
src/core/piece/agent-usecases.ts
Normal file
@ -0,0 +1,235 @@
|
||||
import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../models/types.js';
|
||||
import { runAgent, type RunAgentOptions } from '../../agents/runner.js';
|
||||
import { detectJudgeIndex, buildJudgePrompt } from '../../agents/judge-utils.js';
|
||||
import { parseParts } from './engine/task-decomposer.js';
|
||||
import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema } from './schema-loader.js';
|
||||
|
||||
export type UsecaseOptions = RunAgentOptions;
|
||||
|
||||
export interface JudgeStatusOptions {
|
||||
cwd: string;
|
||||
movementName: string;
|
||||
language?: Language;
|
||||
}
|
||||
|
||||
export interface JudgeStatusResult {
|
||||
ruleIndex: number;
|
||||
method: RuleMatchMethod;
|
||||
}
|
||||
|
||||
export interface EvaluateConditionOptions {
|
||||
cwd: string;
|
||||
}
|
||||
|
||||
export interface DecomposeTaskOptions {
|
||||
cwd: string;
|
||||
persona?: string;
|
||||
language?: Language;
|
||||
model?: string;
|
||||
provider?: 'claude' | 'codex' | 'opencode' | 'mock';
|
||||
}
|
||||
|
||||
function detectRuleIndex(content: string, movementName: string): number {
|
||||
const tag = movementName.toUpperCase();
|
||||
const regex = new RegExp(`\\[${tag}:(\\d+)\\]`, 'gi');
|
||||
const matches = [...content.matchAll(regex)];
|
||||
const match = matches.at(-1);
|
||||
if (match?.[1]) {
|
||||
const index = Number.parseInt(match[1], 10) - 1;
|
||||
return index >= 0 ? index : -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
|
||||
if (!Array.isArray(raw)) {
|
||||
throw new Error('Structured output "parts" must be an array');
|
||||
}
|
||||
if (raw.length === 0) {
|
||||
throw new Error('Structured output "parts" must not be empty');
|
||||
}
|
||||
if (raw.length > maxParts) {
|
||||
throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`);
|
||||
}
|
||||
|
||||
const parts: PartDefinition[] = raw.map((entry, index) => {
|
||||
if (typeof entry !== 'object' || entry == null || Array.isArray(entry)) {
|
||||
throw new Error(`Part[${index}] must be an object`);
|
||||
}
|
||||
const row = entry as Record<string, unknown>;
|
||||
const id = row.id;
|
||||
const title = row.title;
|
||||
const instruction = row.instruction;
|
||||
const timeoutMs = row.timeout_ms;
|
||||
|
||||
if (typeof id !== 'string' || id.trim().length === 0) {
|
||||
throw new Error(`Part[${index}] "id" must be a non-empty string`);
|
||||
}
|
||||
if (typeof title !== 'string' || title.trim().length === 0) {
|
||||
throw new Error(`Part[${index}] "title" must be a non-empty string`);
|
||||
}
|
||||
if (typeof instruction !== 'string' || instruction.trim().length === 0) {
|
||||
throw new Error(`Part[${index}] "instruction" must be a non-empty string`);
|
||||
}
|
||||
if (
|
||||
timeoutMs != null
|
||||
&& (typeof timeoutMs !== 'number' || !Number.isInteger(timeoutMs) || timeoutMs <= 0)
|
||||
) {
|
||||
throw new Error(`Part[${index}] "timeout_ms" must be a positive integer`);
|
||||
}
|
||||
|
||||
return {
|
||||
id,
|
||||
title,
|
||||
instruction,
|
||||
timeoutMs: timeoutMs as number | undefined,
|
||||
};
|
||||
});
|
||||
|
||||
const seen = new Set<string>();
|
||||
for (const part of parts) {
|
||||
if (seen.has(part.id)) {
|
||||
throw new Error(`Duplicate part id: ${part.id}`);
|
||||
}
|
||||
seen.add(part.id);
|
||||
}
|
||||
|
||||
return parts;
|
||||
}
|
||||
|
||||
export async function executeAgent(
|
||||
persona: string | undefined,
|
||||
instruction: string,
|
||||
options: UsecaseOptions,
|
||||
): Promise<AgentResponse> {
|
||||
return runAgent(persona, instruction, options);
|
||||
}
|
||||
|
||||
export async function generateReport(
|
||||
persona: string | undefined,
|
||||
instruction: string,
|
||||
options: UsecaseOptions,
|
||||
): Promise<AgentResponse> {
|
||||
return runAgent(persona, instruction, options);
|
||||
}
|
||||
|
||||
export async function executePart(
|
||||
persona: string | undefined,
|
||||
instruction: string,
|
||||
options: UsecaseOptions,
|
||||
): Promise<AgentResponse> {
|
||||
return runAgent(persona, instruction, options);
|
||||
}
|
||||
|
||||
export async function evaluateCondition(
|
||||
agentOutput: string,
|
||||
conditions: Array<{ index: number; text: string }>,
|
||||
options: EvaluateConditionOptions,
|
||||
): Promise<number> {
|
||||
const prompt = buildJudgePrompt(agentOutput, conditions);
|
||||
const response = await runAgent(undefined, prompt, {
|
||||
cwd: options.cwd,
|
||||
maxTurns: 1,
|
||||
permissionMode: 'readonly',
|
||||
outputSchema: loadEvaluationSchema(),
|
||||
});
|
||||
|
||||
if (response.status !== 'done') {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const matchedIndex = response.structuredOutput?.matched_index;
|
||||
if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) {
|
||||
const zeroBased = matchedIndex - 1;
|
||||
if (zeroBased >= 0 && zeroBased < conditions.length) {
|
||||
return zeroBased;
|
||||
}
|
||||
}
|
||||
|
||||
return detectJudgeIndex(response.content);
|
||||
}
|
||||
|
||||
export async function judgeStatus(
|
||||
instruction: string,
|
||||
rules: PieceRule[],
|
||||
options: JudgeStatusOptions,
|
||||
): Promise<JudgeStatusResult> {
|
||||
if (rules.length === 0) {
|
||||
throw new Error('judgeStatus requires at least one rule');
|
||||
}
|
||||
|
||||
if (rules.length === 1) {
|
||||
return {
|
||||
ruleIndex: 0,
|
||||
method: 'auto_select',
|
||||
};
|
||||
}
|
||||
|
||||
const response = await runAgent('conductor', instruction, {
|
||||
cwd: options.cwd,
|
||||
maxTurns: 3,
|
||||
permissionMode: 'readonly',
|
||||
language: options.language,
|
||||
outputSchema: loadJudgmentSchema(),
|
||||
});
|
||||
|
||||
if (response.status === 'done') {
|
||||
const stepNumber = response.structuredOutput?.step;
|
||||
if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) {
|
||||
const ruleIndex = stepNumber - 1;
|
||||
if (ruleIndex >= 0 && ruleIndex < rules.length) {
|
||||
return {
|
||||
ruleIndex,
|
||||
method: 'structured_output',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const tagRuleIndex = detectRuleIndex(response.content, options.movementName);
|
||||
if (tagRuleIndex >= 0 && tagRuleIndex < rules.length) {
|
||||
return {
|
||||
ruleIndex: tagRuleIndex,
|
||||
method: 'phase3_tag',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const conditions = rules.map((rule, index) => ({ index, text: rule.condition }));
|
||||
const fallbackIndex = await evaluateCondition(instruction, conditions, { cwd: options.cwd });
|
||||
if (fallbackIndex >= 0 && fallbackIndex < rules.length) {
|
||||
return {
|
||||
ruleIndex: fallbackIndex,
|
||||
method: 'ai_judge',
|
||||
};
|
||||
}
|
||||
|
||||
throw new Error(`Status not found for movement "${options.movementName}"`);
|
||||
}
|
||||
|
||||
export async function decomposeTask(
|
||||
instruction: string,
|
||||
maxParts: number,
|
||||
options: DecomposeTaskOptions,
|
||||
): Promise<PartDefinition[]> {
|
||||
const response = await runAgent(options.persona, instruction, {
|
||||
cwd: options.cwd,
|
||||
language: options.language,
|
||||
model: options.model,
|
||||
provider: options.provider,
|
||||
permissionMode: 'readonly',
|
||||
maxTurns: 3,
|
||||
outputSchema: loadDecompositionSchema(maxParts),
|
||||
});
|
||||
|
||||
if (response.status !== 'done') {
|
||||
const detail = response.error ?? response.content;
|
||||
throw new Error(`Team leader failed: ${detail}`);
|
||||
}
|
||||
|
||||
const parts = response.structuredOutput?.parts;
|
||||
if (parts != null) {
|
||||
return toPartDefinitions(parts, maxParts);
|
||||
}
|
||||
|
||||
return parseParts(response.content, maxParts);
|
||||
}
|
||||
@ -15,7 +15,8 @@ import type { ArpeggioMovementConfig, BatchResult, DataBatch } from '../arpeggio
|
||||
import { createDataSource } from '../arpeggio/data-source-factory.js';
|
||||
import { loadTemplate, expandTemplate } from '../arpeggio/template.js';
|
||||
import { buildMergeFn, writeMergedOutput } from '../arpeggio/merge.js';
|
||||
import { runAgent, type RunAgentOptions } from '../../../agents/runner.js';
|
||||
import type { RunAgentOptions } from '../../../agents/runner.js';
|
||||
import { executeAgent } from '../agent-usecases.js';
|
||||
import { detectMatchedRule } from '../evaluation/index.js';
|
||||
import { incrementMovementIteration } from './state-manager.js';
|
||||
import { createLogger } from '../../../shared/utils/index.js';
|
||||
@ -84,7 +85,7 @@ async function executeBatchWithRetry(
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const response = await runAgent(persona, prompt, agentOptions);
|
||||
const response = await executeAgent(persona, prompt, agentOptions);
|
||||
if (response.status === 'error') {
|
||||
lastError = response.error ?? response.content ?? 'Agent returned error status';
|
||||
log.info('Batch execution failed, retrying', {
|
||||
|
||||
@ -15,7 +15,7 @@ import type {
|
||||
Language,
|
||||
} from '../../models/types.js';
|
||||
import type { PhaseName } from '../types.js';
|
||||
import { runAgent } from '../../../agents/runner.js';
|
||||
import { executeAgent } from '../agent-usecases.js';
|
||||
import { InstructionBuilder, isOutputContractItem } from '../instruction/InstructionBuilder.js';
|
||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
||||
import { detectMatchedRule } from '../evaluation/index.js';
|
||||
@ -202,7 +202,7 @@ export class MovementExecutor {
|
||||
// Phase 1: main execution (Write excluded if movement has report)
|
||||
this.deps.onPhaseStart?.(step, 1, 'execute', instruction);
|
||||
const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
|
||||
let response = await runAgent(step.persona, instruction, agentOptions);
|
||||
let response = await executeAgent(step.persona, instruction, agentOptions);
|
||||
updatePersonaSession(sessionKey, response.sessionId);
|
||||
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error);
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ import type {
|
||||
PieceState,
|
||||
AgentResponse,
|
||||
} from '../../models/types.js';
|
||||
import { runAgent } from '../../../agents/runner.js';
|
||||
import { executeAgent } from '../agent-usecases.js';
|
||||
import { ParallelLogger } from './parallel-logger.js';
|
||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
||||
import { detectMatchedRule } from '../evaluation/index.js';
|
||||
@ -101,7 +101,7 @@ export class ParallelRunner {
|
||||
: baseOptions;
|
||||
|
||||
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction);
|
||||
const subResponse = await runAgent(subMovement.persona, subInstruction, agentOptions);
|
||||
const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions);
|
||||
updatePersonaSession(subSessionKey, subResponse.sessionId);
|
||||
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error);
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
import { runAgent } from '../../../agents/runner.js';
|
||||
import type {
|
||||
PieceMovement,
|
||||
PieceState,
|
||||
@ -6,11 +5,11 @@ import type {
|
||||
PartDefinition,
|
||||
PartResult,
|
||||
} from '../../models/types.js';
|
||||
import { decomposeTask, executePart } from '../agent-usecases.js';
|
||||
import { detectMatchedRule } from '../evaluation/index.js';
|
||||
import { buildSessionKey } from '../session-key.js';
|
||||
import { ParallelLogger } from './parallel-logger.js';
|
||||
import { incrementMovementIteration } from './state-manager.js';
|
||||
import { parseParts } from './task-decomposer.js';
|
||||
import { buildAbortSignal } from './abort-signal.js';
|
||||
import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
|
||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||
@ -99,26 +98,19 @@ export class TeamLeaderRunner {
|
||||
);
|
||||
|
||||
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction);
|
||||
const leaderResponse = await runAgent(
|
||||
leaderStep.persona,
|
||||
instruction,
|
||||
this.deps.optionsBuilder.buildAgentOptions(leaderStep),
|
||||
);
|
||||
updatePersonaSession(buildSessionKey(leaderStep), leaderResponse.sessionId);
|
||||
this.deps.onPhaseComplete?.(
|
||||
leaderStep,
|
||||
1,
|
||||
'execute',
|
||||
leaderResponse.content,
|
||||
leaderResponse.status,
|
||||
leaderResponse.error,
|
||||
);
|
||||
if (leaderResponse.status === 'error') {
|
||||
const detail = leaderResponse.error ?? leaderResponse.content;
|
||||
throw new Error(`Team leader failed: ${detail}`);
|
||||
}
|
||||
|
||||
const parts = parseParts(leaderResponse.content, teamLeaderConfig.maxParts);
|
||||
const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, {
|
||||
cwd: this.deps.getCwd(),
|
||||
persona: leaderStep.persona,
|
||||
model: leaderStep.model,
|
||||
provider: leaderStep.provider,
|
||||
});
|
||||
const leaderResponse: AgentResponse = {
|
||||
persona: leaderStep.persona ?? leaderStep.name,
|
||||
status: 'done',
|
||||
content: JSON.stringify({ parts }, null, 2),
|
||||
timestamp: new Date(),
|
||||
};
|
||||
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error);
|
||||
log.debug('Team leader decomposed parts', {
|
||||
movement: step.name,
|
||||
partCount: parts.length,
|
||||
@ -240,7 +232,7 @@ export class TeamLeaderRunner {
|
||||
: { ...baseOptions, abortSignal: signal };
|
||||
|
||||
try {
|
||||
const response = await runAgent(partMovement.persona, part.instruction, options);
|
||||
const response = await executePart(partMovement.persona, part.instruction, options);
|
||||
updatePersonaSession(buildSessionKey(partMovement), response.sessionId);
|
||||
return {
|
||||
part,
|
||||
|
||||
@ -60,8 +60,19 @@ export { buildEditRule, type InstructionContext } from './instruction/instructio
|
||||
export { generateStatusRulesComponents, type StatusRulesComponents } from './instruction/status-rules.js';
|
||||
|
||||
// Rule evaluation
|
||||
export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, detectMatchedRule, evaluateAggregateConditions } from './evaluation/index.js';
|
||||
export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, evaluateAggregateConditions } from './evaluation/index.js';
|
||||
export { AggregateEvaluator } from './evaluation/AggregateEvaluator.js';
|
||||
|
||||
// Phase runner
|
||||
export { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js';
|
||||
export { needsStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js';
|
||||
|
||||
// Agent usecases
|
||||
export {
|
||||
executeAgent,
|
||||
generateReport,
|
||||
executePart,
|
||||
judgeStatus,
|
||||
evaluateCondition,
|
||||
decomposeTask,
|
||||
type JudgeStatusResult,
|
||||
} from './agent-usecases.js';
|
||||
|
||||
@ -27,6 +27,8 @@ export interface StatusJudgmentContext {
|
||||
lastResponse?: string;
|
||||
/** Input source type for fallback strategies */
|
||||
inputSource?: 'report' | 'response';
|
||||
/** Structured output mode omits tag-format instructions */
|
||||
useStructuredOutput?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -67,7 +69,9 @@ export class StatusJudgmentBuilder {
|
||||
return loadTemplate('perform_phase3_message', language, {
|
||||
reportContent: contentToJudge,
|
||||
criteriaTable: components.criteriaTable,
|
||||
outputList: components.outputList,
|
||||
outputList: this.context.useStructuredOutput
|
||||
? ''
|
||||
: components.outputList,
|
||||
hasAppendix: components.hasAppendix,
|
||||
appendixContent: components.appendixContent,
|
||||
});
|
||||
|
||||
@ -1,255 +0,0 @@
|
||||
/**
|
||||
* Fallback strategies for Phase 3 judgment.
|
||||
*
|
||||
* Implements Chain of Responsibility pattern to try multiple judgment methods
|
||||
* when conductor cannot determine the status from report alone.
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
import type { PieceMovement, Language } from '../../models/types.js';
|
||||
import { runAgent } from '../../../agents/runner.js';
|
||||
import { StatusJudgmentBuilder } from '../instruction/StatusJudgmentBuilder.js';
|
||||
import { JudgmentDetector, type JudgmentResult } from './JudgmentDetector.js';
|
||||
import { hasOnlyOneBranch, getAutoSelectedTag, getReportFiles } from '../evaluation/rule-utils.js';
|
||||
import { createLogger } from '../../../shared/utils/index.js';
|
||||
|
||||
const log = createLogger('fallback-strategy');
|
||||
|
||||
export interface JudgmentContext {
|
||||
step: PieceMovement;
|
||||
cwd: string;
|
||||
language?: Language;
|
||||
reportDir?: string;
|
||||
lastResponse?: string; // Phase 1の最終応答
|
||||
sessionId?: string;
|
||||
}
|
||||
|
||||
export interface JudgmentStrategy {
|
||||
readonly name: string;
|
||||
canApply(context: JudgmentContext): boolean;
|
||||
execute(context: JudgmentContext): Promise<JudgmentResult>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for judgment strategies using Template Method Pattern.
|
||||
*/
|
||||
abstract class JudgmentStrategyBase implements JudgmentStrategy {
|
||||
abstract readonly name: string;
|
||||
|
||||
abstract canApply(context: JudgmentContext): boolean;
|
||||
|
||||
async execute(context: JudgmentContext): Promise<JudgmentResult> {
|
||||
try {
|
||||
// 1. 情報収集(サブクラスで実装)
|
||||
const input = await this.gatherInput(context);
|
||||
|
||||
// 2. 指示生成(サブクラスで実装)
|
||||
const instruction = this.buildInstruction(input, context);
|
||||
|
||||
// 3. conductor実行(共通)
|
||||
const response = await this.runConductor(instruction, context);
|
||||
|
||||
// 4. 結果検出(共通)
|
||||
return JudgmentDetector.detect(response);
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
log.debug(`Strategy ${this.name} threw error`, { error: errorMsg });
|
||||
return {
|
||||
success: false,
|
||||
reason: `Strategy failed with error: ${errorMsg}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract gatherInput(context: JudgmentContext): Promise<string>;
|
||||
|
||||
protected abstract buildInstruction(input: string, context: JudgmentContext): string;
|
||||
|
||||
protected async runConductor(instruction: string, context: JudgmentContext): Promise<string> {
|
||||
const response = await runAgent('conductor', instruction, {
|
||||
cwd: context.cwd,
|
||||
maxTurns: 3,
|
||||
permissionMode: 'readonly',
|
||||
language: context.language,
|
||||
});
|
||||
|
||||
if (response.status !== 'done') {
|
||||
throw new Error(`Conductor failed: ${response.error || response.content || 'Unknown error'}`);
|
||||
}
|
||||
|
||||
return response.content;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strategy 1: Auto-select when there's only one branch.
|
||||
* This strategy doesn't use conductor - just returns the single tag.
|
||||
*/
|
||||
export class AutoSelectStrategy implements JudgmentStrategy {
|
||||
readonly name = 'AutoSelect';
|
||||
|
||||
canApply(context: JudgmentContext): boolean {
|
||||
return hasOnlyOneBranch(context.step);
|
||||
}
|
||||
|
||||
async execute(context: JudgmentContext): Promise<JudgmentResult> {
|
||||
const tag = getAutoSelectedTag(context.step);
|
||||
log.debug('Auto-selected tag (single branch)', { tag });
|
||||
return {
|
||||
success: true,
|
||||
tag,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strategy 2: Report-based judgment.
|
||||
* Read report files and ask conductor to judge.
|
||||
*/
|
||||
export class ReportBasedStrategy extends JudgmentStrategyBase {
|
||||
readonly name = 'ReportBased';
|
||||
|
||||
canApply(context: JudgmentContext): boolean {
|
||||
return context.reportDir !== undefined && getReportFiles(context.step.outputContracts).length > 0;
|
||||
}
|
||||
|
||||
protected async gatherInput(context: JudgmentContext): Promise<string> {
|
||||
if (!context.reportDir) {
|
||||
throw new Error('Report directory not provided');
|
||||
}
|
||||
|
||||
const reportFiles = getReportFiles(context.step.outputContracts);
|
||||
if (reportFiles.length === 0) {
|
||||
throw new Error('No report files configured');
|
||||
}
|
||||
|
||||
const reportContents: string[] = [];
|
||||
for (const fileName of reportFiles) {
|
||||
const filePath = resolve(context.reportDir, fileName);
|
||||
try {
|
||||
const content = readFileSync(filePath, 'utf-8');
|
||||
reportContents.push(`# ${fileName}\n\n${content}`);
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
throw new Error(`Failed to read report file ${fileName}: ${errorMsg}`);
|
||||
}
|
||||
}
|
||||
|
||||
return reportContents.join('\n\n---\n\n');
|
||||
}
|
||||
|
||||
protected buildInstruction(input: string, context: JudgmentContext): string {
|
||||
return new StatusJudgmentBuilder(context.step, {
|
||||
language: context.language,
|
||||
reportContent: input,
|
||||
inputSource: 'report',
|
||||
}).build();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strategy 3: Response-based judgment.
|
||||
* Use the last response from Phase 1 to judge.
|
||||
*/
|
||||
export class ResponseBasedStrategy extends JudgmentStrategyBase {
|
||||
readonly name = 'ResponseBased';
|
||||
|
||||
canApply(context: JudgmentContext): boolean {
|
||||
return context.lastResponse !== undefined && context.lastResponse.length > 0;
|
||||
}
|
||||
|
||||
protected async gatherInput(context: JudgmentContext): Promise<string> {
|
||||
if (!context.lastResponse) {
|
||||
throw new Error('Last response not provided');
|
||||
}
|
||||
return context.lastResponse;
|
||||
}
|
||||
|
||||
protected buildInstruction(input: string, context: JudgmentContext): string {
|
||||
return new StatusJudgmentBuilder(context.step, {
|
||||
language: context.language,
|
||||
lastResponse: input,
|
||||
inputSource: 'response',
|
||||
}).build();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strategy 4: Agent consult.
|
||||
* Resume the Phase 1 agent session and ask which tag is appropriate.
|
||||
*/
|
||||
export class AgentConsultStrategy implements JudgmentStrategy {
|
||||
readonly name = 'AgentConsult';
|
||||
|
||||
canApply(context: JudgmentContext): boolean {
|
||||
return context.sessionId !== undefined && context.sessionId.length > 0;
|
||||
}
|
||||
|
||||
async execute(context: JudgmentContext): Promise<JudgmentResult> {
|
||||
if (!context.sessionId) {
|
||||
return {
|
||||
success: false,
|
||||
reason: 'Session ID not provided',
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const question = this.buildQuestion(context);
|
||||
|
||||
const response = await runAgent(context.step.persona ?? context.step.name, question, {
|
||||
cwd: context.cwd,
|
||||
sessionId: context.sessionId,
|
||||
maxTurns: 3,
|
||||
language: context.language,
|
||||
});
|
||||
|
||||
if (response.status !== 'done') {
|
||||
return {
|
||||
success: false,
|
||||
reason: `Agent consultation failed: ${response.error || 'Unknown error'}`,
|
||||
};
|
||||
}
|
||||
|
||||
return JudgmentDetector.detect(response.content);
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
log.debug('Agent consult strategy failed', { error: errorMsg });
|
||||
return {
|
||||
success: false,
|
||||
reason: `Agent consultation error: ${errorMsg}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private buildQuestion(context: JudgmentContext): string {
|
||||
const rules = context.step.rules || [];
|
||||
const ruleDescriptions = rules.map((rule, idx) => {
|
||||
const tag = `[${context.step.name.toUpperCase()}:${idx + 1}]`;
|
||||
const desc = rule.condition || `Rule ${idx + 1}`;
|
||||
return `- ${tag}: ${desc}`;
|
||||
}).join('\n');
|
||||
|
||||
const lang = context.language || 'en';
|
||||
|
||||
if (lang === 'ja') {
|
||||
return `あなたの作業結果に基づいて、以下の判定タグのうちどれが適切か教えてください:\n\n${ruleDescriptions}\n\n該当するタグを1つだけ出力してください(例: [${context.step.name.toUpperCase()}:1])。`;
|
||||
} else {
|
||||
return `Based on your work, which of the following judgment tags is appropriate?\n\n${ruleDescriptions}\n\nPlease output only one tag (e.g., [${context.step.name.toUpperCase()}:1]).`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory for creating judgment strategies in order of priority.
|
||||
*/
|
||||
export class JudgmentStrategyFactory {
|
||||
static createStrategies(): JudgmentStrategy[] {
|
||||
return [
|
||||
new AutoSelectStrategy(),
|
||||
new ReportBasedStrategy(),
|
||||
new ResponseBasedStrategy(),
|
||||
new AgentConsultStrategy(),
|
||||
];
|
||||
}
|
||||
}
|
||||
@ -6,13 +6,3 @@ export {
|
||||
JudgmentDetector,
|
||||
type JudgmentResult,
|
||||
} from './JudgmentDetector.js';
|
||||
|
||||
export {
|
||||
AutoSelectStrategy,
|
||||
ReportBasedStrategy,
|
||||
ResponseBasedStrategy,
|
||||
AgentConsultStrategy,
|
||||
JudgmentStrategyFactory,
|
||||
type JudgmentContext,
|
||||
type JudgmentStrategy,
|
||||
} from './FallbackStrategy.js';
|
||||
|
||||
@ -9,12 +9,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { dirname, parse, resolve, sep } from 'node:path';
|
||||
import type { PieceMovement, Language, AgentResponse } from '../models/types.js';
|
||||
import type { PhaseName } from './types.js';
|
||||
import { runAgent, type RunAgentOptions } from '../../agents/runner.js';
|
||||
import type { RunAgentOptions } from '../../agents/runner.js';
|
||||
import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js';
|
||||
import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js';
|
||||
import { JudgmentStrategyFactory, type JudgmentContext } from './judgment/index.js';
|
||||
import { generateReport } from './agent-usecases.js';
|
||||
import { createLogger } from '../../shared/utils/index.js';
|
||||
import { buildSessionKey } from './session-key.js';
|
||||
export { runStatusJudgmentPhase } from './status-judgment-phase.js';
|
||||
|
||||
const log = createLogger('phase-runner');
|
||||
|
||||
@ -213,7 +214,7 @@ async function runSingleReportAttempt(
|
||||
|
||||
let response: AgentResponse;
|
||||
try {
|
||||
response = await runAgent(step.persona, instruction, options);
|
||||
response = await generateReport(step.persona, instruction, options);
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg);
|
||||
@ -241,55 +242,3 @@ async function runSingleReportAttempt(
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
|
||||
return { kind: 'success', content: trimmedContent, response };
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase 3: Status judgment.
|
||||
* Uses the 'conductor' agent in a new session to output a status tag.
|
||||
* Implements multi-stage fallback logic to ensure judgment succeeds.
|
||||
* Returns the Phase 3 response content (containing the status tag).
|
||||
*/
|
||||
export async function runStatusJudgmentPhase(
|
||||
step: PieceMovement,
|
||||
ctx: PhaseRunnerContext,
|
||||
): Promise<string> {
|
||||
log.debug('Running status judgment phase', { movement: step.name });
|
||||
|
||||
const strategies = JudgmentStrategyFactory.createStrategies();
|
||||
const sessionKey = buildSessionKey(step);
|
||||
const judgmentContext: JudgmentContext = {
|
||||
step,
|
||||
cwd: ctx.cwd,
|
||||
language: ctx.language,
|
||||
reportDir: ctx.reportDir,
|
||||
lastResponse: ctx.lastResponse,
|
||||
sessionId: ctx.getSessionId(sessionKey),
|
||||
};
|
||||
|
||||
for (const strategy of strategies) {
|
||||
if (!strategy.canApply(judgmentContext)) {
|
||||
log.debug(`Strategy ${strategy.name} not applicable, skipping`);
|
||||
continue;
|
||||
}
|
||||
|
||||
log.debug(`Trying strategy: ${strategy.name}`);
|
||||
ctx.onPhaseStart?.(step, 3, 'judge', `Strategy: ${strategy.name}`);
|
||||
|
||||
try {
|
||||
const result = await strategy.execute(judgmentContext);
|
||||
if (result.success) {
|
||||
log.debug(`Strategy ${strategy.name} succeeded`, { tag: result.tag });
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', result.tag!, 'done');
|
||||
return result.tag!;
|
||||
}
|
||||
|
||||
log.debug(`Strategy ${strategy.name} failed`, { reason: result.reason });
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
log.debug(`Strategy ${strategy.name} threw error`, { error: errorMsg });
|
||||
}
|
||||
}
|
||||
|
||||
const errorMsg = 'All judgment strategies failed';
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
|
||||
throw new Error(errorMsg);
|
||||
}
|
||||
|
||||
50
src/core/piece/schema-loader.ts
Normal file
50
src/core/piece/schema-loader.ts
Normal file
@ -0,0 +1,50 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { getResourcesDir } from '../../infra/resources/index.js';
|
||||
|
||||
type JsonSchema = Record<string, unknown>;
|
||||
|
||||
const schemaCache = new Map<string, JsonSchema>();
|
||||
|
||||
function loadSchema(name: string): JsonSchema {
|
||||
const cached = schemaCache.get(name);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
const schemaPath = join(getResourcesDir(), 'schemas', name);
|
||||
const content = readFileSync(schemaPath, 'utf-8');
|
||||
const parsed = JSON.parse(content) as JsonSchema;
|
||||
schemaCache.set(name, parsed);
|
||||
return parsed;
|
||||
}
|
||||
|
||||
function cloneSchema(schema: JsonSchema): JsonSchema {
|
||||
return JSON.parse(JSON.stringify(schema)) as JsonSchema;
|
||||
}
|
||||
|
||||
export function loadJudgmentSchema(): JsonSchema {
|
||||
return loadSchema('judgment.json');
|
||||
}
|
||||
|
||||
export function loadEvaluationSchema(): JsonSchema {
|
||||
return loadSchema('evaluation.json');
|
||||
}
|
||||
|
||||
export function loadDecompositionSchema(maxParts: number): JsonSchema {
|
||||
if (!Number.isInteger(maxParts) || maxParts <= 0) {
|
||||
throw new Error(`maxParts must be a positive integer: ${maxParts}`);
|
||||
}
|
||||
|
||||
const schema = cloneSchema(loadSchema('decomposition.json'));
|
||||
const properties = schema.properties;
|
||||
if (!properties || typeof properties !== 'object' || Array.isArray(properties)) {
|
||||
throw new Error('decomposition schema is invalid: properties is missing');
|
||||
}
|
||||
const rawParts = (properties as Record<string, unknown>).parts;
|
||||
if (!rawParts || typeof rawParts !== 'object' || Array.isArray(rawParts)) {
|
||||
throw new Error('decomposition schema is invalid: parts is missing');
|
||||
}
|
||||
|
||||
(rawParts as Record<string, unknown>).maxItems = maxParts;
|
||||
return schema;
|
||||
}
|
||||
78
src/core/piece/status-judgment-phase.ts
Normal file
78
src/core/piece/status-judgment-phase.ts
Normal file
@ -0,0 +1,78 @@
|
||||
import { existsSync, readFileSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
import type { PieceMovement } from '../models/types.js';
|
||||
import { judgeStatus } from './agent-usecases.js';
|
||||
import { StatusJudgmentBuilder } from './instruction/StatusJudgmentBuilder.js';
|
||||
import { getReportFiles } from './evaluation/rule-utils.js';
|
||||
import { createLogger } from '../../shared/utils/index.js';
|
||||
import type { PhaseRunnerContext } from './phase-runner.js';
|
||||
|
||||
const log = createLogger('phase-runner');
|
||||
|
||||
/**
|
||||
* Phase 3: Status judgment.
|
||||
* Uses the 'conductor' agent in a new session to output a status tag.
|
||||
* Implements multi-stage fallback logic to ensure judgment succeeds.
|
||||
* Returns the Phase 3 response content (containing the status tag).
|
||||
*/
|
||||
export async function runStatusJudgmentPhase(
|
||||
step: PieceMovement,
|
||||
ctx: PhaseRunnerContext,
|
||||
): Promise<string> {
|
||||
log.debug('Running status judgment phase', { movement: step.name });
|
||||
if (!step.rules || step.rules.length === 0) {
|
||||
throw new Error(`Status judgment requires rules for movement "${step.name}"`);
|
||||
}
|
||||
|
||||
const reportFiles = getReportFiles(step.outputContracts);
|
||||
let instruction: string | undefined;
|
||||
|
||||
if (reportFiles.length > 0) {
|
||||
const reports: string[] = [];
|
||||
for (const fileName of reportFiles) {
|
||||
const filePath = resolve(ctx.reportDir, fileName);
|
||||
if (!existsSync(filePath)) {
|
||||
continue;
|
||||
}
|
||||
const content = readFileSync(filePath, 'utf-8');
|
||||
reports.push(`# ${fileName}\n\n${content}`);
|
||||
}
|
||||
if (reports.length > 0) {
|
||||
instruction = new StatusJudgmentBuilder(step, {
|
||||
language: ctx.language,
|
||||
reportContent: reports.join('\n\n---\n\n'),
|
||||
inputSource: 'report',
|
||||
useStructuredOutput: true,
|
||||
}).build();
|
||||
}
|
||||
}
|
||||
|
||||
if (instruction == null) {
|
||||
if (!ctx.lastResponse) {
|
||||
throw new Error(`Status judgment requires report or lastResponse for movement "${step.name}"`);
|
||||
}
|
||||
|
||||
instruction = new StatusJudgmentBuilder(step, {
|
||||
language: ctx.language,
|
||||
lastResponse: ctx.lastResponse,
|
||||
inputSource: 'response',
|
||||
useStructuredOutput: true,
|
||||
}).build();
|
||||
}
|
||||
|
||||
ctx.onPhaseStart?.(step, 3, 'judge', instruction);
|
||||
try {
|
||||
const result = await judgeStatus(instruction, step.rules, {
|
||||
cwd: ctx.cwd,
|
||||
movementName: step.name,
|
||||
language: ctx.language,
|
||||
});
|
||||
const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`;
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done');
|
||||
return tag;
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
@ -113,6 +113,16 @@ function assertTaskPrefixPair(
|
||||
}
|
||||
}
|
||||
|
||||
function toJudgmentMatchMethod(
|
||||
matchedRuleMethod: string | undefined,
|
||||
): string | undefined {
|
||||
if (!matchedRuleMethod) return undefined;
|
||||
if (matchedRuleMethod === 'structured_output') return 'structured_output';
|
||||
if (matchedRuleMethod === 'ai_judge' || matchedRuleMethod === 'ai_judge_fallback') return 'ai_judge';
|
||||
if (matchedRuleMethod === 'phase3_tag' || matchedRuleMethod === 'phase1_tag') return 'tag_fallback';
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function createOutputFns(prefixWriter: TaskPrefixWriter | undefined): OutputFns {
|
||||
if (!prefixWriter) {
|
||||
return {
|
||||
@ -587,6 +597,7 @@ export async function executePiece(
|
||||
}
|
||||
|
||||
// Write step_complete record to NDJSON log
|
||||
const matchMethod = toJudgmentMatchMethod(response.matchedRuleMethod);
|
||||
const record: NdjsonStepComplete = {
|
||||
type: 'step_complete',
|
||||
step: step.name,
|
||||
@ -596,6 +607,7 @@ export async function executePiece(
|
||||
instruction,
|
||||
...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}),
|
||||
...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}),
|
||||
...(matchMethod ? { matchMethod } : {}),
|
||||
...(response.error ? { error: response.error } : {}),
|
||||
timestamp: response.timestamp.toISOString(),
|
||||
};
|
||||
|
||||
19
src/index.ts
19
src/index.ts
@ -61,12 +61,6 @@ export {
|
||||
createCanUseToolCallback,
|
||||
createAskUserQuestionHooks,
|
||||
buildSdkOptions,
|
||||
callClaude,
|
||||
callClaudeCustom,
|
||||
callClaudeAgent,
|
||||
callClaudeSkill,
|
||||
detectRuleIndex,
|
||||
isRegexSafe,
|
||||
} from './infra/claude/index.js';
|
||||
export type {
|
||||
StreamEvent,
|
||||
@ -90,7 +84,8 @@ export type {
|
||||
} from './infra/claude/index.js';
|
||||
|
||||
// Codex integration
|
||||
export * from './infra/codex/index.js';
|
||||
export { CodexClient, mapToCodexSandboxMode } from './infra/codex/index.js';
|
||||
export type { CodexCallOptions, CodexSandboxMode } from './infra/codex/index.js';
|
||||
|
||||
// Agent execution
|
||||
export * from './agents/index.js';
|
||||
@ -115,12 +110,15 @@ export {
|
||||
StatusJudgmentBuilder,
|
||||
buildEditRule,
|
||||
RuleEvaluator,
|
||||
detectMatchedRule,
|
||||
evaluateAggregateConditions,
|
||||
AggregateEvaluator,
|
||||
needsStatusJudgmentPhase,
|
||||
runReportPhase,
|
||||
runStatusJudgmentPhase,
|
||||
executeAgent,
|
||||
generateReport,
|
||||
executePart,
|
||||
judgeStatus,
|
||||
evaluateCondition,
|
||||
decomposeTask,
|
||||
} from './core/piece/index.js';
|
||||
export type {
|
||||
PieceEvents,
|
||||
@ -133,6 +131,7 @@ export type {
|
||||
ProviderType,
|
||||
RuleMatch,
|
||||
RuleEvaluatorContext,
|
||||
JudgeStatusResult,
|
||||
ReportInstructionContext,
|
||||
StatusJudgmentContext,
|
||||
InstructionContext,
|
||||
|
||||
@ -52,6 +52,7 @@ export class ClaudeClient {
|
||||
onAskUserQuestion: options.onAskUserQuestion,
|
||||
bypassPermissions: options.bypassPermissions,
|
||||
anthropicApiKey: options.anthropicApiKey,
|
||||
outputSchema: options.outputSchema,
|
||||
};
|
||||
}
|
||||
|
||||
@ -76,6 +77,7 @@ export class ClaudeClient {
|
||||
timestamp: new Date(),
|
||||
sessionId: result.sessionId,
|
||||
error: result.error,
|
||||
structuredOutput: result.structuredOutput,
|
||||
};
|
||||
}
|
||||
|
||||
@ -104,6 +106,7 @@ export class ClaudeClient {
|
||||
timestamp: new Date(),
|
||||
sessionId: result.sessionId,
|
||||
error: result.error,
|
||||
structuredOutput: result.structuredOutput,
|
||||
};
|
||||
}
|
||||
|
||||
@ -153,6 +156,7 @@ export class ClaudeClient {
|
||||
timestamp: new Date(),
|
||||
sessionId: result.sessionId,
|
||||
error: result.error,
|
||||
structuredOutput: result.structuredOutput,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -94,6 +94,7 @@ export class QueryExecutor {
|
||||
let resultContent: string | undefined;
|
||||
let hasResultMessage = false;
|
||||
let accumulatedAssistantText = '';
|
||||
let structuredOutput: Record<string, unknown> | undefined;
|
||||
let onExternalAbort: (() => void) | undefined;
|
||||
|
||||
try {
|
||||
@ -139,6 +140,17 @@ export class QueryExecutor {
|
||||
const resultMsg = message as SDKResultMessage;
|
||||
if (resultMsg.subtype === 'success') {
|
||||
resultContent = resultMsg.result;
|
||||
const rawStructuredOutput = (resultMsg as unknown as {
|
||||
structured_output?: unknown;
|
||||
structuredOutput?: unknown;
|
||||
}).structured_output ?? (resultMsg as unknown as { structuredOutput?: unknown }).structuredOutput;
|
||||
if (
|
||||
rawStructuredOutput
|
||||
&& typeof rawStructuredOutput === 'object'
|
||||
&& !Array.isArray(rawStructuredOutput)
|
||||
) {
|
||||
structuredOutput = rawStructuredOutput as Record<string, unknown>;
|
||||
}
|
||||
success = true;
|
||||
} else {
|
||||
success = false;
|
||||
@ -169,6 +181,7 @@ export class QueryExecutor {
|
||||
content: finalContent.trim(),
|
||||
sessionId,
|
||||
fullContent: accumulatedAssistantText.trim(),
|
||||
structuredOutput,
|
||||
};
|
||||
} catch (error) {
|
||||
if (onExternalAbort && options.abortSignal) {
|
||||
|
||||
@ -65,6 +65,12 @@ export class SdkOptionsBuilder {
|
||||
if (this.options.agents) sdkOptions.agents = this.options.agents;
|
||||
if (this.options.mcpServers) sdkOptions.mcpServers = this.options.mcpServers;
|
||||
if (this.options.systemPrompt) sdkOptions.systemPrompt = this.options.systemPrompt;
|
||||
if (this.options.outputSchema) {
|
||||
(sdkOptions as Record<string, unknown>).outputFormat = {
|
||||
type: 'json_schema',
|
||||
schema: this.options.outputSchema,
|
||||
};
|
||||
}
|
||||
if (canUseTool) sdkOptions.canUseTool = canUseTool;
|
||||
if (hooks) sdkOptions.hooks = hooks;
|
||||
|
||||
|
||||
@ -109,6 +109,8 @@ export interface ClaudeResult {
|
||||
interrupted?: boolean;
|
||||
/** All assistant text accumulated during execution (for status detection) */
|
||||
fullContent?: string;
|
||||
/** Structured output returned by Claude SDK */
|
||||
structuredOutput?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/** Extended result with query ID for concurrent execution */
|
||||
@ -141,6 +143,8 @@ export interface ClaudeCallOptions {
|
||||
bypassPermissions?: boolean;
|
||||
/** Anthropic API key to inject via env (bypasses CLI auth) */
|
||||
anthropicApiKey?: string;
|
||||
/** JSON Schema for structured output */
|
||||
outputSchema?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/** Options for spawning a Claude SDK query (low-level, used by executor/process) */
|
||||
@ -168,6 +172,8 @@ export interface ClaudeSpawnOptions {
|
||||
bypassPermissions?: boolean;
|
||||
/** Anthropic API key to inject via env (bypasses CLI auth) */
|
||||
anthropicApiKey?: string;
|
||||
/** JSON Schema for structured output */
|
||||
outputSchema?: Record<string, unknown>;
|
||||
/** Callback for stderr output from the Claude Code process */
|
||||
onStderr?: (data: string) => void;
|
||||
}
|
||||
|
||||
@ -150,13 +150,18 @@ export class CodexClient {
|
||||
const diag = createStreamDiagnostics('codex-sdk', { agentType, model: options.model, attempt });
|
||||
diagRef = diag;
|
||||
|
||||
const { events } = await thread.runStreamed(fullPrompt, {
|
||||
const runOptions: Record<string, unknown> = {
|
||||
signal: streamAbortController.signal,
|
||||
});
|
||||
};
|
||||
if (options.outputSchema) {
|
||||
runOptions.outputSchema = options.outputSchema;
|
||||
}
|
||||
const { events } = await thread.runStreamed(fullPrompt, runOptions as never);
|
||||
resetIdleTimeout();
|
||||
diag.onConnected();
|
||||
|
||||
let content = '';
|
||||
let structuredOutput: Record<string, unknown> | undefined;
|
||||
const contentOffsets = new Map<string, number>();
|
||||
let success = true;
|
||||
let failureMessage = '';
|
||||
@ -189,6 +194,20 @@ export class CodexClient {
|
||||
break;
|
||||
}
|
||||
|
||||
if (event.type === 'turn.completed') {
|
||||
const rawFinalResponse = (event as unknown as {
|
||||
turn?: { finalResponse?: unknown };
|
||||
}).turn?.finalResponse;
|
||||
if (
|
||||
rawFinalResponse
|
||||
&& typeof rawFinalResponse === 'object'
|
||||
&& !Array.isArray(rawFinalResponse)
|
||||
) {
|
||||
structuredOutput = rawFinalResponse as Record<string, unknown>;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (event.type === 'item.started') {
|
||||
const item = event.item as CodexItem | undefined;
|
||||
if (item) {
|
||||
@ -278,6 +297,7 @@ export class CodexClient {
|
||||
content: trimmed,
|
||||
timestamp: new Date(),
|
||||
sessionId: currentThreadId,
|
||||
structuredOutput,
|
||||
};
|
||||
} catch (error) {
|
||||
const message = getErrorMessage(error);
|
||||
|
||||
@ -31,4 +31,6 @@ export interface CodexCallOptions {
|
||||
onStream?: StreamCallback;
|
||||
/** OpenAI API key (bypasses CLI auth) */
|
||||
openaiApiKey?: string;
|
||||
/** JSON Schema for structured output */
|
||||
outputSchema?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
@ -113,6 +113,7 @@ export class SessionManager {
|
||||
...(record.error ? { error: record.error } : {}),
|
||||
...(record.matchedRuleIndex != null ? { matchedRuleIndex: record.matchedRuleIndex } : {}),
|
||||
...(record.matchedRuleMethod ? { matchedRuleMethod: record.matchedRuleMethod } : {}),
|
||||
...(record.matchMethod ? { matchMethod: record.matchMethod } : {}),
|
||||
});
|
||||
sessionLog.iterations++;
|
||||
}
|
||||
|
||||
@ -65,6 +65,7 @@ export async function callMock(
|
||||
content,
|
||||
timestamp: new Date(),
|
||||
sessionId,
|
||||
structuredOutput: options.structuredOutput,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -13,6 +13,8 @@ export interface MockCallOptions {
|
||||
mockResponse?: string;
|
||||
/** Fixed status to return (optional, defaults to 'done') */
|
||||
mockStatus?: 'done' | 'blocked' | 'error' | 'approved' | 'rejected' | 'improve';
|
||||
/** Structured output payload returned as-is */
|
||||
structuredOutput?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/** A single entry in a mock scenario */
|
||||
|
||||
@ -329,14 +329,22 @@ export class OpenCodeClient {
|
||||
diag.onConnected();
|
||||
|
||||
const tools = mapToOpenCodeTools(options.allowedTools);
|
||||
const promptPayload: Record<string, unknown> = {
|
||||
sessionID: sessionId,
|
||||
directory: options.cwd,
|
||||
model: parsedModel,
|
||||
...(tools ? { tools } : {}),
|
||||
parts: [{ type: 'text' as const, text: fullPrompt }],
|
||||
};
|
||||
if (options.outputSchema) {
|
||||
promptPayload.outputFormat = {
|
||||
type: 'json_schema',
|
||||
schema: options.outputSchema,
|
||||
};
|
||||
}
|
||||
|
||||
await client.session.promptAsync(
|
||||
{
|
||||
sessionID: sessionId,
|
||||
directory: options.cwd,
|
||||
model: parsedModel,
|
||||
...(tools ? { tools } : {}),
|
||||
parts: [{ type: 'text' as const, text: fullPrompt }],
|
||||
},
|
||||
promptPayload as never,
|
||||
{ signal: streamAbortController.signal },
|
||||
);
|
||||
|
||||
@ -571,6 +579,17 @@ export class OpenCodeClient {
|
||||
}
|
||||
|
||||
const trimmed = content.trim();
|
||||
let structuredOutput: Record<string, unknown> | undefined;
|
||||
if (options.outputSchema && trimmed.startsWith('{')) {
|
||||
try {
|
||||
const parsed = JSON.parse(trimmed) as unknown;
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||
structuredOutput = parsed as Record<string, unknown>;
|
||||
}
|
||||
} catch {
|
||||
// Non-JSON response falls back to text path.
|
||||
}
|
||||
}
|
||||
emitResult(options.onStream, true, trimmed, sessionId);
|
||||
|
||||
return {
|
||||
@ -579,6 +598,7 @@ export class OpenCodeClient {
|
||||
content: trimmed,
|
||||
timestamp: new Date(),
|
||||
sessionId,
|
||||
structuredOutput,
|
||||
};
|
||||
} catch (error) {
|
||||
const message = getErrorMessage(error);
|
||||
|
||||
@ -170,4 +170,6 @@ export interface OpenCodeCallOptions {
|
||||
onAskUserQuestion?: AskUserQuestionHandler;
|
||||
/** OpenCode API key */
|
||||
opencodeApiKey?: string;
|
||||
/** JSON Schema for structured output */
|
||||
outputSchema?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
@ -22,6 +22,7 @@ function toClaudeOptions(options: ProviderCallOptions): ClaudeCallOptions {
|
||||
onAskUserQuestion: options.onAskUserQuestion,
|
||||
bypassPermissions: options.bypassPermissions,
|
||||
anthropicApiKey: options.anthropicApiKey ?? resolveAnthropicApiKey(),
|
||||
outputSchema: options.outputSchema,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -33,6 +33,7 @@ function toCodexOptions(options: ProviderCallOptions): CodexCallOptions {
|
||||
permissionMode: options.permissionMode,
|
||||
onStream: options.onStream,
|
||||
openaiApiKey: options.openaiApiKey ?? resolveOpenaiApiKey(),
|
||||
outputSchema: options.outputSchema,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -22,6 +22,7 @@ function toOpenCodeOptions(options: ProviderCallOptions): OpenCodeCallOptions {
|
||||
onStream: options.onStream,
|
||||
onAskUserQuestion: options.onAskUserQuestion,
|
||||
opencodeApiKey: options.opencodeApiKey ?? resolveOpencodeApiKey(),
|
||||
outputSchema: options.outputSchema,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -40,6 +40,8 @@ export interface ProviderCallOptions {
|
||||
openaiApiKey?: string;
|
||||
/** OpenCode API key for OpenCode provider */
|
||||
opencodeApiKey?: string;
|
||||
/** JSON Schema for structured output */
|
||||
outputSchema?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/** A configured agent ready to be called */
|
||||
|
||||
@ -26,6 +26,8 @@ export interface SessionLog {
|
||||
matchedRuleIndex?: number;
|
||||
/** How the rule match was detected */
|
||||
matchedRuleMethod?: string;
|
||||
/** Method used by status judgment phase */
|
||||
matchMethod?: string;
|
||||
}>;
|
||||
}
|
||||
|
||||
@ -56,6 +58,7 @@ export interface NdjsonStepComplete {
|
||||
instruction: string;
|
||||
matchedRuleIndex?: number;
|
||||
matchedRuleMethod?: string;
|
||||
matchMethod?: string;
|
||||
error?: string;
|
||||
timestamp: string;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user