takt: fix-phase3-fallback-bypass (#474)
This commit is contained in:
parent
a8223d231d
commit
bc5e1fd860
@ -37,7 +37,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
|
|||||||
import { PieceEngine } from '../core/piece/index.js';
|
import { PieceEngine } from '../core/piece/index.js';
|
||||||
import { runAgent } from '../agents/runner.js';
|
import { runAgent } from '../agents/runner.js';
|
||||||
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
||||||
import { runReportPhase } from '../core/piece/phase-runner.js';
|
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/phase-runner.js';
|
||||||
import {
|
import {
|
||||||
makeResponse,
|
makeResponse,
|
||||||
makeMovement,
|
makeMovement,
|
||||||
@ -113,6 +113,45 @@ describe('PieceEngine Integration: Error Handling', () => {
|
|||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 2.5 Phase 3 fallback
|
||||||
|
// =====================================================
|
||||||
|
describe('Phase 3 fallback', () => {
|
||||||
|
it('should continue with phase1 rule evaluation when status judgment throws', async () => {
|
||||||
|
const config = buildDefaultPieceConfig({
|
||||||
|
initialMovement: 'plan',
|
||||||
|
movements: [
|
||||||
|
makeMovement('plan', {
|
||||||
|
rules: [makeRule('continue', 'COMPLETE')],
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir });
|
||||||
|
|
||||||
|
vi.mocked(needsStatusJudgmentPhase).mockReturnValue(true);
|
||||||
|
vi.mocked(runStatusJudgmentPhase).mockRejectedValueOnce(new Error('Phase 3 failed'));
|
||||||
|
|
||||||
|
mockRunAgentSequence([
|
||||||
|
makeResponse({ persona: 'plan', content: '[STEP:1] continue' }),
|
||||||
|
]);
|
||||||
|
mockDetectMatchedRuleSequence([
|
||||||
|
{ index: 0, method: 'phase1_tag' },
|
||||||
|
]);
|
||||||
|
|
||||||
|
const state = await engine.run();
|
||||||
|
|
||||||
|
expect(state.status).toBe('completed');
|
||||||
|
expect(runStatusJudgmentPhase).toHaveBeenCalledOnce();
|
||||||
|
expect(detectMatchedRule).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ name: 'plan' }),
|
||||||
|
'[STEP:1] continue',
|
||||||
|
'',
|
||||||
|
expect.any(Object),
|
||||||
|
);
|
||||||
|
expect(state.movementOutputs.get('plan')?.matchedRuleMethod).toBe('phase1_tag');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// =====================================================
|
// =====================================================
|
||||||
// 3. Interrupted status routing
|
// 3. Interrupted status routing
|
||||||
// =====================================================
|
// =====================================================
|
||||||
|
|||||||
@ -36,6 +36,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({
|
|||||||
import { PieceEngine } from '../core/piece/index.js';
|
import { PieceEngine } from '../core/piece/index.js';
|
||||||
import { runAgent } from '../agents/runner.js';
|
import { runAgent } from '../agents/runner.js';
|
||||||
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
import { detectMatchedRule } from '../core/piece/evaluation/index.js';
|
||||||
|
import { needsStatusJudgmentPhase, runStatusJudgmentPhase } from '../core/piece/phase-runner.js';
|
||||||
import {
|
import {
|
||||||
makeResponse,
|
makeResponse,
|
||||||
makeMovement,
|
makeMovement,
|
||||||
@ -215,4 +216,59 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
|
|||||||
expect(archReviewOutput!.error).toBe('Session resume failed');
|
expect(archReviewOutput!.error).toBe('Session resume failed');
|
||||||
expect(archReviewOutput!.content).toBe('');
|
expect(archReviewOutput!.content).toBe('');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should fallback to phase1 rule evaluation when sub-movement phase3 throws', async () => {
|
||||||
|
const config = buildParallelOnlyConfig();
|
||||||
|
const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir });
|
||||||
|
|
||||||
|
vi.mocked(needsStatusJudgmentPhase).mockImplementation((movement) => {
|
||||||
|
return movement.name === 'arch-review' || movement.name === 'security-review';
|
||||||
|
});
|
||||||
|
vi.mocked(runStatusJudgmentPhase).mockImplementation(async (movement) => {
|
||||||
|
if (movement.name === 'arch-review') {
|
||||||
|
throw new Error('Phase 3 failed for arch-review');
|
||||||
|
}
|
||||||
|
return { tag: '', ruleIndex: 0, method: 'auto_select' };
|
||||||
|
});
|
||||||
|
|
||||||
|
const mock = vi.mocked(runAgent);
|
||||||
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ persona: 'arch-review', content: '[STEP:1] done' });
|
||||||
|
});
|
||||||
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ persona: 'security-review', content: '[STEP:1] done' });
|
||||||
|
});
|
||||||
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ persona: 'done', content: 'completed' });
|
||||||
|
});
|
||||||
|
|
||||||
|
mockDetectMatchedRuleSequence([
|
||||||
|
{ index: 0, method: 'phase1_tag' }, // arch-review fallback
|
||||||
|
{ index: 0, method: 'aggregate' }, // reviewers aggregate
|
||||||
|
{ index: 0, method: 'phase1_tag' }, // done -> COMPLETE
|
||||||
|
]);
|
||||||
|
|
||||||
|
const state = await engine.run();
|
||||||
|
|
||||||
|
expect(state.status).toBe('completed');
|
||||||
|
expect(state.movementOutputs.get('arch-review')?.status).toBe('done');
|
||||||
|
expect(state.movementOutputs.get('arch-review')?.matchedRuleMethod).toBe('phase1_tag');
|
||||||
|
expect(
|
||||||
|
vi.mocked(detectMatchedRule).mock.calls.some(([movement, content, tagContent]) => {
|
||||||
|
return movement.name === 'arch-review' && content === '[STEP:1] done' && tagContent === '';
|
||||||
|
}),
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -19,9 +19,10 @@ import { executeAgent } from '../../../agents/agent-usecases.js';
|
|||||||
import { InstructionBuilder } from '../instruction/InstructionBuilder.js';
|
import { InstructionBuilder } from '../instruction/InstructionBuilder.js';
|
||||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
||||||
import { detectMatchedRule } from '../evaluation/index.js';
|
import { detectMatchedRule } from '../evaluation/index.js';
|
||||||
|
import type { StatusJudgmentPhaseResult } from '../phase-runner.js';
|
||||||
import { buildSessionKey } from '../session-key.js';
|
import { buildSessionKey } from '../session-key.js';
|
||||||
import { incrementMovementIteration, getPreviousOutput } from './state-manager.js';
|
import { incrementMovementIteration, getPreviousOutput } from './state-manager.js';
|
||||||
import { createLogger } from '../../../shared/utils/index.js';
|
import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
|
||||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||||
import type { RunPaths } from '../run/run-paths.js';
|
import type { RunPaths } from '../run/run-paths.js';
|
||||||
|
|
||||||
@ -237,9 +238,17 @@ export class MovementExecutor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3: status judgment (new session, no tools, determines matched rule)
|
// Phase 3: status judgment (new session, no tools, determines matched rule)
|
||||||
const phase3Result = needsStatusJudgmentPhase(step)
|
let phase3Result: StatusJudgmentPhaseResult | undefined;
|
||||||
|
try {
|
||||||
|
phase3Result = needsStatusJudgmentPhase(step)
|
||||||
? await runStatusJudgmentPhase(step, phaseCtx)
|
? await runStatusJudgmentPhase(step, phaseCtx)
|
||||||
: undefined;
|
: undefined;
|
||||||
|
} catch (error) {
|
||||||
|
log.info('Phase 3 status judgment failed, falling back to phase1 rule evaluation', {
|
||||||
|
movement: step.name,
|
||||||
|
error: getErrorMessage(error),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (phase3Result) {
|
if (phase3Result) {
|
||||||
log.debug('Rule matched (Phase 3)', {
|
log.debug('Rule matched (Phase 3)', {
|
||||||
|
|||||||
@ -14,6 +14,7 @@ import { executeAgent } from '../../../agents/agent-usecases.js';
|
|||||||
import { ParallelLogger } from './parallel-logger.js';
|
import { ParallelLogger } from './parallel-logger.js';
|
||||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
||||||
import { detectMatchedRule } from '../evaluation/index.js';
|
import { detectMatchedRule } from '../evaluation/index.js';
|
||||||
|
import type { StatusJudgmentPhaseResult } from '../phase-runner.js';
|
||||||
import { incrementMovementIteration } from './state-manager.js';
|
import { incrementMovementIteration } from './state-manager.js';
|
||||||
import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
|
import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
|
||||||
import { buildSessionKey } from '../session-key.js';
|
import { buildSessionKey } from '../session-key.js';
|
||||||
@ -154,9 +155,17 @@ export class ParallelRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3: status judgment for sub-movement
|
// Phase 3: status judgment for sub-movement
|
||||||
const subPhase3 = needsStatusJudgmentPhase(subMovement)
|
let subPhase3: StatusJudgmentPhaseResult | undefined;
|
||||||
|
try {
|
||||||
|
subPhase3 = needsStatusJudgmentPhase(subMovement)
|
||||||
? await runStatusJudgmentPhase(subMovement, phaseCtx)
|
? await runStatusJudgmentPhase(subMovement, phaseCtx)
|
||||||
: undefined;
|
: undefined;
|
||||||
|
} catch (error) {
|
||||||
|
log.info('Phase 3 status judgment failed for sub-movement, falling back to phase1 rule evaluation', {
|
||||||
|
movement: subMovement.name,
|
||||||
|
error: getErrorMessage(error),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
let finalResponse: AgentResponse;
|
let finalResponse: AgentResponse;
|
||||||
if (subPhase3) {
|
if (subPhase3) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user