takt/src/__tests__/ai-judge.test.ts
nrslib b25e9a78ab fix: callAiJudgeをプロバイダーシステム経由に変更(Codex対応)
callAiJudgeがinfra/claude/にハードコードされており、Codexプロバイダー使用時に
judge評価が動作しなかった。agents/ai-judge.tsに移動し、runAgent経由で
プロバイダーを正しく解決するように修正。
2026-02-10 19:32:42 +09:00

54 lines
1.7 KiB
TypeScript

/**
 * Tests for the AI judge helpers (ai() condition evaluation):
 * detectJudgeIndex and buildJudgePrompt.
 */
import { describe, it, expect } from 'vitest';
import { detectJudgeIndex, buildJudgePrompt } from '../agents/ai-judge.js';
// Suite for detectJudgeIndex. The cases below pin the expected mapping from a
// 1-based [JUDGE:N] tag found in text to a 0-based index, with -1 expected
// when no tag is found or the tag number is 0.
describe('detectJudgeIndex', () => {
  it('should detect [JUDGE:1] as index 0', () => {
    const detected = detectJudgeIndex('[JUDGE:1]');

    expect(detected).toBe(0);
  });

  it('should detect [JUDGE:3] as index 2', () => {
    // The tag is surrounded by unrelated text on both sides.
    const detected = detectJudgeIndex('Some output [JUDGE:3] more text');

    expect(detected).toBe(2);
  });

  it('should return -1 for no match', () => {
    const detected = detectJudgeIndex('No judge tag here');

    expect(detected).toBe(-1);
  });

  it('should return -1 for [JUDGE:0]', () => {
    // Tag numbers are 1-based in these tests, so 0 is expected to be rejected.
    const detected = detectJudgeIndex('[JUDGE:0]');

    expect(detected).toBe(-1);
  });

  it('should be case-insensitive', () => {
    const detected = detectJudgeIndex('[judge:2]');

    expect(detected).toBe(1);
  });
});
// Suite for buildJudgePrompt. The cases check that the generated prompt text
// contains the heading, the agent output, one table row per condition
// (numbered from 1), and the [JUDGE:N] tag placeholder.
describe('buildJudgePrompt', () => {
  it('should build a well-structured judge prompt', () => {
    const agentOutput = 'Code implementation complete.\n\nAll tests pass.';
    const conditions = [
      { index: 0, text: 'No issues found' },
      { index: 1, text: 'Issues detected that need fixing' },
    ];

    const prompt = buildJudgePrompt(agentOutput, conditions);

    // Checked in order; the first missing fragment fails the test.
    const expectedFragments = [
      '# Judge Task',
      'Code implementation complete.',
      'All tests pass.',
      '| 1 | No issues found |',
      '| 2 | Issues detected that need fixing |',
      '[JUDGE:N]',
    ];
    for (const fragment of expectedFragments) {
      expect(prompt).toContain(fragment);
    }
  });

  it('should handle single condition', () => {
    const onlyCondition = { index: 0, text: 'Always true' };

    const prompt = buildJudgePrompt('output', [onlyCondition]);

    expect(prompt).toContain('| 1 | Always true |');
  });
});