takt/e2e/specs/structured-output.e2e.ts
Takashi Morikubo 50935a1244
Remove worktree prompt from execute action (#414)
* fix: remove execute worktree prompt and deprecate create-worktree option

* test(e2e): align specs with removed --create-worktree

* fix: remove execute worktree leftovers and align docs/tests

---------

Co-authored-by: Takashi Morikubo <azurite0107@gmail.com>
2026-03-02 16:12:18 +09:00

96 lines
3.9 KiB
TypeScript

import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { createIsolatedEnv, type IsolatedEnv } from '../helpers/isolated-env';
import { createLocalRepo, type LocalRepo } from '../helpers/test-repo';
import { runTakt } from '../helpers/takt-runner';
import { readSessionRecords } from '../helpers/session-log';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/**
* E2E: Structured output for status judgment (Phase 3).
*
* Verifies that real providers (Claude, Codex, OpenCode) can execute a piece
* where the status judgment phase uses structured output (`outputSchema`)
* internally via `judgeStatus()`.
*
* The piece has 2 rules per step, so `judgeStatus` cannot auto-select
* and must actually call the provider with an outputSchema to determine
* which rule matched.
*
* If structured output works correctly, `judgeStatus` extracts the step
* number from `response.structuredOutput.step` (recorded as `structured_output`).
* If the agent happens to output `[STEP:N]` tags, the RuleEvaluator detects
* them as `phase3_tag`/`phase1_tag` (recorded as `tag_fallback` in session log).
* The session log matchMethod is transformed by `toJudgmentMatchMethod()`.
*
* Run with:
* TAKT_E2E_PROVIDER=claude vitest run --config vitest.config.e2e.structured-output.ts
* TAKT_E2E_PROVIDER=codex vitest run --config vitest.config.e2e.structured-output.ts
* TAKT_E2E_PROVIDER=opencode TAKT_E2E_MODEL=openai/gpt-4 vitest run --config vitest.config.e2e.structured-output.ts
*/
describe('E2E: Structured output rule matching', () => {
let isolatedEnv: IsolatedEnv;
let repo: LocalRepo;
beforeEach(() => {
isolatedEnv = createIsolatedEnv();
repo = createLocalRepo();
});
afterEach(() => {
try { repo.cleanup(); } catch { /* best-effort */ }
try { isolatedEnv.cleanup(); } catch { /* best-effort */ }
});
it('should complete piece via Phase 3 status judgment with 2-rule step', () => {
const piecePath = resolve(__dirname, '../fixtures/pieces/structured-output.yaml');
const result = runTakt({
args: [
'--task', 'Say hello',
'--piece', piecePath,
],
cwd: repo.path,
env: isolatedEnv.env,
timeout: 240_000,
});
if (result.exitCode !== 0) {
console.log('=== STDOUT ===\n', result.stdout);
console.log('=== STDERR ===\n', result.stderr);
}
// Always log the matchMethod for diagnostic purposes
const allRecords = readSessionRecords(repo.path);
const sc = allRecords.find((r) => r.type === 'step_complete');
console.log(`=== matchMethod: ${sc?.matchMethod ?? '(none)'} ===`);
expect(result.exitCode).toBe(0);
expect(result.stdout).toContain('Piece completed');
// Verify session log has proper step_complete with matchMethod
const records = readSessionRecords(repo.path);
const pieceComplete = records.find((r) => r.type === 'piece_complete');
expect(pieceComplete).toBeDefined();
const stepComplete = records.find((r) => r.type === 'step_complete');
expect(stepComplete).toBeDefined();
// matchMethod should be present — the 2-rule step required actual judgment
// (auto_select is only used for single-rule steps)
const matchMethod = stepComplete?.matchMethod as string | undefined;
expect(matchMethod).toBeDefined();
// Session log records transformed matchMethod via toJudgmentMatchMethod():
// structured_output → structured_output (judgeStatus extracted from structuredOutput.step)
// phase3_tag / phase1_tag → tag_fallback (agent output [STEP:N] tag, detected by RuleEvaluator)
// ai_judge / ai_judge_fallback → ai_judge (AI evaluated conditions as fallback)
const validMethods = ['structured_output', 'tag_fallback', 'ai_judge'];
expect(validMethods).toContain(matchMethod);
}, 240_000);
});