diff --git a/src/__tests__/it-error-recovery.test.ts b/src/__tests__/it-error-recovery.test.ts new file mode 100644 index 0000000..77a3c01 --- /dev/null +++ b/src/__tests__/it-error-recovery.test.ts @@ -0,0 +1,365 @@ +/** + * Error recovery integration tests. + * + * Tests agent error, blocked responses, max iteration limits, + * loop detection, scenario queue exhaustion, and step execution exceptions. + * + * Mocked: UI, session, phase-runner, notifications, config, callAiJudge + * Not mocked: WorkflowEngine, runAgent, detectMatchedRule, rule-evaluator + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { setMockScenario, resetScenario } from '../mock/scenario.js'; +import type { WorkflowConfig, WorkflowStep, WorkflowRule } from '../models/types.js'; + +// --- Mocks --- + +vi.mock('../claude/client.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + callAiJudge: vi.fn().mockResolvedValue(-1), + }; +}); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), + generateSessionId: vi.fn().mockReturnValue('test-session-id'), +})); + +vi.mock('../config/globalConfig.js', () => ({ + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), + getDisabledBuiltins: vi.fn().mockReturnValue([]), +})); + +vi.mock('../config/projectConfig.js', () => ({ + loadProjectConfig: vi.fn().mockReturnValue({}), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; + +// --- Test helpers --- + 
+
+function makeRule(condition: string, next: string): WorkflowRule {
+  return { condition, next };
+}
+
+function makeStep(name: string, agentPath: string, rules: WorkflowRule[]): WorkflowStep {
+  return {
+    name,
+    agent: `./agents/${name}.md`,
+    agentDisplayName: name,
+    agentPath,
+    instructionTemplate: '{task}',
+    passPreviousResponse: true,
+    rules,
+  };
+}
+
+function createTestEnv(): { dir: string; agentPaths: Record<string, string> } {
+  const dir = mkdtempSync(join(tmpdir(), 'takt-it-err-'));
+  mkdirSync(join(dir, '.takt', 'reports', 'test-report-dir'), { recursive: true });
+
+  const agentsDir = join(dir, 'agents');
+  mkdirSync(agentsDir, { recursive: true });
+
+  // Agent file names match step names used in makeStep()
+  const agents = ['plan', 'implement', 'review', 'supervisor'];
+  const agentPaths: Record<string, string> = {};
+  for (const agent of agents) {
+    const path = join(agentsDir, `${agent}.md`);
+    writeFileSync(path, `You are a ${agent} agent.`);
+    agentPaths[agent] = path;
+  }
+
+  return { dir, agentPaths };
+}
+
+function buildWorkflow(agentPaths: Record<string, string>, maxIterations: number): WorkflowConfig {
+  return {
+    name: 'it-error',
+    description: 'IT error recovery workflow',
+    maxIterations,
+    initialStep: 'plan',
+    steps: [
+      makeStep('plan', agentPaths.plan, [
+        makeRule('Requirements are clear', 'implement'),
+        makeRule('Requirements unclear', 'ABORT'),
+      ]),
+      makeStep('implement', agentPaths.implement, [
+        makeRule('Implementation complete', 'review'),
+        makeRule('Cannot proceed', 'plan'),
+      ]),
+      makeStep('review', agentPaths.review, [
+        makeRule('All checks passed', 'COMPLETE'),
+        makeRule('Issues found', 'implement'),
+      ]),
+    ],
+  };
+}
+
+describe('Error Recovery IT: agent blocked response', () => {
+  let testDir: string;
+  let agentPaths: Record<string, string>;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    const env = createTestEnv();
+    testDir = env.dir;
+    agentPaths = env.agentPaths;
+  });
+
+  afterEach(() => {
+    resetScenario();
+    rmSync(testDir, { recursive: true, force: true
}); + }); + + it('should handle blocked agent response gracefully', async () => { + setMockScenario([ + { agent: 'plan', status: 'blocked', content: 'Error: Agent is blocked.' }, + ]); + + const config = buildWorkflow(agentPaths, 10); + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + // Blocked agent should result in workflow abort + expect(state.status).toBe('aborted'); + }); + + it('should handle empty content from agent', async () => { + setMockScenario([ + { agent: 'plan', status: 'done', content: '' }, + ]); + + const config = buildWorkflow(agentPaths, 10); + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + // Empty content means no tag match; should eventually abort + expect(['aborted', 'completed']).toContain(state.status); + }); +}); + +describe('Error Recovery IT: max iterations reached', () => { + let testDir: string; + let agentPaths: Record; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPaths = env.agentPaths; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should abort when max iterations reached (tight limit)', async () => { + // Only 2 iterations allowed, but workflow needs 3 steps + setMockScenario([ + { agent: 'plan', status: 'done', content: '[PLAN:1]\n\nClear.' }, + { agent: 'implement', status: 'done', content: '[IMPLEMENT:1]\n\nDone.' }, + { agent: 'review', status: 'done', content: '[REVIEW:1]\n\nPassed.' 
}, + ]); + + const config = buildWorkflow(agentPaths, 2); + const engine = new WorkflowEngine(config, testDir, 'Task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(state.iteration).toBeLessThanOrEqual(2); + }); + + it('should abort when infinite plan → implement loop hits max', async () => { + // plan → implement → plan → implement ... + const loopScenario = Array.from({ length: 10 }, (_, i) => ({ + status: 'done' as const, + content: i % 2 === 0 ? '[PLAN:1]\n\nClear.' : '[IMPLEMENT:2]\n\nCannot proceed.', + })); + setMockScenario(loopScenario); + + const config = buildWorkflow(agentPaths, 4); + const engine = new WorkflowEngine(config, testDir, 'Looping task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(state.iteration).toBeLessThanOrEqual(4); + }); +}); + +describe('Error Recovery IT: scenario queue exhaustion', () => { + let testDir: string; + let agentPaths: Record; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPaths = env.agentPaths; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should handle scenario queue exhaustion mid-workflow', async () => { + // Only 1 entry, but workflow needs 3 steps + setMockScenario([ + { agent: 'plan', status: 'done', content: '[PLAN:1]\n\nClear.' 
}, + ]); + + const config = buildWorkflow(agentPaths, 10); + const engine = new WorkflowEngine(config, testDir, 'Task', { + projectCwd: testDir, + provider: 'mock', + }); + + // Should not throw; mock client falls back to generic response when queue is empty + const state = await engine.run(); + + // Even with queue exhaustion, engine should reach some terminal state + expect(['completed', 'aborted']).toContain(state.status); + }); +}); + +describe('Error Recovery IT: step events on error paths', () => { + let testDir: string; + let agentPaths: Record; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPaths = env.agentPaths; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should emit workflow:abort event with reason on max iterations', async () => { + const loopScenario = Array.from({ length: 6 }, (_, i) => ({ + status: 'done' as const, + content: i % 2 === 0 ? '[PLAN:1]\n\nClear.' : '[IMPLEMENT:2]\n\nCannot proceed.', + })); + setMockScenario(loopScenario); + + const config = buildWorkflow(agentPaths, 3); + const engine = new WorkflowEngine(config, testDir, 'Task', { + projectCwd: testDir, + provider: 'mock', + }); + + let abortReason: string | undefined; + engine.on('workflow:abort', (_state, reason) => { + abortReason = reason; + }); + + await engine.run(); + + expect(abortReason).toBeDefined(); + }); + + it('should emit step:start and step:complete for each executed step before abort', async () => { + setMockScenario([ + { agent: 'plan', status: 'done', content: '[PLAN:2]\n\nRequirements unclear.' 
}, + ]); + + const config = buildWorkflow(agentPaths, 10); + const engine = new WorkflowEngine(config, testDir, 'Task', { + projectCwd: testDir, + provider: 'mock', + }); + + const startedSteps: string[] = []; + const completedSteps: string[] = []; + + engine.on('step:start', (step) => { + startedSteps.push(step.name); + }); + engine.on('step:complete', (step) => { + completedSteps.push(step.name); + }); + + await engine.run(); + + expect(startedSteps).toEqual(['plan']); + expect(completedSteps).toEqual(['plan']); + }); +}); + +describe('Error Recovery IT: programmatic abort', () => { + let testDir: string; + let agentPaths: Record; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPaths = env.agentPaths; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should support engine.abort() to cancel running workflow', async () => { + // Provide enough scenarios for 3 steps + setMockScenario([ + { agent: 'plan', status: 'done', content: '[PLAN:1]\n\nClear.' }, + { agent: 'implement', status: 'done', content: '[IMPLEMENT:1]\n\nDone.' }, + { agent: 'review', status: 'done', content: '[REVIEW:1]\n\nPassed.' }, + ]); + + const config = buildWorkflow(agentPaths, 10); + const engine = new WorkflowEngine(config, testDir, 'Task', { + projectCwd: testDir, + provider: 'mock', + }); + + // Abort after the first step completes + engine.on('step:complete', () => { + engine.abort(); + }); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + // Should have aborted after 1 step + expect(state.iteration).toBeLessThanOrEqual(2); + }); +}); diff --git a/src/__tests__/it-instruction-builder.test.ts b/src/__tests__/it-instruction-builder.test.ts new file mode 100644 index 0000000..c8d8614 --- /dev/null +++ b/src/__tests__/it-instruction-builder.test.ts @@ -0,0 +1,361 @@ +/** + * Instruction builder integration tests. 
+ * + * Tests template variable expansion and auto-injection in buildInstruction(). + * Uses real workflow step configs (not mocked) against the buildInstruction function. + * + * Not mocked: buildInstruction, buildReportInstruction, buildStatusJudgmentInstruction + */ + +import { describe, it, expect, vi } from 'vitest'; +import type { WorkflowStep, WorkflowRule, AgentResponse } from '../models/types.js'; + +vi.mock('../config/globalConfig.js', () => ({ + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), +})); + +import { + buildInstruction, + buildReportInstruction, + buildStatusJudgmentInstruction, +} from '../workflow/instruction-builder.js'; +import type { InstructionContext } from '../workflow/instruction-builder.js'; + +// --- Test helpers --- + +function makeRule(condition: string, next: string, extra?: Partial): WorkflowRule { + return { condition, next, ...extra }; +} + +function makeStep(overrides: Partial = {}): WorkflowStep { + return { + name: 'test-step', + agent: 'test-agent', + agentDisplayName: 'test-step', + instructionTemplate: 'Do the work.', + passPreviousResponse: false, + rules: [ + makeRule('Done', 'COMPLETE'), + makeRule('Not done', 'ABORT'), + ], + ...overrides, + }; +} + +function makeContext(overrides: Partial = {}): InstructionContext { + return { + task: 'Test task description', + iteration: 3, + maxIterations: 30, + stepIteration: 2, + cwd: '/tmp/test-project', + projectCwd: '/tmp/test-project', + userInputs: [], + language: 'en', + ...overrides, + }; +} + +describe('Instruction Builder IT: task auto-injection', () => { + it('should auto-inject task as "User Request" section when template has no {task}', () => { + const step = makeStep({ instructionTemplate: 'Do the work.' 
}); + const ctx = makeContext({ task: 'Build the login page' }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('## User Request'); + expect(result).toContain('Build the login page'); + }); + + it('should NOT auto-inject task section when template contains {task}', () => { + const step = makeStep({ instructionTemplate: 'Here is the task: {task}' }); + const ctx = makeContext({ task: 'Build the login page' }); + + const result = buildInstruction(step, ctx); + + // Should not have separate User Request section + const userRequestCount = (result.match(/## User Request/g) || []).length; + expect(userRequestCount).toBe(0); + // But task should still appear inline + expect(result).toContain('Build the login page'); + }); +}); + +describe('Instruction Builder IT: previous_response auto-injection', () => { + it('should auto-inject previous response when passPreviousResponse is true', () => { + const step = makeStep({ + passPreviousResponse: true, + instructionTemplate: 'Continue the work.', + }); + const previousOutput: AgentResponse = { + agent: 'previous-agent', + status: 'done', + content: 'Previous agent completed step A.', + timestamp: new Date(), + }; + const ctx = makeContext({ previousOutput }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('## Previous Response'); + expect(result).toContain('Previous agent completed step A.'); + }); + + it('should NOT inject previous response when passPreviousResponse is false', () => { + const step = makeStep({ + passPreviousResponse: false, + instructionTemplate: 'Do fresh work.', + }); + const previousOutput: AgentResponse = { + agent: 'previous-agent', + status: 'done', + content: 'Previous output.', + timestamp: new Date(), + }; + const ctx = makeContext({ previousOutput }); + + const result = buildInstruction(step, ctx); + + expect(result).not.toContain('## Previous Response'); + expect(result).not.toContain('Previous output.'); + }); + + it('should NOT auto-inject 
when template contains {previous_response}', () => { + const step = makeStep({ + passPreviousResponse: true, + instructionTemplate: '## Context\n{previous_response}\n\nDo work.', + }); + const previousOutput: AgentResponse = { + agent: 'prev', status: 'done', content: 'Prior work done.', timestamp: new Date(), + }; + const ctx = makeContext({ previousOutput }); + + const result = buildInstruction(step, ctx); + + // Should not have separate Previous Response section + const prevCount = (result.match(/## Previous Response/g) || []).length; + expect(prevCount).toBe(0); + // But content should be inline + expect(result).toContain('Prior work done.'); + }); +}); + +describe('Instruction Builder IT: user_inputs auto-injection', () => { + it('should auto-inject user inputs section', () => { + const step = makeStep(); + const ctx = makeContext({ userInputs: ['Fix the typo', 'Use TypeScript'] }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('## Additional User Inputs'); + expect(result).toContain('Fix the typo'); + expect(result).toContain('Use TypeScript'); + }); + + it('should NOT auto-inject when template contains {user_inputs}', () => { + const step = makeStep({ instructionTemplate: 'Inputs: {user_inputs}' }); + const ctx = makeContext({ userInputs: ['Input A'] }); + + const result = buildInstruction(step, ctx); + + const count = (result.match(/## Additional User Inputs/g) || []).length; + expect(count).toBe(0); + expect(result).toContain('Input A'); + }); +}); + +describe('Instruction Builder IT: iteration variables', () => { + it('should replace {iteration}, {max_iterations}, {step_iteration} in template', () => { + const step = makeStep({ + instructionTemplate: 'Iter: {iteration}/{max_iterations}, step iter: {step_iteration}', + }); + const ctx = makeContext({ iteration: 5, maxIterations: 30, stepIteration: 2 }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('Iter: 5/30, step iter: 2'); + }); + + 
it('should include iteration in Workflow Context section', () => { + const step = makeStep(); + const ctx = makeContext({ iteration: 7, maxIterations: 20, stepIteration: 3 }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('Iteration: 7/20'); + expect(result).toContain('Step Iteration: 3'); + }); +}); + +describe('Instruction Builder IT: report_dir expansion', () => { + it('should replace {report_dir} in template', () => { + const step = makeStep({ + instructionTemplate: 'Read the plan from {report_dir}/00-plan.md', + }); + const ctx = makeContext({ reportDir: '/tmp/test-project/.takt/reports/20250126-task' }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('Read the plan from /tmp/test-project/.takt/reports/20250126-task/00-plan.md'); + }); + + it('should replace {report:filename} with full path', () => { + const step = makeStep({ + instructionTemplate: 'Read {report:00-plan.md} for the plan.', + }); + const ctx = makeContext({ reportDir: '/tmp/reports' }); + + const result = buildInstruction(step, ctx); + + expect(result).toContain('Read /tmp/reports/00-plan.md for the plan.'); + }); +}); + +describe('Instruction Builder IT: status output rules injection', () => { + it('should inject status rules for steps with tag-based rules', () => { + const step = makeStep({ + name: 'plan', + rules: [ + makeRule('Requirements clear', 'implement'), + makeRule('Requirements unclear', 'ABORT'), + ], + }); + const ctx = makeContext(); + + const result = buildInstruction(step, ctx); + + // Should contain status rules section with the tag format + expect(result).toContain('[PLAN:'); + expect(result).toContain('Requirements clear'); + expect(result).toContain('Requirements unclear'); + }); + + it('should NOT inject status rules for steps with only ai() conditions', () => { + const step = makeStep({ + name: 'review', + rules: [ + makeRule('ai("approved")', 'COMPLETE', { isAiCondition: true, aiConditionText: 'approved' }), + 
makeRule('ai("rejected")', 'ABORT', { isAiCondition: true, aiConditionText: 'rejected' }), + ], + }); + const ctx = makeContext(); + + const result = buildInstruction(step, ctx); + + // Should NOT contain tag-based status rules + expect(result).not.toContain('[REVIEW:'); + }); +}); + +describe('Instruction Builder IT: edit permission in execution context', () => { + it('should include edit permission rules when edit is true', () => { + const step = makeStep({ edit: true }); + const ctx = makeContext(); + + const result = buildInstruction(step, ctx); + + // Should mention editing is allowed + expect(result).toMatch(/edit|Edit|ファイル/i); + }); + + it('should indicate read-only when edit is false', () => { + const step = makeStep({ edit: false }); + const ctx = makeContext(); + + const result = buildInstruction(step, ctx); + + // Should contain the "Editing is DISABLED" execution rule + expect(result).toContain('Editing is DISABLED'); + expect(result).not.toContain('Editing is ENABLED'); + }); +}); + +describe('Instruction Builder IT: buildReportInstruction', () => { + it('should build report instruction with report context', () => { + const step = makeStep({ + name: 'plan', + report: { name: '00-plan.md', format: '# Plan\n{step_iteration}' }, + }); + + const result = buildReportInstruction(step, { + cwd: '/tmp/test', + reportDir: '/tmp/test/.takt/reports/test-dir', + stepIteration: 1, + language: 'en', + }); + + expect(result).toContain('00-plan.md'); + expect(result).toContain('/tmp/test/.takt/reports/test-dir'); + expect(result).toContain('report'); + }); + + it('should throw for step without report config', () => { + const step = makeStep({ report: undefined }); + + expect(() => + buildReportInstruction(step, { + cwd: '/tmp', + reportDir: '/tmp/reports', + stepIteration: 1, + }), + ).toThrow(/no report config/); + }); +}); + +describe('Instruction Builder IT: buildStatusJudgmentInstruction', () => { + it('should build Phase 3 instruction with status rules', () => { 
+    const step = makeStep({
+      name: 'plan',
+      rules: [
+        makeRule('Clear', 'implement'),
+        makeRule('Unclear', 'ABORT'),
+      ],
+    });
+
+    const result = buildStatusJudgmentInstruction(step, { language: 'en' });
+
+    expect(result).toContain('[PLAN:');
+    expect(result).toContain('Clear');
+    expect(result).toContain('Unclear');
+  });
+
+  it('should throw for step without rules', () => {
+    const step = makeStep({ rules: undefined });
+
+    expect(() =>
+      buildStatusJudgmentInstruction(step, { language: 'en' }),
+    ).toThrow(/no rules/);
+  });
+});
+
+describe('Instruction Builder IT: template injection prevention', () => {
+  it('should escape curly braces in task content', () => {
+    const step = makeStep();
+    const ctx = makeContext({ task: 'Use {variable} in code' });
+
+    const result = buildInstruction(step, ctx);
+
+    // Braces should be escaped to prevent template re-injection
+    expect(result).not.toContain('{variable}');
+    expect(result).toContain('variable');
+  });
+
+  it('should escape curly braces in previous response content', () => {
+    const step = makeStep({
+      passPreviousResponse: true,
+      instructionTemplate: 'Continue.',
+    });
+    const ctx = makeContext({
+      previousOutput: {
+        agent: 'prev', status: 'done',
+        content: 'Use {template} syntax', timestamp: new Date(),
+      },
+    });
+
+    const result = buildInstruction(step, ctx);
+
+    expect(result).not.toContain('{template}');
+    expect(result).toContain('template');
+  });
+});
diff --git a/src/__tests__/it-pipeline-modes.test.ts b/src/__tests__/it-pipeline-modes.test.ts
new file mode 100644
index 0000000..260382e
--- /dev/null
+++ b/src/__tests__/it-pipeline-modes.test.ts
@@ -0,0 +1,546 @@
+/**
+ * Pipeline execution mode integration tests.
+ * + * Tests various --pipeline mode option combinations including: + * - --task, --issue, --skip-git, --auto-pr, --workflow (name/path), --provider, --model + * - Exit codes for different failure scenarios + * + * Mocked: git (child_process), GitHub API, UI, notifications, session, phase-runner, config + * Not mocked: executePipeline, executeTask, WorkflowEngine, runAgent, rule evaluation + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { setMockScenario, resetScenario } from '../mock/scenario.js'; + +// --- Mocks --- + +const { + mockFetchIssue, + mockFormatIssueAsTask, + mockCheckGhCli, + mockCreatePullRequest, + mockPushBranch, +} = vi.hoisted(() => ({ + mockFetchIssue: vi.fn(), + mockFormatIssueAsTask: vi.fn(), + mockCheckGhCli: vi.fn(), + mockCreatePullRequest: vi.fn(), + mockPushBranch: vi.fn(), +})); + +vi.mock('../claude/client.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + callAiJudge: vi.fn().mockResolvedValue(-1), + }; +}); + +vi.mock('node:child_process', () => ({ + execFileSync: vi.fn(), +})); + +vi.mock('../github/issue.js', () => ({ + fetchIssue: mockFetchIssue, + formatIssueAsTask: mockFormatIssueAsTask, + checkGhCli: mockCheckGhCli, +})); + +vi.mock('../github/pr.js', () => ({ + createPullRequest: mockCreatePullRequest, + pushBranch: mockPushBranch, + buildPrBody: vi.fn().mockReturnValue('PR body'), +})); + +vi.mock('../task/git.js', () => ({ + stageAndCommit: vi.fn().mockReturnValue('abc1234'), +})); + +vi.mock('../utils/ui.js', () => ({ + header: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + success: vi.fn(), + status: vi.fn(), + blankLine: vi.fn(), + StreamDisplay: vi.fn().mockImplementation(() => ({ + createHandler: () => vi.fn(), + flush: vi.fn(), + })), +})); + 
+vi.mock('../utils/notification.js', () => ({ + notifySuccess: vi.fn(), + notifyError: vi.fn(), +})); + +vi.mock('../utils/session.js', () => ({ + generateSessionId: vi.fn().mockReturnValue('test-session-id'), + createSessionLog: vi.fn().mockReturnValue({ + startTime: new Date().toISOString(), + iterations: 0, + }), + finalizeSessionLog: vi.fn().mockImplementation((log, status) => ({ ...log, status })), + updateLatestPointer: vi.fn(), + initNdjsonLog: vi.fn().mockReturnValue('/tmp/test.ndjson'), + appendNdjsonLine: vi.fn(), + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +vi.mock('../config/paths.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + loadAgentSessions: vi.fn().mockReturnValue({}), + updateAgentSession: vi.fn(), + loadWorktreeSessions: vi.fn().mockReturnValue({}), + updateWorktreeSession: vi.fn(), + getCurrentWorkflow: vi.fn().mockReturnValue('default'), + getProjectConfigDir: vi.fn().mockImplementation((cwd: string) => join(cwd, '.takt')), + }; +}); + +vi.mock('../config/globalConfig.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), + getDisabledBuiltins: vi.fn().mockReturnValue([]), + }; +}); + +vi.mock('../config/projectConfig.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + loadProjectConfig: vi.fn().mockReturnValue({}), + }; +}); + +vi.mock('../cli.js', () => ({ + isQuietMode: vi.fn().mockReturnValue(true), +})); + +vi.mock('../prompt/index.js', () => ({ + selectOption: vi.fn().mockResolvedValue('stop'), + promptInput: vi.fn().mockResolvedValue(null), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: 
vi.fn().mockResolvedValue(''), +})); + +// --- Imports (after mocks) --- + +import { executePipeline } from '../commands/pipelineExecution.js'; +import { + EXIT_ISSUE_FETCH_FAILED, + EXIT_WORKFLOW_FAILED, + EXIT_PR_CREATION_FAILED, +} from '../exitCodes.js'; + +// --- Test helpers --- + +function createTestWorkflowDir(): { dir: string; workflowPath: string } { + const dir = mkdtempSync(join(tmpdir(), 'takt-it-pm-')); + mkdirSync(join(dir, '.takt', 'reports', 'test-report-dir'), { recursive: true }); + + const agentsDir = join(dir, 'agents'); + mkdirSync(agentsDir, { recursive: true }); + writeFileSync(join(agentsDir, 'planner.md'), 'You are a planner.'); + writeFileSync(join(agentsDir, 'coder.md'), 'You are a coder.'); + writeFileSync(join(agentsDir, 'reviewer.md'), 'You are a reviewer.'); + + const workflowYaml = ` +name: it-pipeline +description: Pipeline test workflow +max_iterations: 10 +initial_step: plan + +steps: + - name: plan + agent: ./agents/planner.md + rules: + - condition: Requirements are clear + next: implement + - condition: Requirements unclear + next: ABORT + instruction: "{task}" + + - name: implement + agent: ./agents/coder.md + rules: + - condition: Implementation complete + next: review + - condition: Cannot proceed + next: plan + instruction: "{task}" + + - name: review + agent: ./agents/reviewer.md + rules: + - condition: All checks passed + next: COMPLETE + - condition: Issues found + next: implement + instruction: "{task}" +`; + + const workflowPath = join(dir, 'workflow.yaml'); + writeFileSync(workflowPath, workflowYaml); + + return { dir, workflowPath }; +} + +function happyScenario(): void { + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nRequirements are clear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nImplementation complete.' }, + { agent: 'reviewer', status: 'done', content: '[REVIEW:1]\n\nAll checks passed.' 
}, + ]); +} + +describe('Pipeline Modes IT: --task + --workflow path', () => { + let testDir: string; + let workflowPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const setup = createTestWorkflowDir(); + testDir = setup.dir; + workflowPath = setup.workflowPath; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should return exit code 0 on successful pipeline', async () => { + happyScenario(); + + const exitCode = await executePipeline({ + task: 'Add a feature', + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + }); + + it('should return EXIT_WORKFLOW_FAILED (3) on ABORT', async () => { + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:2]\n\nRequirements unclear.' }, + ]); + + const exitCode = await executePipeline({ + task: 'Vague task', + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(EXIT_WORKFLOW_FAILED); + }); +}); + +describe('Pipeline Modes IT: --task + --workflow name (builtin)', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + const setup = createTestWorkflowDir(); + testDir = setup.dir; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should load and execute builtin simple workflow by name', async () => { + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nRequirements are clear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nImplementation complete.' }, + { agent: 'ai-antipattern-reviewer', status: 'done', content: '[AI_REVIEW:1]\n\nNo issues.' }, + { agent: 'architecture-reviewer', status: 'done', content: '[REVIEW:1]\n\nNo issues found.' }, + { agent: 'supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAll checks passed.' 
}, + ]); + + const exitCode = await executePipeline({ + task: 'Add a feature', + workflow: 'simple', + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + }); + + it('should return EXIT_WORKFLOW_FAILED for non-existent workflow name', async () => { + const exitCode = await executePipeline({ + task: 'Test task', + workflow: 'non-existent-workflow-xyz', + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(EXIT_WORKFLOW_FAILED); + }); +}); + +describe('Pipeline Modes IT: --issue', () => { + let testDir: string; + let workflowPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const setup = createTestWorkflowDir(); + testDir = setup.dir; + workflowPath = setup.workflowPath; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should fetch issue and execute workflow', async () => { + mockCheckGhCli.mockReturnValue({ available: true }); + mockFetchIssue.mockReturnValue({ + number: 42, + title: 'Fix the bug', + body: 'Details here', + }); + mockFormatIssueAsTask.mockReturnValue('Fix the bug\n\nDetails here'); + happyScenario(); + + const exitCode = await executePipeline({ + issueNumber: 42, + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + expect(mockFetchIssue).toHaveBeenCalledWith(42); + }); + + it('should return EXIT_ISSUE_FETCH_FAILED when gh CLI unavailable', async () => { + mockCheckGhCli.mockReturnValue({ available: false, error: 'gh not found' }); + + const exitCode = await executePipeline({ + issueNumber: 42, + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(EXIT_ISSUE_FETCH_FAILED); + }); + + it('should return EXIT_ISSUE_FETCH_FAILED when issue fetch throws', async () => { + mockCheckGhCli.mockReturnValue({ available: true }); + 
mockFetchIssue.mockImplementation(() => { + throw new Error('Issue not found'); + }); + + const exitCode = await executePipeline({ + issueNumber: 999, + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(EXIT_ISSUE_FETCH_FAILED); + }); + + it('should return EXIT_ISSUE_FETCH_FAILED when neither --issue nor --task specified', async () => { + const exitCode = await executePipeline({ + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(EXIT_ISSUE_FETCH_FAILED); + }); +}); + +describe('Pipeline Modes IT: --auto-pr', () => { + let testDir: string; + let workflowPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const setup = createTestWorkflowDir(); + testDir = setup.dir; + workflowPath = setup.workflowPath; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should create PR on success when --auto-pr is set (without --skip-git)', async () => { + happyScenario(); + mockCreatePullRequest.mockReturnValue({ success: true, url: 'https://github.com/test/pr/1' }); + + const exitCode = await executePipeline({ + task: 'Add a feature', + workflow: workflowPath, + autoPr: true, + skipGit: false, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + expect(mockCreatePullRequest).toHaveBeenCalled(); + }); + + it('should return EXIT_PR_CREATION_FAILED when PR creation fails', async () => { + happyScenario(); + mockCreatePullRequest.mockReturnValue({ success: false, error: 'Rate limited' }); + + const exitCode = await executePipeline({ + task: 'Add a feature', + workflow: workflowPath, + autoPr: true, + skipGit: false, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(EXIT_PR_CREATION_FAILED); + }); + + it('should skip PR creation when --auto-pr and --skip-git are both set', async () => { + happyScenario(); + + const exitCode = 
await executePipeline({ + task: 'Add a feature', + workflow: workflowPath, + autoPr: true, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + expect(mockCreatePullRequest).not.toHaveBeenCalled(); + }); +}); + +describe('Pipeline Modes IT: --provider and --model overrides', () => { + let testDir: string; + let workflowPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const setup = createTestWorkflowDir(); + testDir = setup.dir; + workflowPath = setup.workflowPath; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should pass provider override to workflow execution', async () => { + happyScenario(); + + const exitCode = await executePipeline({ + task: 'Test task', + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + }); + + it('should pass model override to workflow execution', async () => { + happyScenario(); + + const exitCode = await executePipeline({ + task: 'Test task', + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + model: 'opus', + }); + + expect(exitCode).toBe(0); + }); +}); + +describe('Pipeline Modes IT: review → fix loop', () => { + let testDir: string; + let workflowPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const setup = createTestWorkflowDir(); + testDir = setup.dir; + workflowPath = setup.workflowPath; + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should handle review → implement → review loop', async () => { + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nClear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nDone.' }, + // First review: issues found → back to implement + { agent: 'reviewer', status: 'done', content: '[REVIEW:2]\n\nIssues found.' 
}, + // Fix + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nFixed.' }, + // Second review: passed + { agent: 'reviewer', status: 'done', content: '[REVIEW:1]\n\nAll checks passed.' }, + ]); + + const exitCode = await executePipeline({ + task: 'Task with fix loop', + workflow: workflowPath, + autoPr: false, + skipGit: true, + cwd: testDir, + provider: 'mock', + }); + + expect(exitCode).toBe(0); + }); +}); diff --git a/src/__tests__/it-rule-evaluation.test.ts b/src/__tests__/it-rule-evaluation.test.ts new file mode 100644 index 0000000..74bade5 --- /dev/null +++ b/src/__tests__/it-rule-evaluation.test.ts @@ -0,0 +1,418 @@ +/** + * Rule evaluation integration tests. + * + * Tests the 5-stage rule evaluation cascade: + * 1. Aggregate conditions (all/any) + * 2. Phase 3 tag detection + * 3. Phase 1 tag detection (fallback) + * 4. AI judge for ai() conditions + * 5. AI judge fallback for all conditions + * + * Also tests RuleMatchMethod tracking. + * + * Mocked: callAiJudge (controlled responses) + * Not mocked: detectMatchedRule, evaluateAggregateConditions + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import type { WorkflowStep, WorkflowState, WorkflowRule, AgentResponse } from '../models/types.js'; + +// --- Mocks --- + +const mockCallAiJudge = vi.fn(); + +vi.mock('../claude/client.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + callAiJudge: (...args: unknown[]) => mockCallAiJudge(...args), + }; +}); + +vi.mock('../config/globalConfig.js', () => ({ + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), +})); + +vi.mock('../config/projectConfig.js', () => ({ + loadProjectConfig: vi.fn().mockReturnValue({}), +})); + +// --- Imports (after mocks) --- + +import { detectMatchedRule, evaluateAggregateConditions } from '../workflow/rule-evaluator.js'; +import type { RuleMatch, RuleEvaluatorContext } from '../workflow/rule-evaluator.js'; + 
+// --- Test helpers --- + +function makeRule(condition: string, next: string, extra?: Partial): WorkflowRule { + return { condition, next, ...extra }; +} + +function makeStep(name: string, rules: WorkflowRule[], parallel?: WorkflowStep[]): WorkflowStep { + return { + name, + agent: 'test-agent', + agentDisplayName: name, + instructionTemplate: '{task}', + passPreviousResponse: true, + rules, + parallel, + }; +} + +function makeState(stepOutputs?: Map): WorkflowState { + return { + workflowName: 'it-test', + currentStep: 'test', + iteration: 1, + status: 'running', + stepOutputs: stepOutputs ?? new Map(), + stepIterations: new Map(), + agentSessions: new Map(), + userInputs: [], + }; +} + +function makeCtx(stepOutputs?: Map): RuleEvaluatorContext { + return { + state: makeState(stepOutputs), + cwd: '/tmp/test', + }; +} + +describe('Rule Evaluation IT: Phase 3 tag detection', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockCallAiJudge.mockResolvedValue(-1); + }); + + it('should detect rule from Phase 3 tag content', async () => { + const step = makeStep('plan', [ + makeRule('Clear', 'implement'), + makeRule('Unclear', 'ABORT'), + ]); + + const result = await detectMatchedRule(step, 'Agent output without tag.', '[PLAN:2]', makeCtx()); + + expect(result).toEqual({ index: 1, method: 'phase3_tag' }); + }); + + it('should prefer Phase 3 tag over Phase 1 tag', async () => { + const step = makeStep('plan', [ + makeRule('Clear', 'implement'), + makeRule('Unclear', 'ABORT'), + ]); + + // Phase 1 has tag [PLAN:1], Phase 3 has tag [PLAN:2] + const result = await detectMatchedRule(step, '[PLAN:1] Clear.', '[PLAN:2]', makeCtx()); + + expect(result).toEqual({ index: 1, method: 'phase3_tag' }); + }); +}); + +describe('Rule Evaluation IT: Phase 1 tag fallback', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockCallAiJudge.mockResolvedValue(-1); + }); + + it('should fall back to Phase 1 tag when Phase 3 has no tag', async () => { + const step = makeStep('plan', [ + 
makeRule('Clear', 'implement'), + makeRule('Unclear', 'ABORT'), + ]); + + const result = await detectMatchedRule(step, '[PLAN:1] Requirements are clear.', '', makeCtx()); + + expect(result).toEqual({ index: 0, method: 'phase1_tag' }); + }); + + it('should detect last tag when multiple tags in Phase 1', async () => { + const step = makeStep('plan', [ + makeRule('Clear', 'implement'), + makeRule('Unclear', 'ABORT'), + ]); + + const result = await detectMatchedRule(step, 'Some [PLAN:1] text then [PLAN:2] final.', '', makeCtx()); + + expect(result).toEqual({ index: 1, method: 'phase1_tag' }); + }); +}); + +describe('Rule Evaluation IT: Aggregate conditions (all/any)', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockCallAiJudge.mockResolvedValue(-1); + }); + + it('should match all("approved") when all sub-steps have "approved"', () => { + const subStep1 = makeStep('arch-review', [ + makeRule('approved', ''), + makeRule('needs_fix', ''), + ]); + const subStep2 = makeStep('security-review', [ + makeRule('approved', ''), + makeRule('needs_fix', ''), + ]); + + const parentStep = makeStep('reviewers', [ + makeRule('all("approved")', 'supervise', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'approved', + }), + makeRule('any("needs_fix")', 'fix', { + isAggregateCondition: true, + aggregateType: 'any', + aggregateConditionText: 'needs_fix', + }), + ], [subStep1, subStep2]); + + const outputs = new Map(); + outputs.set('arch-review', { + agent: 'arch', status: 'done', content: 'approved', + timestamp: new Date(), matchedRuleIndex: 0, + }); + outputs.set('security-review', { + agent: 'security', status: 'done', content: 'approved', + timestamp: new Date(), matchedRuleIndex: 0, + }); + + const result = evaluateAggregateConditions(parentStep, makeState(outputs)); + + expect(result).toBe(0); // all("approved") is rule index 0 + }); + + it('should match any("needs_fix") when one sub-step has "needs_fix"', () => { + const subStep1 = 
makeStep('arch-review', [ + makeRule('approved', ''), + makeRule('needs_fix', ''), + ]); + const subStep2 = makeStep('security-review', [ + makeRule('approved', ''), + makeRule('needs_fix', ''), + ]); + + const parentStep = makeStep('reviewers', [ + makeRule('all("approved")', 'supervise', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'approved', + }), + makeRule('any("needs_fix")', 'fix', { + isAggregateCondition: true, + aggregateType: 'any', + aggregateConditionText: 'needs_fix', + }), + ], [subStep1, subStep2]); + + const outputs = new Map(); + outputs.set('arch-review', { + agent: 'arch', status: 'done', content: 'approved', + timestamp: new Date(), matchedRuleIndex: 0, + }); + outputs.set('security-review', { + agent: 'security', status: 'done', content: 'needs_fix', + timestamp: new Date(), matchedRuleIndex: 1, + }); + + const result = evaluateAggregateConditions(parentStep, makeState(outputs)); + + expect(result).toBe(1); // any("needs_fix") is rule index 1 + }); + + it('should return -1 when no aggregate condition matches', () => { + const subStep1 = makeStep('review-a', [ + makeRule('approved', ''), + makeRule('needs_fix', ''), + ]); + const subStep2 = makeStep('review-b', [ + makeRule('approved', ''), + makeRule('needs_fix', ''), + ]); + + const parentStep = makeStep('reviews', [ + makeRule('all("approved")', 'done', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'approved', + }), + ], [subStep1, subStep2]); + + const outputs = new Map(); + outputs.set('review-a', { + agent: 'a', status: 'done', content: 'approved', + timestamp: new Date(), matchedRuleIndex: 0, + }); + outputs.set('review-b', { + agent: 'b', status: 'done', content: 'needs_fix', + timestamp: new Date(), matchedRuleIndex: 1, + }); + + const result = evaluateAggregateConditions(parentStep, makeState(outputs)); + + expect(result).toBe(-1); + }); + + it('should return -1 for non-parallel step', () => { + const step = 
makeStep('step', [ + makeRule('all("done")', 'COMPLETE', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'done', + }), + ]); + + const result = evaluateAggregateConditions(step, makeState()); + + expect(result).toBe(-1); + }); +}); + +describe('Rule Evaluation IT: ai() judge condition', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should call AI judge for ai() conditions when no tag match', async () => { + mockCallAiJudge.mockResolvedValue(0); // Judge says first ai() condition matches + + const step = makeStep('step', [ + makeRule('ai("The code is approved")', 'COMPLETE', { + isAiCondition: true, + aiConditionText: 'The code is approved', + }), + makeRule('ai("The code needs fixes")', 'fix', { + isAiCondition: true, + aiConditionText: 'The code needs fixes', + }), + ]); + + const result = await detectMatchedRule(step, 'Code looks great, no issues.', '', makeCtx()); + + expect(result).toEqual({ index: 0, method: 'ai_judge' }); + expect(mockCallAiJudge).toHaveBeenCalled(); + }); + + it('should skip AI judge if tag already matched', async () => { + const step = makeStep('plan', [ + makeRule('ai("Clear")', 'implement', { + isAiCondition: true, + aiConditionText: 'Clear', + }), + ]); + + const result = await detectMatchedRule(step, '[PLAN:1] Clear.', '', makeCtx()); + + expect(result).toEqual({ index: 0, method: 'phase1_tag' }); + expect(mockCallAiJudge).not.toHaveBeenCalled(); + }); +}); + +describe('Rule Evaluation IT: AI judge fallback', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should use AI judge fallback when no tag and no ai() conditions', async () => { + // First call (ai() conditions): returns -1 (no ai() conditions exist) + // Second call (all conditions fallback): returns 0 + mockCallAiJudge.mockResolvedValue(0); + + const step = makeStep('review', [ + makeRule('Approved', 'COMPLETE'), + makeRule('Rejected', 'fix'), + ]); + + // No tag in content, no ai() rules → goes to fallback + 
const result = await detectMatchedRule(step, 'The code looks fine, approved.', '', makeCtx()); + + expect(result).toEqual({ index: 0, method: 'ai_judge_fallback' }); + }); + + it('should throw when no rule matches (AI judge returns -1 for all phases)', async () => { + mockCallAiJudge.mockResolvedValue(-1); + + const step = makeStep('review', [ + makeRule('Approved', 'COMPLETE'), + makeRule('Rejected', 'fix'), + ]); + + await expect( + detectMatchedRule(step, 'Totally unrelated content.', '', makeCtx()), + ).rejects.toThrow(/no rule matched/); + }); +}); + +describe('Rule Evaluation IT: RuleMatchMethod tracking', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockCallAiJudge.mockResolvedValue(-1); + }); + + it('should record method as "aggregate" for aggregate matches', () => { + const subStep = makeStep('sub', [makeRule('ok', '')]); + const parentStep = makeStep('parent', [ + makeRule('all("ok")', 'COMPLETE', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'ok', + }), + ], [subStep]); + + const outputs = new Map(); + outputs.set('sub', { + agent: 'sub', status: 'done', content: 'ok', + timestamp: new Date(), matchedRuleIndex: 0, + }); + + const result = evaluateAggregateConditions(parentStep, makeState(outputs)); + expect(result).toBe(0); + // Method verified via detectMatchedRule in engine integration + }); + + it('should record method as "phase3_tag" for Phase 3 matches', async () => { + const step = makeStep('step', [ + makeRule('Done', 'COMPLETE'), + ]); + + const result = await detectMatchedRule(step, 'content', '[STEP:1]', makeCtx()); + expect(result?.method).toBe('phase3_tag'); + }); + + it('should record method as "phase1_tag" for Phase 1 fallback matches', async () => { + const step = makeStep('step', [ + makeRule('Done', 'COMPLETE'), + ]); + + const result = await detectMatchedRule(step, '[STEP:1] Done.', '', makeCtx()); + expect(result?.method).toBe('phase1_tag'); + }); +}); + +describe('Rule Evaluation IT: steps 
without rules', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should return undefined for step with no rules', async () => { + const step: WorkflowStep = { + name: 'step', + agent: 'agent', + agentDisplayName: 'step', + instructionTemplate: '{task}', + passPreviousResponse: true, + }; + + const result = await detectMatchedRule(step, 'content', '', makeCtx()); + expect(result).toBeUndefined(); + }); + + it('should return undefined for step with empty rules array', async () => { + const step = makeStep('step', []); + + const result = await detectMatchedRule(step, 'content', '', makeCtx()); + expect(result).toBeUndefined(); + }); +}); diff --git a/src/__tests__/it-three-phase-execution.test.ts b/src/__tests__/it-three-phase-execution.test.ts new file mode 100644 index 0000000..a427b9e --- /dev/null +++ b/src/__tests__/it-three-phase-execution.test.ts @@ -0,0 +1,392 @@ +/** + * Three-phase execution integration tests. + * + * Tests Phase 1 (main) → Phase 2 (report) → Phase 3 (status judgment) lifecycle. + * Verifies that the correct combination of phases fires based on step config. 
+ * + * Mocked: UI, session, config, callAiJudge + * Selectively mocked: phase-runner (to inspect call patterns) + * Not mocked: WorkflowEngine, runAgent, detectMatchedRule, rule-evaluator + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { setMockScenario, resetScenario } from '../mock/scenario.js'; +import type { WorkflowConfig, WorkflowStep, WorkflowRule } from '../models/types.js'; + +// --- Mocks --- + +vi.mock('../claude/client.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + callAiJudge: vi.fn().mockResolvedValue(-1), + }; +}); + +const mockNeedsStatusJudgmentPhase = vi.fn(); +const mockRunReportPhase = vi.fn(); +const mockRunStatusJudgmentPhase = vi.fn(); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: (...args: unknown[]) => mockNeedsStatusJudgmentPhase(...args), + runReportPhase: (...args: unknown[]) => mockRunReportPhase(...args), + runStatusJudgmentPhase: (...args: unknown[]) => mockRunStatusJudgmentPhase(...args), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), + generateSessionId: vi.fn().mockReturnValue('test-session-id'), +})); + +vi.mock('../config/globalConfig.js', () => ({ + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), + getDisabledBuiltins: vi.fn().mockReturnValue([]), +})); + +vi.mock('../config/projectConfig.js', () => ({ + loadProjectConfig: vi.fn().mockReturnValue({}), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; + +// --- Test helpers --- + +function makeRule(condition: string, next: string): WorkflowRule { + return { condition, next }; +} + +function createTestEnv(): { dir: string; agentPath: string } { + 
const dir = mkdtempSync(join(tmpdir(), 'takt-it-3ph-')); + mkdirSync(join(dir, '.takt', 'reports', 'test-report-dir'), { recursive: true }); + + const agentsDir = join(dir, 'agents'); + mkdirSync(agentsDir, { recursive: true }); + const agentPath = join(agentsDir, 'agent.md'); + writeFileSync(agentPath, 'You are an agent.'); + + return { dir, agentPath }; +} + +function makeStep( + name: string, + agentPath: string, + rules: WorkflowRule[], + options: { report?: string | { label: string; path: string }[]; edit?: boolean } = {}, +): WorkflowStep { + return { + name, + agent: './agents/agent.md', + agentDisplayName: name, + agentPath, + instructionTemplate: '{task}', + passPreviousResponse: true, + rules, + report: options.report, + edit: options.edit, + }; +} + +describe('Three-Phase Execution IT: phase1 only (no report, no tag rules)', () => { + let testDir: string; + let agentPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPath = env.agentPath; + + // No tag rules needed → Phase 3 not needed + mockNeedsStatusJudgmentPhase.mockReturnValue(false); + mockRunReportPhase.mockResolvedValue(undefined); + mockRunStatusJudgmentPhase.mockResolvedValue(''); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should only run Phase 1 when step has no report and no tag rules', async () => { + setMockScenario([ + { status: 'done', content: '[STEP:1]\n\nDone.' 
}, + ]); + + const config: WorkflowConfig = { + name: 'it-phase1-only', + description: 'Test', + maxIterations: 5, + initialStep: 'step', + steps: [ + makeStep('step', agentPath, [ + makeRule('Done', 'COMPLETE'), + makeRule('Not done', 'ABORT'), + ]), + ], + }; + + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(mockRunReportPhase).not.toHaveBeenCalled(); + // needsStatusJudgmentPhase is called but returns false + expect(mockRunStatusJudgmentPhase).not.toHaveBeenCalled(); + }); +}); + +describe('Three-Phase Execution IT: phase1 + phase2 (report defined)', () => { + let testDir: string; + let agentPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPath = env.agentPath; + + mockNeedsStatusJudgmentPhase.mockReturnValue(false); + mockRunReportPhase.mockResolvedValue(undefined); + mockRunStatusJudgmentPhase.mockResolvedValue(''); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should run Phase 1 + Phase 2 when step has report', async () => { + setMockScenario([ + { status: 'done', content: '[STEP:1]\n\nDone.' 
}, + ]); + + const config: WorkflowConfig = { + name: 'it-phase1-2', + description: 'Test', + maxIterations: 5, + initialStep: 'step', + steps: [ + makeStep('step', agentPath, [ + makeRule('Done', 'COMPLETE'), + makeRule('Not done', 'ABORT'), + ], { report: 'test-report.md' }), + ], + }; + + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(mockRunReportPhase).toHaveBeenCalledTimes(1); + expect(mockRunStatusJudgmentPhase).not.toHaveBeenCalled(); + }); + + it('should run Phase 2 for multi-report step', async () => { + setMockScenario([ + { status: 'done', content: '[STEP:1]\n\nDone.' }, + ]); + + const config: WorkflowConfig = { + name: 'it-phase1-2-multi', + description: 'Test', + maxIterations: 5, + initialStep: 'step', + steps: [ + makeStep('step', agentPath, [ + makeRule('Done', 'COMPLETE'), + ], { report: [{ label: 'Scope', path: 'scope.md' }, { label: 'Decisions', path: 'decisions.md' }] }), + ], + }; + + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(mockRunReportPhase).toHaveBeenCalledTimes(1); + }); +}); + +describe('Three-Phase Execution IT: phase1 + phase3 (tag rules defined)', () => { + let testDir: string; + let agentPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPath = env.agentPath; + + mockNeedsStatusJudgmentPhase.mockReturnValue(true); + mockRunReportPhase.mockResolvedValue(undefined); + // Phase 3 returns content with a tag + mockRunStatusJudgmentPhase.mockResolvedValue('[STEP:1]'); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should run Phase 1 + Phase 3 when step has tag-based rules but no report', 
async () => { + setMockScenario([ + // Phase 1: main content (no tag — Phase 3 will provide it) + { status: 'done', content: 'Agent completed the work.' }, + ]); + + const config: WorkflowConfig = { + name: 'it-phase1-3', + description: 'Test', + maxIterations: 5, + initialStep: 'step', + steps: [ + makeStep('step', agentPath, [ + makeRule('Done', 'COMPLETE'), + makeRule('Not done', 'ABORT'), + ]), + ], + }; + + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(mockRunReportPhase).not.toHaveBeenCalled(); + expect(mockRunStatusJudgmentPhase).toHaveBeenCalledTimes(1); + }); +}); + +describe('Three-Phase Execution IT: all three phases', () => { + let testDir: string; + let agentPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPath = env.agentPath; + + mockNeedsStatusJudgmentPhase.mockReturnValue(true); + mockRunReportPhase.mockResolvedValue(undefined); + mockRunStatusJudgmentPhase.mockResolvedValue('[STEP:1]'); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should run Phase 1 → Phase 2 → Phase 3 when step has report and tag rules', async () => { + setMockScenario([ + { status: 'done', content: 'Agent completed the work.' 
}, + ]); + + const config: WorkflowConfig = { + name: 'it-all-phases', + description: 'Test', + maxIterations: 5, + initialStep: 'step', + steps: [ + makeStep('step', agentPath, [ + makeRule('Done', 'COMPLETE'), + makeRule('Not done', 'ABORT'), + ], { report: 'test-report.md' }), + ], + }; + + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(mockRunReportPhase).toHaveBeenCalledTimes(1); + expect(mockRunStatusJudgmentPhase).toHaveBeenCalledTimes(1); + + // Verify ordering: report phase is called before status judgment + const reportCallOrder = mockRunReportPhase.mock.invocationCallOrder[0]; + const judgmentCallOrder = mockRunStatusJudgmentPhase.mock.invocationCallOrder[0]; + expect(reportCallOrder).toBeLessThan(judgmentCallOrder); + }); +}); + +describe('Three-Phase Execution IT: phase3 tag → rule match', () => { + let testDir: string; + let agentPath: string; + + beforeEach(() => { + vi.clearAllMocks(); + const env = createTestEnv(); + testDir = env.dir; + agentPath = env.agentPath; + + mockNeedsStatusJudgmentPhase.mockReturnValue(true); + mockRunReportPhase.mockResolvedValue(undefined); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should use Phase 3 tag for rule matching over Phase 1 content', async () => { + // Phase 1 content has no tag → fallback + setMockScenario([ + { status: 'done', content: 'Work done.' }, + { status: 'done', content: '[STEP2:1]\n\nChecked.' 
}, + ]); + + // Phase 3 returns rule 2 (ABORT) + mockRunStatusJudgmentPhase.mockResolvedValue('[STEP1:2]'); + + const config: WorkflowConfig = { + name: 'it-phase3-tag', + description: 'Test', + maxIterations: 5, + initialStep: 'step1', + steps: [ + makeStep('step1', agentPath, [ + makeRule('Done', 'step2'), + makeRule('Not done', 'ABORT'), + ]), + makeStep('step2', agentPath, [ + makeRule('Checked', 'COMPLETE'), + ]), + ], + }; + + const engine = new WorkflowEngine(config, testDir, 'Test task', { + projectCwd: testDir, + provider: 'mock', + }); + + const state = await engine.run(); + + // Phase 3 returned [STEP1:2] → rule index 1 → "Not done" → ABORT + expect(state.status).toBe('aborted'); + expect(state.iteration).toBe(1); + }); +}); diff --git a/src/__tests__/it-workflow-loader.test.ts b/src/__tests__/it-workflow-loader.test.ts new file mode 100644 index 0000000..396f541 --- /dev/null +++ b/src/__tests__/it-workflow-loader.test.ts @@ -0,0 +1,380 @@ +/** + * Workflow loader integration tests. + * + * Tests the 3-tier workflow resolution (project-local → user → builtin) + * and YAML parsing including special rule syntax (ai(), all(), any()). 
+ * + * Mocked: globalConfig (for language/builtins) + * Not mocked: loadWorkflow, parseWorkflow, rule parsing + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +// --- Mocks --- + +vi.mock('../config/globalConfig.js', () => ({ + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), + getDisabledBuiltins: vi.fn().mockReturnValue([]), +})); + +// --- Imports (after mocks) --- + +import { loadWorkflow } from '../config/workflowLoader.js'; + +// --- Test helpers --- + +function createTestDir(): string { + const dir = mkdtempSync(join(tmpdir(), 'takt-it-wfl-')); + mkdirSync(join(dir, '.takt'), { recursive: true }); + return dir; +} + +describe('Workflow Loader IT: builtin workflow loading', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + const builtinNames = ['default', 'simple', 'expert', 'expert-cqrs', 'research', 'magi', 'review-only']; + + for (const name of builtinNames) { + it(`should load builtin workflow: ${name}`, () => { + const config = loadWorkflow(name, testDir); + + expect(config).not.toBeNull(); + expect(config!.name).toBe(name); + expect(config!.steps.length).toBeGreaterThan(0); + expect(config!.initialStep).toBeDefined(); + expect(config!.maxIterations).toBeGreaterThan(0); + }); + } + + it('should return null for non-existent workflow', () => { + const config = loadWorkflow('non-existent-workflow-xyz', testDir); + expect(config).toBeNull(); + }); +}); + +describe('Workflow Loader IT: project-local workflow override', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should load 
project-local workflow from .takt/workflows/', () => { + const workflowsDir = join(testDir, '.takt', 'workflows'); + mkdirSync(workflowsDir, { recursive: true }); + + const agentsDir = join(testDir, 'agents'); + mkdirSync(agentsDir, { recursive: true }); + writeFileSync(join(agentsDir, 'custom.md'), 'Custom agent'); + + writeFileSync(join(workflowsDir, 'custom-wf.yaml'), ` +name: custom-wf +description: Custom project workflow +max_iterations: 5 +initial_step: start + +steps: + - name: start + agent: ./agents/custom.md + rules: + - condition: Done + next: COMPLETE + instruction: "Do the work" +`); + + const config = loadWorkflow('custom-wf', testDir); + + expect(config).not.toBeNull(); + expect(config!.name).toBe('custom-wf'); + expect(config!.steps.length).toBe(1); + expect(config!.steps[0]!.name).toBe('start'); + }); +}); + +describe('Workflow Loader IT: agent path resolution', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should resolve relative agent paths from workflow YAML location', () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + + for (const step of config!.steps) { + if (step.agentPath) { + // Agent paths should be resolved to absolute paths + expect(step.agentPath).toMatch(/^\//); + // Agent files should exist + expect(existsSync(step.agentPath)).toBe(true); + } + if (step.parallel) { + for (const sub of step.parallel) { + if (sub.agentPath) { + expect(sub.agentPath).toMatch(/^\//); + expect(existsSync(sub.agentPath)).toBe(true); + } + } + } + } + }); +}); + +describe('Workflow Loader IT: rule syntax parsing', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should parse all() aggregate conditions from default workflow', () => { + const config = 
loadWorkflow('default', testDir); + expect(config).not.toBeNull(); + + // Find the parallel reviewers step + const reviewersStep = config!.steps.find( + (s) => s.parallel && s.parallel.length > 0, + ); + expect(reviewersStep).toBeDefined(); + + // Should have aggregate rules + const allRule = reviewersStep!.rules?.find( + (r) => r.isAggregateCondition && r.aggregateType === 'all', + ); + expect(allRule).toBeDefined(); + expect(allRule!.aggregateConditionText).toBe('approved'); + }); + + it('should parse any() aggregate conditions from default workflow', () => { + const config = loadWorkflow('default', testDir); + expect(config).not.toBeNull(); + + const reviewersStep = config!.steps.find( + (s) => s.parallel && s.parallel.length > 0, + ); + + const anyRule = reviewersStep!.rules?.find( + (r) => r.isAggregateCondition && r.aggregateType === 'any', + ); + expect(anyRule).toBeDefined(); + expect(anyRule!.aggregateConditionText).toBe('needs_fix'); + }); + + it('should parse standard rules with next step', () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + + const planStep = config!.steps.find((s) => s.name === 'plan'); + expect(planStep).toBeDefined(); + expect(planStep!.rules).toBeDefined(); + expect(planStep!.rules!.length).toBeGreaterThan(0); + + // Each rule should have condition and next + for (const rule of planStep!.rules!) 
{ + expect(typeof rule.condition).toBe('string'); + expect(rule.condition.length).toBeGreaterThan(0); + } + }); +}); + +describe('Workflow Loader IT: workflow config validation', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should set max_iterations from YAML', () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + expect(typeof config!.maxIterations).toBe('number'); + expect(config!.maxIterations).toBeGreaterThan(0); + }); + + it('should set initial_step from YAML', () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + expect(typeof config!.initialStep).toBe('string'); + + // initial_step should reference an existing step + const stepNames = config!.steps.map((s) => s.name); + expect(stepNames).toContain(config!.initialStep); + }); + + it('should preserve edit property on steps (review-only has no edit: true)', () => { + const config = loadWorkflow('review-only', testDir); + expect(config).not.toBeNull(); + + // review-only: no step should have edit: true + for (const step of config!.steps) { + expect(step.edit).not.toBe(true); + if (step.parallel) { + for (const sub of step.parallel) { + expect(sub.edit).not.toBe(true); + } + } + } + + // expert: implement step should have edit: true + const expertConfig = loadWorkflow('expert', testDir); + expect(expertConfig).not.toBeNull(); + const implementStep = expertConfig!.steps.find((s) => s.name === 'implement'); + expect(implementStep).toBeDefined(); + expect(implementStep!.edit).toBe(true); + }); + + it('should set passPreviousResponse from YAML', () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + + // At least some steps should have passPreviousResponse set + const stepsWithPassPrev = config!.steps.filter((s) => s.passPreviousResponse === true); + 
expect(stepsWithPassPrev.length).toBeGreaterThan(0); + }); +}); + +describe('Workflow Loader IT: parallel step loading', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should load parallel sub-steps from default workflow', () => { + const config = loadWorkflow('default', testDir); + expect(config).not.toBeNull(); + + const parallelStep = config!.steps.find( + (s) => s.parallel && s.parallel.length > 0, + ); + expect(parallelStep).toBeDefined(); + expect(parallelStep!.parallel!.length).toBeGreaterThanOrEqual(2); + + // Each sub-step should have required fields + for (const sub of parallelStep!.parallel!) { + expect(sub.name).toBeDefined(); + expect(sub.agent).toBeDefined(); + expect(sub.rules).toBeDefined(); + } + }); + + it('should load 4 parallel reviewers from expert workflow', () => { + const config = loadWorkflow('expert', testDir); + expect(config).not.toBeNull(); + + const parallelStep = config!.steps.find( + (s) => s.parallel && s.parallel.length === 4, + ); + expect(parallelStep).toBeDefined(); + + const subNames = parallelStep!.parallel!.map((s) => s.name); + expect(subNames).toContain('arch-review'); + expect(subNames).toContain('frontend-review'); + expect(subNames).toContain('security-review'); + expect(subNames).toContain('qa-review'); + }); +}); + +describe('Workflow Loader IT: report config loading', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should load single report config', () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + + // simple workflow: plan step has a report config + const planStep = config!.steps.find((s) => s.name === 'plan'); + expect(planStep).toBeDefined(); + expect(planStep!.report).toBeDefined(); + }); + + it('should load 
multi-report config from expert workflow', () => { + const config = loadWorkflow('expert', testDir); + expect(config).not.toBeNull(); + + // implement step has multi-report: [Scope, Decisions] + const implementStep = config!.steps.find((s) => s.name === 'implement'); + expect(implementStep).toBeDefined(); + expect(implementStep!.report).toBeDefined(); + expect(Array.isArray(implementStep!.report)).toBe(true); + expect((implementStep!.report as unknown[]).length).toBe(2); + }); +}); + +describe('Workflow Loader IT: invalid YAML handling', () => { + let testDir: string; + + beforeEach(() => { + testDir = createTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should throw for workflow file with invalid YAML', () => { + const workflowsDir = join(testDir, '.takt', 'workflows'); + mkdirSync(workflowsDir, { recursive: true }); + + writeFileSync(join(workflowsDir, 'broken.yaml'), ` +name: broken +this is not: valid yaml: [[[[ + - bad: { +`); + + expect(() => loadWorkflow('broken', testDir)).toThrow(); + }); + + it('should throw for workflow missing required fields', () => { + const workflowsDir = join(testDir, '.takt', 'workflows'); + mkdirSync(workflowsDir, { recursive: true }); + + writeFileSync(join(workflowsDir, 'incomplete.yaml'), ` +name: incomplete +description: Missing steps +`); + + expect(() => loadWorkflow('incomplete', testDir)).toThrow(); + }); +}); diff --git a/src/__tests__/it-workflow-patterns.test.ts b/src/__tests__/it-workflow-patterns.test.ts new file mode 100644 index 0000000..601813c --- /dev/null +++ b/src/__tests__/it-workflow-patterns.test.ts @@ -0,0 +1,355 @@ +/** + * Workflow patterns integration tests. + * + * Tests that all builtin workflow definitions can be loaded and execute + * the expected step transitions using WorkflowEngine + MockProvider + ScenarioQueue. 
+ * + * Mocked: UI, session, phase-runner, notifications, config, callAiJudge + * Not mocked: WorkflowEngine, runAgent, detectMatchedRule, rule-evaluator + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { mkdtempSync, mkdirSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { setMockScenario, resetScenario } from '../mock/scenario.js'; + +// --- Mocks --- + +vi.mock('../claude/client.js', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + callAiJudge: vi.fn().mockResolvedValue(-1), + }; +}); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), + generateSessionId: vi.fn().mockReturnValue('test-session-id'), +})); + +vi.mock('../config/globalConfig.js', () => ({ + loadGlobalConfig: vi.fn().mockReturnValue({}), + getLanguage: vi.fn().mockReturnValue('en'), + getDisabledBuiltins: vi.fn().mockReturnValue([]), +})); + +vi.mock('../config/projectConfig.js', () => ({ + loadProjectConfig: vi.fn().mockReturnValue({}), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { loadWorkflow } from '../config/workflowLoader.js'; +import type { WorkflowConfig } from '../models/types.js'; + +// --- Test helpers --- + +function createTestDir(): string { + const dir = mkdtempSync(join(tmpdir(), 'takt-it-wfp-')); + mkdirSync(join(dir, '.takt', 'reports', 'test-report-dir'), { recursive: true }); + return dir; +} + +function createEngine(config: WorkflowConfig, dir: string, task: string): WorkflowEngine { + return new WorkflowEngine(config, dir, task, { + projectCwd: dir, + provider: 'mock', + }); +} + 
+describe('Workflow Patterns IT: simple workflow', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = createTestDir(); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should complete: plan → implement → ai_review → review → supervise → COMPLETE', async () => { + const config = loadWorkflow('simple', testDir); + expect(config).not.toBeNull(); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nRequirements are clear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nImplementation complete.' }, + { agent: 'ai-antipattern-reviewer', status: 'done', content: '[AI_REVIEW:1]\n\nNo AI-specific issues.' }, + { agent: 'architecture-reviewer', status: 'done', content: '[REVIEW:1]\n\nNo issues found.' }, + { agent: 'supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAll checks passed.' }, + ]); + + const engine = createEngine(config!, testDir, 'Test task'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(5); + }); + + it('should ABORT when plan returns rule 3 (requirements unclear)', async () => { + const config = loadWorkflow('simple', testDir); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:3]\n\nRequirements unclear.' }, + ]); + + const engine = createEngine(config!, testDir, 'Vague task'); + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(state.iteration).toBe(1); + }); + + it('should COMPLETE when plan detects a question (rule 2)', async () => { + const config = loadWorkflow('simple', testDir); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:2]\n\nUser is asking a question.' 
}, + ]); + + const engine = createEngine(config!, testDir, 'What is X?'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(1); + }); +}); + +describe('Workflow Patterns IT: default workflow (parallel reviewers)', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = createTestDir(); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should complete with all("approved") in parallel review step', async () => { + const config = loadWorkflow('default', testDir); + expect(config).not.toBeNull(); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nClear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nDone.' }, + { agent: 'ai-antipattern-reviewer', status: 'done', content: '[AI_REVIEW:1]\n\nNo issues.' }, + // Parallel reviewers: both approved + { agent: 'architecture-reviewer', status: 'done', content: '[ARCH-REVIEW:1]\n\napproved' }, + { agent: 'security-reviewer', status: 'done', content: '[SECURITY-REVIEW:1]\n\napproved' }, + // Supervisor + { agent: 'supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAll checks passed.' }, + ]); + + const engine = createEngine(config!, testDir, 'Test task'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + }); + + it('should route to fix when any("needs_fix") in parallel review step', async () => { + const config = loadWorkflow('default', testDir); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nClear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nDone.' }, + { agent: 'ai-antipattern-reviewer', status: 'done', content: '[AI_REVIEW:1]\n\nNo issues.' 
}, + // Parallel: arch approved, security needs_fix + { agent: 'architecture-reviewer', status: 'done', content: '[ARCH-REVIEW:1]\n\napproved' }, + { agent: 'security-reviewer', status: 'done', content: '[SECURITY-REVIEW:2]\n\nneeds_fix' }, + // Fix step + { agent: 'coder', status: 'done', content: '[FIX:1]\n\nFix complete.' }, + // Re-review: both approved + { agent: 'architecture-reviewer', status: 'done', content: '[ARCH-REVIEW:1]\n\napproved' }, + { agent: 'security-reviewer', status: 'done', content: '[SECURITY-REVIEW:1]\n\napproved' }, + // Supervisor + { agent: 'supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAll checks passed.' }, + ]); + + const engine = createEngine(config!, testDir, 'Task needing security fix'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + }); +}); + +describe('Workflow Patterns IT: research workflow', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = createTestDir(); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should complete: plan → dig → supervise → COMPLETE', async () => { + const config = loadWorkflow('research', testDir); + expect(config).not.toBeNull(); + + setMockScenario([ + { agent: 'research/planner', status: 'done', content: '[PLAN:1]\n\nPlanning is complete.' }, + { agent: 'research/digger', status: 'done', content: '[DIG:1]\n\nResearch is complete.' }, + { agent: 'research/supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAdequate.' 
}, + ]); + + const engine = createEngine(config!, testDir, 'Research topic X'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(3); + }); + + it('should loop: plan → dig → supervise (insufficient) → plan → dig → supervise → COMPLETE', async () => { + const config = loadWorkflow('research', testDir); + + setMockScenario([ + { agent: 'research/planner', status: 'done', content: '[PLAN:1]\n\nPlanning is complete.' }, + { agent: 'research/digger', status: 'done', content: '[DIG:1]\n\nResearch is complete.' }, + { agent: 'research/supervisor', status: 'done', content: '[SUPERVISE:2]\n\nInsufficient.' }, + // Second pass + { agent: 'research/planner', status: 'done', content: '[PLAN:1]\n\nRevised plan.' }, + { agent: 'research/digger', status: 'done', content: '[DIG:1]\n\nMore research.' }, + { agent: 'research/supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAdequate now.' }, + ]); + + const engine = createEngine(config!, testDir, 'Research topic X'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(6); + }); +}); + +describe('Workflow Patterns IT: magi workflow', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = createTestDir(); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should complete: melchior → balthasar → casper → COMPLETE', async () => { + const config = loadWorkflow('magi', testDir); + expect(config).not.toBeNull(); + + setMockScenario([ + { agent: 'magi/melchior', status: 'done', content: '[MELCHIOR:1]\n\nJudgment completed.' }, + { agent: 'magi/balthasar', status: 'done', content: '[BALTHASAR:1]\n\nJudgment completed.' }, + { agent: 'magi/casper', status: 'done', content: '[CASPER:1]\n\nFinal judgment completed.' 
}, + ]); + + const engine = createEngine(config!, testDir, 'Deliberation topic'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(3); + }); +}); + +describe('Workflow Patterns IT: review-only workflow', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = createTestDir(); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('should complete: plan → reviewers (all approved) → supervise → COMPLETE', async () => { + const config = loadWorkflow('review-only', testDir); + expect(config).not.toBeNull(); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nReview scope is clear.' }, + // Parallel reviewers: all approved + { agent: 'architecture-reviewer', status: 'done', content: '[ARCH-REVIEW:1]\n\napproved' }, + { agent: 'security-reviewer', status: 'done', content: '[SECURITY-REVIEW:1]\n\napproved' }, + { agent: 'ai-antipattern-reviewer', status: 'done', content: '[AI-REVIEW:1]\n\napproved' }, + // Supervisor: approved (local review, no PR) + { agent: 'supervisor', status: 'done', content: '[SUPERVISE:2]\n\napproved' }, + ]); + + const engine = createEngine(config!, testDir, 'Review the codebase'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + }); + + it('should verify no steps have edit: true', () => { + const config = loadWorkflow('review-only', testDir); + expect(config).not.toBeNull(); + + for (const step of config!.steps) { + expect(step.edit).not.toBe(true); + if (step.parallel) { + for (const subStep of step.parallel) { + expect(subStep.edit).not.toBe(true); + } + } + } + }); +}); + +describe('Workflow Patterns IT: expert workflow (4 parallel reviewers)', () => { + let testDir: string; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = createTestDir(); + }); + + afterEach(() => { + resetScenario(); + rmSync(testDir, { recursive: true, 
force: true }); + }); + + it('should complete with all("approved") in 4-parallel review', async () => { + const config = loadWorkflow('expert', testDir); + expect(config).not.toBeNull(); + + setMockScenario([ + { agent: 'planner', status: 'done', content: '[PLAN:1]\n\nClear.' }, + { agent: 'coder', status: 'done', content: '[IMPLEMENT:1]\n\nDone.' }, + { agent: 'ai-antipattern-reviewer', status: 'done', content: '[AI_REVIEW:1]\n\nNo issues.' }, + // 4 parallel reviewers + { agent: 'architecture-reviewer', status: 'done', content: '[ARCH-REVIEW:1]\n\napproved' }, + { agent: 'expert/frontend-reviewer', status: 'done', content: '[FRONTEND-REVIEW:1]\n\napproved' }, + { agent: 'expert/security-reviewer', status: 'done', content: '[SECURITY-REVIEW:1]\n\napproved' }, + { agent: 'expert/qa-reviewer', status: 'done', content: '[QA-REVIEW:1]\n\napproved' }, + // Supervisor + { agent: 'expert/supervisor', status: 'done', content: '[SUPERVISE:1]\n\nAll validations pass.' }, + ]); + + const engine = createEngine(config!, testDir, 'Expert review task'); + const state = await engine.run(); + + expect(state.status).toBe('completed'); + }); +});