Merge pull request #41 from nrslib/issue/17-engine-integration-test
feat: WorkflowEngineのモックインテグレーションテスト追加 (#17)
This commit is contained in:
commit
1e1f2c0c58
204
CLAUDE.md
204
CLAUDE.md
@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|||||||
|
|
||||||
## Project Overview
|
## Project Overview
|
||||||
|
|
||||||
TAKT (Task Agent Koordination Tool) is a multi-agent orchestration system for Claude Code. It enables YAML-based workflow definitions that coordinate multiple AI agents through state machine transitions.
|
TAKT (Task Agent Koordination Tool) is a multi-agent orchestration system for Claude Code. It enables YAML-based workflow definitions that coordinate multiple AI agents through state machine transitions with rule-based routing.
|
||||||
|
|
||||||
## Development Commands
|
## Development Commands
|
||||||
|
|
||||||
@ -25,35 +25,75 @@ TAKT (Task Agent Koordination Tool) is a multi-agent orchestration system for Cl
|
|||||||
| `takt /watch` | | Watch `.takt/tasks/` and auto-execute tasks (resident process) |
|
| `takt /watch` | | Watch `.takt/tasks/` and auto-execute tasks (resident process) |
|
||||||
| `takt /add-task` | `/add` | Add a new task interactively (YAML format, multiline supported) |
|
| `takt /add-task` | `/add` | Add a new task interactively (YAML format, multiline supported) |
|
||||||
| `takt /list-tasks` | `/list` | List task branches (try merge, merge & cleanup, or delete) |
|
| `takt /list-tasks` | `/list` | List task branches (try merge, merge & cleanup, or delete) |
|
||||||
| `takt /switch` | | Switch workflow interactively |
|
| `takt /switch` | `/sw` | Switch workflow interactively |
|
||||||
| `takt /clear` | | Clear agent conversation sessions (reset state) |
|
| `takt /clear` | | Clear agent conversation sessions (reset state) |
|
||||||
|
| `takt /eject` | | Copy builtin workflow/agents to `~/.takt/` for customization |
|
||||||
| `takt /refresh-builtin` | | Update builtin resources from `resources/` to `~/.takt/` |
|
| `takt /refresh-builtin` | | Update builtin resources from `resources/` to `~/.takt/` |
|
||||||
| `takt /help` | | Show help message |
|
| `takt /help` | | Show help message |
|
||||||
| `takt /config` | | Display current configuration |
|
| `takt /config` | | Display current configuration |
|
||||||
|
|
||||||
|
GitHub issue references: `takt "#6"` fetches issue #6 and executes it as a task (quote the argument so the shell does not treat `#6` as a comment).
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
### Core Flow
|
### Core Flow
|
||||||
|
|
||||||
```
|
```
|
||||||
CLI (cli.ts)
|
CLI (cli.ts)
|
||||||
→ Slash commands (/run-tasks, /watch, /add-task, /list-tasks, /switch, /clear, /refresh-builtin, /help, /config)
|
→ Slash commands or executeTask()
|
||||||
→ or executeTask()
|
|
||||||
→ WorkflowEngine (workflow/engine.ts)
|
→ WorkflowEngine (workflow/engine.ts)
|
||||||
→ runAgent() (agents/runner.ts)
|
→ Per step: 3-phase execution
|
||||||
→ callClaude() (claude/client.ts)
|
Phase 1: runAgent() → main work
|
||||||
→ executeClaudeCli() (claude/process.ts)
|
Phase 2: runReportPhase() → report output (if step.report defined)
|
||||||
→ ClaudeProcess (claude-agent-sdk)
|
Phase 3: runStatusJudgmentPhase() → status tag output (if tag-based rules)
|
||||||
|
→ detectMatchedRule() → rule evaluation → determineNextStep()
|
||||||
|
→ Parallel steps: Promise.all() for sub-steps, aggregate evaluation
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Three-Phase Step Execution
|
||||||
|
|
||||||
|
Each step executes in up to 3 phases (session is resumed across phases):
|
||||||
|
|
||||||
|
| Phase | Purpose | Tools | When |
|
||||||
|
|-------|---------|-------|------|
|
||||||
|
| Phase 1 | Main work (coding, review, etc.) | Step's allowed_tools (Write excluded if report defined) | Always |
|
||||||
|
| Phase 2 | Report output | Write only | When `step.report` is defined |
|
||||||
|
| Phase 3 | Status judgment | None (judgment only) | When step has tag-based rules |
|
||||||
|
|
||||||
|
Phase 2/3 are implemented in `src/workflow/phase-runner.ts`. The session is resumed so the agent retains context from Phase 1.
|
||||||
|
|
||||||
|
### Rule Evaluation (5-Stage Fallback)
|
||||||
|
|
||||||
|
After step execution, rules are evaluated to determine the next step. Evaluation order (first match wins):
|
||||||
|
|
||||||
|
1. **Aggregate** (`all()`/`any()`) - For parallel parent steps
|
||||||
|
2. **Phase 3 tag** - `[STEP:N]` tag from status judgment output
|
||||||
|
3. **Phase 1 tag** - `[STEP:N]` tag from main execution output (fallback)
|
||||||
|
4. **AI judge (ai() only)** - AI evaluates `ai("condition text")` rules
|
||||||
|
5. **AI judge fallback** - AI evaluates ALL conditions as a last resort
|
||||||
|
|
||||||
|
Implemented in `src/workflow/rule-evaluator.ts`. The method that produced the match is tracked as the `RuleMatchMethod` type.
|
||||||
|
|
||||||
### Key Components
|
### Key Components
|
||||||
|
|
||||||
**WorkflowEngine** (`src/workflow/engine.ts`)
|
**WorkflowEngine** (`src/workflow/engine.ts`)
|
||||||
- State machine that orchestrates agent execution via EventEmitter
|
- State machine that orchestrates agent execution via EventEmitter
|
||||||
- Manages step transitions based on agent response status
|
- Manages step transitions based on rule evaluation results
|
||||||
- Emits events: `step:start`, `step:complete`, `step:blocked`, `step:loop_detected`, `workflow:complete`, `workflow:abort`, `iteration:limit`
|
- Emits events: `step:start`, `step:complete`, `step:blocked`, `step:loop_detected`, `workflow:complete`, `workflow:abort`, `iteration:limit`
|
||||||
- Supports loop detection (`LoopDetector`) and iteration limits
|
- Supports loop detection (`LoopDetector`) and iteration limits
|
||||||
- Maintains agent sessions per step for conversation continuity
|
- Maintains agent sessions per step for conversation continuity
|
||||||
|
- Parallel step execution via `runParallelStep()` with `Promise.all()`
|
||||||
|
|
||||||
|
**Instruction Builder** (`src/workflow/instruction-builder.ts`)
|
||||||
|
- Auto-injects standard sections into every instruction (no need for `{task}` or `{previous_response}` placeholders in templates):
|
||||||
|
1. Execution context (working dir, edit permission rules)
|
||||||
|
2. Workflow context (iteration counts, report dir)
|
||||||
|
3. User request (`{task}` — auto-injected unless placeholder present)
|
||||||
|
4. Previous response (auto-injected if `pass_previous_response: true`)
|
||||||
|
5. User inputs (auto-injected unless `{user_inputs}` placeholder present)
|
||||||
|
6. `instruction_template` content
|
||||||
|
7. Status output rules (auto-injected for tag-based rules)
|
||||||
|
- Localized for `en` and `ja`
|
||||||
|
|
||||||
**Agent Runner** (`src/agents/runner.ts`)
|
**Agent Runner** (`src/agents/runner.ts`)
|
||||||
- Resolves agent specs (name or path) to agent configurations
|
- Resolves agent specs (name or path) to agent configurations
|
||||||
@ -63,17 +103,16 @@ CLI (cli.ts)
|
|||||||
- `supervisor`: Read/Glob/Grep/Bash/WebSearch/WebFetch
|
- `supervisor`: Read/Glob/Grep/Bash/WebSearch/WebFetch
|
||||||
- `planner`: Read/Glob/Grep/Bash/WebSearch/WebFetch
|
- `planner`: Read/Glob/Grep/Bash/WebSearch/WebFetch
|
||||||
- Custom agents via `.takt/agents.yaml` or prompt files (.md)
|
- Custom agents via `.takt/agents.yaml` or prompt files (.md)
|
||||||
- Supports Claude Code agents (`claudeAgent`) and skills (`claudeSkill`)
|
|
||||||
|
|
||||||
**Claude Integration** (`src/claude/`)
|
**Claude Integration** (`src/claude/`)
|
||||||
- `client.ts` - High-level API: `callClaude()`, `callClaudeCustom()`, `callClaudeAgent()`, `callClaudeSkill()`, status detection via regex patterns
|
- `client.ts` - High-level API: `callClaude()`, `callClaudeCustom()`, `callClaudeAgent()`, `callClaudeSkill()`
|
||||||
- `process.ts` - SDK wrapper with `ClaudeProcess` class, re-exports query management
|
- `process.ts` - SDK wrapper with `ClaudeProcess` class
|
||||||
- `executor.ts` - Query execution using `@anthropic-ai/claude-agent-sdk`
|
- `executor.ts` - Query execution using `@anthropic-ai/claude-agent-sdk`
|
||||||
- `query-manager.ts` - Concurrent query tracking with query IDs
|
- `query-manager.ts` - Concurrent query tracking with query IDs
|
||||||
|
|
||||||
**Configuration** (`src/config/`)
|
**Configuration** (`src/config/`)
|
||||||
- `loader.ts` - Custom agent loading from `.takt/agents.yaml`
|
- `loader.ts` - Custom agent loading from `.takt/agents.yaml`
|
||||||
- `workflowLoader.ts` - YAML workflow parsing with Zod validation (loads from `~/.takt/workflows/` only)
|
- `workflowLoader.ts` - YAML workflow parsing with Zod validation; resolves user workflows (`~/.takt/workflows/`) with builtin fallback (`resources/global/{lang}/workflows/`)
|
||||||
- `agentLoader.ts` - Agent prompt file loading
|
- `agentLoader.ts` - Agent prompt file loading
|
||||||
- `paths.ts` - Directory structure (`.takt/`, `~/.takt/`), session management
|
- `paths.ts` - Directory structure (`.takt/`, `~/.takt/`), session management
|
||||||
|
|
||||||
@ -82,74 +121,123 @@ CLI (cli.ts)
|
|||||||
- `watcher.ts` - TaskWatcher class for polling and auto-executing tasks (used by `/watch`)
|
- `watcher.ts` - TaskWatcher class for polling and auto-executing tasks (used by `/watch`)
|
||||||
- `index.ts` - Task operations (getNextTask, completeTask, addTask)
|
- `index.ts` - Task operations (getNextTask, completeTask, addTask)
|
||||||
|
|
||||||
|
**GitHub Integration** (`src/github/issue.ts`)
|
||||||
|
- Fetches issues via `gh` CLI, formats as task text with title/body/labels/comments
|
||||||
|
|
||||||
### Data Flow
|
### Data Flow
|
||||||
|
|
||||||
1. User provides task or slash command → CLI
|
1. User provides task (text or `#N` issue reference) or slash command → CLI
|
||||||
2. CLI loads workflow from `~/.takt/workflows/{name}.yaml`
|
2. CLI loads workflow: user `~/.takt/workflows/` → builtin `resources/global/{lang}/workflows/` fallback
|
||||||
3. WorkflowEngine starts at `initialStep`
|
3. WorkflowEngine starts at `initial_step`
|
||||||
4. Each step: `buildInstruction()` → `runStep()` → `runAgent()` → `callClaude()` → detect status → `determineNextStep()`
|
4. Each step: `buildInstruction()` → Phase 1 (main) → Phase 2 (report) → Phase 3 (status) → `detectMatchedRule()` → `determineNextStep()`
|
||||||
5. Status patterns (regex in `statusPatterns`) determine next step via `transitions`
|
5. Rule evaluation determines next step name
|
||||||
6. Special transitions: `COMPLETE` ends workflow successfully, `ABORT` ends with failure
|
6. Special transitions: `COMPLETE` ends workflow successfully, `ABORT` ends with failure
|
||||||
|
|
||||||
### Status Detection
|
|
||||||
|
|
||||||
Agents output status markers (e.g., `[CODER:DONE]`) that are matched against `GENERIC_STATUS_PATTERNS` in `src/models/schemas.ts`. Common statuses: `done`, `blocked`, `approved`, `rejected`, `improve`, `in_progress`, `interrupted`.
|
|
||||||
|
|
||||||
## Directory Structure
|
## Directory Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
~/.takt/ # Global user config (created on first run)
|
~/.takt/ # Global user config (created on first run)
|
||||||
config.yaml # Trusted dirs, default workflow, log level, language
|
config.yaml # Trusted dirs, default workflow, log level, language
|
||||||
workflows/ # Workflow YAML files (required location)
|
workflows/ # User workflow YAML files (override builtins)
|
||||||
agents/ # Agent prompt files (.md)
|
agents/ # User agent prompt files (.md)
|
||||||
|
|
||||||
.takt/ # Project-level config
|
.takt/ # Project-level config
|
||||||
agents.yaml # Custom agent definitions
|
agents.yaml # Custom agent definitions
|
||||||
tasks/ # Task files for /run-tasks
|
tasks/ # Task files for /run-tasks
|
||||||
reports/ # Execution reports (auto-generated)
|
reports/ # Execution reports (auto-generated)
|
||||||
logs/ # Session logs (gitignored)
|
logs/ # Session logs in NDJSON format (gitignored)
|
||||||
|
|
||||||
resources/ # Bundled defaults (copied to ~/.takt on init)
|
resources/ # Bundled defaults (builtin, read from dist/ at runtime)
|
||||||
global/
|
global/
|
||||||
en/ # English agents and workflows
|
en/ # English agents and workflows
|
||||||
ja/ # Japanese agents and workflows
|
ja/ # Japanese agents and workflows
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Builtin resources are embedded in the npm package (`dist/resources/`). User files in `~/.takt/` take priority. Use `/eject` to copy builtins to `~/.takt/` for customization.
|
||||||
|
|
||||||
## Workflow YAML Schema
|
## Workflow YAML Schema
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
name: workflow-name
|
name: workflow-name
|
||||||
description: Optional description
|
description: Optional description
|
||||||
max_iterations: 10 # snake_case in YAML
|
max_iterations: 10
|
||||||
|
initial_step: plan # First step to execute
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
# Normal step
|
||||||
- name: step-name
|
- name: step-name
|
||||||
agent: ~/.takt/agents/default/coder.md # Path to agent prompt
|
agent: ../agents/default/coder.md # Path to agent prompt
|
||||||
agent_name: coder # Display name (optional)
|
agent_name: coder # Display name (optional)
|
||||||
provider: codex # claude|codex (optional)
|
provider: codex # claude|codex (optional)
|
||||||
model: opus # Model name (optional)
|
model: opus # Model name (optional)
|
||||||
|
edit: true # Whether step can edit files
|
||||||
|
permission_mode: acceptEdits # Tool permission mode (optional)
|
||||||
instruction_template: |
|
instruction_template: |
|
||||||
{task}
|
Custom instructions for this step.
|
||||||
{previous_response}
|
{task}, {previous_response} are auto-injected if not present as placeholders.
|
||||||
pass_previous_response: true # Default: true
|
pass_previous_response: true # Default: true
|
||||||
transitions:
|
report:
|
||||||
- condition: done
|
name: 01-plan.md # Report file name
|
||||||
next_step: next-step
|
format: | # Report format template
|
||||||
|
# Plan Report
|
||||||
|
...
|
||||||
|
rules:
|
||||||
|
- condition: "Human-readable condition"
|
||||||
|
next: next-step-name
|
||||||
|
- condition: ai("AI evaluates this condition text")
|
||||||
|
next: other-step
|
||||||
- condition: blocked
|
- condition: blocked
|
||||||
next_step: ABORT
|
next: ABORT
|
||||||
on_no_status: complete # complete|continue|stay
|
|
||||||
|
# Parallel step (sub-steps execute concurrently)
|
||||||
|
- name: reviewers
|
||||||
|
parallel:
|
||||||
|
- name: arch-review
|
||||||
|
agent: ../agents/default/architecture-reviewer.md
|
||||||
|
rules:
|
||||||
|
- condition: approved # next is optional for sub-steps
|
||||||
|
- condition: needs_fix
|
||||||
|
instruction_template: |
|
||||||
|
Review architecture...
|
||||||
|
- name: security-review
|
||||||
|
agent: ../agents/default/security-reviewer.md
|
||||||
|
rules:
|
||||||
|
- condition: approved
|
||||||
|
- condition: needs_fix
|
||||||
|
instruction_template: |
|
||||||
|
Review security...
|
||||||
|
rules: # Parent rules use aggregate conditions
|
||||||
|
- condition: all("approved")
|
||||||
|
next: supervise
|
||||||
|
- condition: any("needs_fix")
|
||||||
|
next: fix
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Key points about parallel steps:
|
||||||
|
- Sub-step `rules` define possible outcomes; `next` is optional on sub-steps and is ignored if present (parent handles routing)
|
||||||
|
- Parent `rules` use `all("X")`/`any("X")` to aggregate sub-step results
|
||||||
|
- `all("X")`: true if ALL sub-steps matched condition X
|
||||||
|
- `any("X")`: true if ANY sub-step matched condition X
|
||||||
|
|
||||||
|
### Rule Condition Types
|
||||||
|
|
||||||
|
| Type | Syntax | Evaluation |
|
||||||
|
|------|--------|------------|
|
||||||
|
| Tag-based | `"condition text"` | Agent outputs `[STEP:N]` tag, matched by index |
|
||||||
|
| AI judge | `ai("condition text")` | AI evaluates condition against agent output |
|
||||||
|
| Aggregate | `all("X")` / `any("X")` | Aggregates parallel sub-step matched conditions |
|
||||||
|
|
||||||
### Template Variables
|
### Template Variables
|
||||||
|
|
||||||
| Variable | Description |
|
| Variable | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `{task}` | Original user request |
|
| `{task}` | Original user request (auto-injected if not in template) |
|
||||||
| `{iteration}` | Current iteration number |
|
| `{iteration}` | Workflow-wide iteration count |
|
||||||
| `{max_iterations}` | Maximum iterations |
|
| `{max_iterations}` | Maximum iterations allowed |
|
||||||
| `{previous_response}` | Previous step output (requires `pass_previous_response: true`) |
|
| `{step_iteration}` | Per-step iteration count |
|
||||||
| `{user_inputs}` | Accumulated user inputs during workflow |
|
| `{previous_response}` | Previous step output (auto-injected if not in template) |
|
||||||
| `{report_dir}` | Report directory name (e.g., `20250126-143052-task-summary`) |
|
| `{user_inputs}` | Accumulated user inputs (auto-injected if not in template) |
|
||||||
|
| `{report_dir}` | Report directory name |
|
||||||
|
|
||||||
### Model Resolution
|
### Model Resolution
|
||||||
|
|
||||||
@ -166,24 +254,34 @@ provider: claude
|
|||||||
model: opus # Default model for all steps (unless overridden)
|
model: opus # Default model for all steps (unless overridden)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## NDJSON Session Logging
|
||||||
|
|
||||||
|
Session logs use NDJSON (`.jsonl`) format for real-time append-only writes. Record types:
|
||||||
|
|
||||||
|
| Record | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| `workflow_start` | Workflow initialization with task, workflow name |
|
||||||
|
| `step_start` | Step execution start |
|
||||||
|
| `step_complete` | Step result with status, content, matched rule info |
|
||||||
|
| `workflow_complete` | Successful completion |
|
||||||
|
| `workflow_abort` | Abort with reason |
|
||||||
|
|
||||||
|
Files: `.takt/logs/{sessionId}.jsonl`, with `latest.json` pointer. Legacy `.json` format is still readable via `loadSessionLog()`.
|
||||||
|
|
||||||
## TypeScript Notes
|
## TypeScript Notes
|
||||||
|
|
||||||
- ESM modules with `.js` extensions in imports
|
- ESM modules with `.js` extensions in imports
|
||||||
- Strict TypeScript with `noUncheckedIndexedAccess`
|
- Strict TypeScript with `noUncheckedIndexedAccess`
|
||||||
- Zod schemas (v4 syntax) for runtime validation (`src/models/schemas.ts`)
|
- Zod schemas for runtime validation (`src/models/schemas.ts`)
|
||||||
- Uses `@anthropic-ai/claude-agent-sdk` for Claude integration
|
- Uses `@anthropic-ai/claude-agent-sdk` for Claude integration
|
||||||
|
|
||||||
## Design Principles
|
## Design Principles
|
||||||
|
|
||||||
**Keep commands minimal.** One command per concept. Use arguments/modes instead of multiple similar commands. Before adding a new command, consider if existing commands can be extended.
|
**Keep commands minimal.** One command per concept. Use arguments/modes instead of multiple similar commands. Before adding a new command, consider if existing commands can be extended.
|
||||||
|
|
||||||
**Do NOT expand schemas carelessly.** The `TransitionConditionSchema` defines allowed condition values for workflow transitions. Do NOT add new values without strong justification. Use existing values creatively:
|
**Do NOT expand schemas carelessly.** Rule conditions are free-form text (not enum-restricted). However, the engine's behavior depends on specific patterns (`ai()`, `all()`, `any()`). Do not add new special syntax without updating the loader's regex parsing in `workflowLoader.ts`.
|
||||||
- `done` - Task completed (minor fixes, successful completion)
|
|
||||||
- `blocked` - Cannot proceed (needs plan rework)
|
**Instruction auto-injection over explicit placeholders.** The instruction builder auto-injects `{task}`, `{previous_response}`, `{user_inputs}`, and status rules. Templates should contain only step-specific instructions, not boilerplate.
|
||||||
- `approved` - Review passed
|
|
||||||
- `rejected` - Review failed, needs major rework
|
|
||||||
- `improve` - Needs improvement (security concerns, quality issues)
|
|
||||||
- `always` - Unconditional transition
|
|
||||||
|
|
||||||
## Isolated Execution (Shared Clone)
|
## Isolated Execution (Shared Clone)
|
||||||
|
|
||||||
|
|||||||
248
README.md
248
README.md
@ -26,6 +26,9 @@ npm install -g takt
|
|||||||
# Run a task (will prompt for workflow selection and optional isolated clone)
|
# Run a task (will prompt for workflow selection and optional isolated clone)
|
||||||
takt "Add a login feature"
|
takt "Add a login feature"
|
||||||
|
|
||||||
|
# Run a GitHub issue as a task
|
||||||
|
takt "#6"
|
||||||
|
|
||||||
# Add a task to the queue
|
# Add a task to the queue
|
||||||
takt /add-task "Fix the login bug"
|
takt /add-task "Fix the login bug"
|
||||||
|
|
||||||
@ -75,7 +78,7 @@ Choose `y` to run in a `git clone --shared` isolated environment, keeping your w
|
|||||||
|
|
||||||
| Workflow | Best for |
|
| Workflow | Best for |
|
||||||
|----------|----------|
|
|----------|----------|
|
||||||
| `default` | Full development tasks. Used for TAKT's own development. Multi-stage review with fix loops. |
|
| `default` | Full development tasks. Used for TAKT's own development. Multi-stage review with parallel architect + security review. |
|
||||||
| `simple` | Lightweight tasks like README updates or small fixes. Reviews without fix loops. |
|
| `simple` | Lightweight tasks like README updates or small fixes. Reviews without fix loops. |
|
||||||
| `expert-review` / `expert-cqrs` | Web development projects. Multi-expert review (CQRS, Frontend, Security, QA). |
|
| `expert-review` / `expert-cqrs` | Web development projects. Multi-expert review (CQRS, Frontend, Security, QA). |
|
||||||
| `research` | Research and investigation. Autonomous research without asking questions. |
|
| `research` | Research and investigation. Autonomous research without asking questions. |
|
||||||
@ -86,67 +89,111 @@ Choose `y` to run in a `git clone --shared` isolated environment, keeping your w
|
|||||||
| Command | Alias | Description |
|
| Command | Alias | Description |
|
||||||
|---------|-------|-------------|
|
|---------|-------|-------------|
|
||||||
| `takt "task"` | | Execute task with current workflow (session auto-continued) |
|
| `takt "task"` | | Execute task with current workflow (session auto-continued) |
|
||||||
|
| `takt "#N"` | | Execute GitHub issue #N as a task |
|
||||||
| `takt /run-tasks` | `/run` | Run all pending tasks from `.takt/tasks/` |
|
| `takt /run-tasks` | `/run` | Run all pending tasks from `.takt/tasks/` |
|
||||||
| `takt /watch` | | Watch `.takt/tasks/` and auto-execute tasks (stays resident) |
|
| `takt /watch` | | Watch `.takt/tasks/` and auto-execute tasks (stays resident) |
|
||||||
| `takt /add-task` | `/add` | Add a new task interactively (YAML format, multiline supported) |
|
| `takt /add-task` | `/add` | Add a new task interactively (YAML format, multiline supported) |
|
||||||
| `takt /list-tasks` | `/list` | List task branches (try merge, merge & cleanup, or delete) |
|
| `takt /list-tasks` | `/list` | List task branches (try merge, merge & cleanup, or delete) |
|
||||||
| `takt /switch` | `/sw` | Switch workflow interactively |
|
| `takt /switch` | `/sw` | Switch workflow interactively |
|
||||||
| `takt /clear` | | Clear agent conversation sessions |
|
| `takt /clear` | | Clear agent conversation sessions |
|
||||||
|
| `takt /eject` | | Copy builtin workflow/agents to `~/.takt/` for customization |
|
||||||
| `takt /refresh-builtin` | | Update builtin agents/workflows to latest version |
|
| `takt /refresh-builtin` | | Update builtin agents/workflows to latest version |
|
||||||
| `takt /config` | | Configure permission mode |
|
| `takt /config` | | Configure permission mode |
|
||||||
| `takt /help` | | Show help |
|
| `takt /help` | | Show help |
|
||||||
|
|
||||||
## Workflows
|
## Workflows
|
||||||
|
|
||||||
TAKT uses YAML-based workflow definitions. Place them in:
|
TAKT uses YAML-based workflow definitions with rule-based routing. Builtin workflows are embedded in the package; user workflows in `~/.takt/workflows/` take priority. Use `/eject` to copy a builtin to `~/.takt/` for customization.
|
||||||
- `~/.takt/workflows/*.yaml`
|
|
||||||
|
|
||||||
### Example Workflow
|
### Example Workflow
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
name: default
|
name: default
|
||||||
max_iterations: 10
|
max_iterations: 10
|
||||||
|
initial_step: plan
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: plan
|
- name: plan
|
||||||
agent: planner
|
agent: ../agents/default/planner.md
|
||||||
provider: claude # Optional: claude or codex
|
model: opus
|
||||||
model: opus # Claude: opus/sonnet/haiku, Codex: gpt-5.2-codex/gpt-5.1-codex
|
edit: false
|
||||||
|
rules:
|
||||||
|
- condition: Plan complete
|
||||||
|
next: implement
|
||||||
instruction_template: |
|
instruction_template: |
|
||||||
{task}
|
Analyze the request and create an implementation plan.
|
||||||
transitions:
|
|
||||||
- condition: done
|
|
||||||
next_step: implement
|
|
||||||
|
|
||||||
- name: implement
|
- name: implement
|
||||||
agent: coder
|
agent: ../agents/default/coder.md
|
||||||
provider: codex
|
edit: true
|
||||||
model: gpt-5.2-codex # Codex model example
|
permission_mode: acceptEdits
|
||||||
|
rules:
|
||||||
|
- condition: Implementation complete
|
||||||
|
next: review
|
||||||
|
- condition: Cannot proceed
|
||||||
|
next: ABORT
|
||||||
instruction_template: |
|
instruction_template: |
|
||||||
{task}
|
Implement based on the plan.
|
||||||
transitions:
|
|
||||||
- condition: done
|
|
||||||
next_step: review
|
|
||||||
- condition: blocked
|
|
||||||
next_step: ABORT
|
|
||||||
|
|
||||||
- name: review
|
- name: review
|
||||||
agent: architect
|
agent: ../agents/default/architecture-reviewer.md
|
||||||
model: sonnet # Model alias (no provider = uses global default)
|
edit: false
|
||||||
transitions:
|
rules:
|
||||||
- condition: approved
|
- condition: Approved
|
||||||
next_step: COMPLETE
|
next: COMPLETE
|
||||||
- condition: rejected
|
- condition: Needs fix
|
||||||
next_step: implement
|
next: implement
|
||||||
|
instruction_template: |
|
||||||
|
Review the implementation for architecture and code quality.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Parallel Steps
|
||||||
|
|
||||||
|
Steps can execute sub-steps concurrently with aggregate evaluation:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: reviewers
|
||||||
|
parallel:
|
||||||
|
- name: arch-review
|
||||||
|
agent: ../agents/default/architecture-reviewer.md
|
||||||
|
rules:
|
||||||
|
- condition: approved
|
||||||
|
- condition: needs_fix
|
||||||
|
instruction_template: |
|
||||||
|
Review architecture and code quality.
|
||||||
|
- name: security-review
|
||||||
|
agent: ../agents/default/security-reviewer.md
|
||||||
|
rules:
|
||||||
|
- condition: approved
|
||||||
|
- condition: needs_fix
|
||||||
|
instruction_template: |
|
||||||
|
Review for security vulnerabilities.
|
||||||
|
rules:
|
||||||
|
- condition: all("approved")
|
||||||
|
next: supervise
|
||||||
|
- condition: any("needs_fix")
|
||||||
|
next: fix
|
||||||
|
```
|
||||||
|
|
||||||
|
- `all("X")`: true if ALL sub-steps matched condition X
|
||||||
|
- `any("X")`: true if ANY sub-step matched condition X
|
||||||
|
- Sub-step `rules` define possible outcomes; `next` is optional (parent handles routing)
|
||||||
|
|
||||||
|
### Rule Condition Types
|
||||||
|
|
||||||
|
| Type | Syntax | Description |
|
||||||
|
|------|--------|-------------|
|
||||||
|
| Tag-based | `"condition text"` | Agent outputs `[STEP:N]` tag, matched by index |
|
||||||
|
| AI judge | `ai("condition text")` | AI evaluates the condition against agent output |
|
||||||
|
| Aggregate | `all("X")` / `any("X")` | Aggregates parallel sub-step results |
|
||||||
|
|
||||||
## Built-in Workflows
|
## Built-in Workflows
|
||||||
|
|
||||||
TAKT ships with several built-in workflows:
|
TAKT ships with several built-in workflows:
|
||||||
|
|
||||||
| Workflow | Description |
|
| Workflow | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `default` | Full development workflow: plan → implement → architect review → AI review → security review → supervisor approval. Includes fix loops for each review stage. |
|
| `default` | Full development workflow: plan → implement → AI review → parallel reviewers (architect + security) → supervisor approval. Includes fix loops for each review stage. |
|
||||||
| `simple` | Simplified version of default: plan → implement → architect review → AI review → supervisor. No intermediate fix steps. |
|
| `simple` | Simplified version of default: plan → implement → architect review → AI review → supervisor. No intermediate fix steps. |
|
||||||
| `research` | Research workflow: planner → digger → supervisor. Autonomously researches topics without asking questions. |
|
| `research` | Research workflow: planner → digger → supervisor. Autonomously researches topics without asking questions. |
|
||||||
| `expert-review` | Comprehensive review with domain experts: CQRS+ES, Frontend, AI, Security, QA reviews with fix loops. |
|
| `expert-review` | Comprehensive review with domain experts: CQRS+ES, Frontend, AI, Security, QA reviews with fix loops. |
|
||||||
@ -158,9 +205,9 @@ Switch between workflows with `takt /switch`.
|
|||||||
## Built-in Agents
|
## Built-in Agents
|
||||||
|
|
||||||
- **coder** - Implements features and fixes bugs
|
- **coder** - Implements features and fixes bugs
|
||||||
- **architect** - Reviews code and provides feedback
|
- **architect** - Reviews architecture and code quality, verifies spec compliance
|
||||||
- **supervisor** - Final verification and approval
|
- **supervisor** - Final verification, validation, and approval
|
||||||
- **planner** - Task analysis and implementation planning
|
- **planner** - Task analysis, spec investigation, and implementation planning
|
||||||
- **ai-reviewer** - AI-generated code quality review
|
- **ai-reviewer** - AI-generated code quality review
|
||||||
- **security** - Security vulnerability assessment
|
- **security** - Security vulnerability assessment
|
||||||
|
|
||||||
@ -175,14 +222,19 @@ agents:
|
|||||||
allowed_tools: [Read, Glob, Grep]
|
allowed_tools: [Read, Glob, Grep]
|
||||||
provider: claude # Optional: claude or codex
|
provider: claude # Optional: claude or codex
|
||||||
model: opus # Claude: opus/sonnet/haiku or full name (claude-opus-4-5-20251101)
|
model: opus # Claude: opus/sonnet/haiku or full name (claude-opus-4-5-20251101)
|
||||||
status_patterns:
|
```
|
||||||
approved: "\\[APPROVE\\]"
|
|
||||||
rejected: "\\[REJECT\\]"
|
|
||||||
|
|
||||||
- name: my-codex-agent
|
Or create agent prompt files as Markdown:
|
||||||
prompt_file: .takt/prompts/analyzer.md
|
|
||||||
provider: codex
|
```markdown
|
||||||
model: gpt-5.2-codex # Codex: gpt-5.2-codex, gpt-5.1-codex, etc.
|
# ~/.takt/agents/my-agents/reviewer.md
|
||||||
|
|
||||||
|
You are a code reviewer focused on security.
|
||||||
|
|
||||||
|
## Your Role
|
||||||
|
- Check for security vulnerabilities
|
||||||
|
- Verify input validation
|
||||||
|
- Review authentication logic
|
||||||
```
|
```
|
||||||
|
|
||||||
## Model Selection
|
## Model Selection
|
||||||
@ -217,22 +269,22 @@ Available Codex models:
|
|||||||
```
|
```
|
||||||
~/.takt/
|
~/.takt/
|
||||||
├── config.yaml # Global config (provider, model, workflows, etc.)
|
├── config.yaml # Global config (provider, model, workflows, etc.)
|
||||||
├── workflows/ # Workflow definitions
|
├── workflows/ # User workflow definitions (override builtins)
|
||||||
└── agents/ # Agent prompt files
|
└── agents/ # User agent prompt files
|
||||||
|
|
||||||
.takt/ # Project-level config
|
.takt/ # Project-level config
|
||||||
├── agents.yaml # Custom agent definitions
|
├── agents.yaml # Custom agent definitions
|
||||||
├── tasks/ # Pending task files (.yaml, .md)
|
├── tasks/ # Pending task files (.yaml, .md)
|
||||||
├── completed/ # Completed tasks with reports
|
├── completed/ # Completed tasks with reports
|
||||||
├── worktree-meta/ # Metadata for task branches
|
|
||||||
├── worktree-sessions/ # Per-clone agent session storage
|
|
||||||
├── reports/ # Execution reports (auto-generated)
|
├── reports/ # Execution reports (auto-generated)
|
||||||
└── logs/ # Session logs (incremental)
|
└── logs/ # Session logs in NDJSON format
|
||||||
├── latest.json # Pointer to current/latest session
|
├── latest.json # Pointer to current/latest session
|
||||||
├── previous.json # Pointer to previous session
|
├── previous.json # Pointer to previous session
|
||||||
└── {sessionId}.json # Full session log per workflow run
|
└── {sessionId}.jsonl # NDJSON session log per workflow run
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Built-in resources are embedded in the npm package (`dist/resources/`). User files in `~/.takt/` take priority over the built-in ones.
|
||||||
|
|
||||||
### Global Configuration
|
### Global Configuration
|
||||||
|
|
||||||
Configure default provider and model in `~/.takt/config.yaml`:
|
Configure default provider and model in `~/.takt/config.yaml`:
|
||||||
@ -268,67 +320,59 @@ This interactive flow ensures each task runs with the right workflow and isolati
|
|||||||
|
|
||||||
### Adding Custom Workflows
|
### Adding Custom Workflows
|
||||||
|
|
||||||
Create your own workflow by adding YAML files to `~/.takt/workflows/`:
|
Create your own workflow by adding YAML files to `~/.takt/workflows/`, or use `/eject` to customize a builtin:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy the default workflow to ~/.takt/workflows/ for editing
|
||||||
|
takt /eject default
|
||||||
|
```
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# ~/.takt/workflows/my-workflow.yaml
|
# ~/.takt/workflows/my-workflow.yaml
|
||||||
name: my-workflow
|
name: my-workflow
|
||||||
description: My custom workflow
|
description: My custom workflow
|
||||||
|
|
||||||
max_iterations: 5
|
max_iterations: 5
|
||||||
|
initial_step: analyze
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: analyze
|
- name: analyze
|
||||||
agent: ~/.takt/agents/my-agents/analyzer.md
|
agent: ~/.takt/agents/my-agents/analyzer.md
|
||||||
|
edit: false
|
||||||
|
rules:
|
||||||
|
- condition: Analysis complete
|
||||||
|
next: implement
|
||||||
instruction_template: |
|
instruction_template: |
|
||||||
Analyze this request: {task}
|
Analyze this request thoroughly.
|
||||||
transitions:
|
|
||||||
- condition: done
|
|
||||||
next_step: implement
|
|
||||||
|
|
||||||
- name: implement
|
- name: implement
|
||||||
agent: ~/.takt/agents/default/coder.md
|
agent: ~/.takt/agents/default/coder.md
|
||||||
instruction_template: |
|
edit: true
|
||||||
Implement based on the analysis: {previous_response}
|
permission_mode: acceptEdits
|
||||||
pass_previous_response: true
|
pass_previous_response: true
|
||||||
transitions:
|
rules:
|
||||||
- condition: done
|
- condition: Done
|
||||||
next_step: COMPLETE
|
next: COMPLETE
|
||||||
|
instruction_template: |
|
||||||
|
Implement based on the analysis.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **Note**: `{task}`, `{previous_response}`, and `{user_inputs}` are auto-injected into instructions. You only need explicit placeholders if you want to control their position in the template.
|
||||||
|
|
||||||
### Specifying Agents by Path
|
### Specifying Agents by Path
|
||||||
|
|
||||||
Agents are specified using file paths in workflow definitions:
|
Agents are specified using file paths in workflow definitions:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# Use built-in agents
|
# Relative to workflow file directory
|
||||||
|
agent: ../agents/default/coder.md
|
||||||
|
|
||||||
|
# Home directory
|
||||||
agent: ~/.takt/agents/default/coder.md
|
agent: ~/.takt/agents/default/coder.md
|
||||||
agent: ~/.takt/agents/magi/melchior.md
|
|
||||||
|
|
||||||
# Use project-local agents
|
# Absolute paths
|
||||||
agent: ./.takt/agents/my-reviewer.md
|
|
||||||
|
|
||||||
# Use absolute paths
|
|
||||||
agent: /path/to/custom/agent.md
|
agent: /path/to/custom/agent.md
|
||||||
```
|
```
|
||||||
|
|
||||||
Create custom agent prompts as Markdown files:
|
|
||||||
|
|
||||||
```markdown
|
|
||||||
# ~/.takt/agents/my-agents/reviewer.md
|
|
||||||
|
|
||||||
You are a code reviewer focused on security.
|
|
||||||
|
|
||||||
## Your Role
|
|
||||||
- Check for security vulnerabilities
|
|
||||||
- Verify input validation
|
|
||||||
- Review authentication logic
|
|
||||||
|
|
||||||
## Output Format
|
|
||||||
- [REVIEWER:APPROVE] if code is secure
|
|
||||||
- [REVIEWER:REJECT] if issues found (list them)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Task Management
|
### Task Management
|
||||||
|
|
||||||
TAKT supports batch task processing through task files in `.takt/tasks/`. Both `.yaml`/`.yml` and `.md` file formats are supported.
|
TAKT supports batch task processing through task files in `.takt/tasks/`. Both `.yaml`/`.yml` and `.md` file formats are supported.
|
||||||
@ -339,6 +383,9 @@ TAKT supports batch task processing through task files in `.takt/tasks/`. Both `
|
|||||||
# Quick add (no isolation)
|
# Quick add (no isolation)
|
||||||
takt /add-task "Add authentication feature"
|
takt /add-task "Add authentication feature"
|
||||||
|
|
||||||
|
# Add a GitHub issue as a task
|
||||||
|
takt /add-task "#6"
|
||||||
|
|
||||||
# Interactive mode (prompts for isolation, branch, workflow options)
|
# Interactive mode (prompts for isolation, branch, workflow options)
|
||||||
takt /add-task
|
takt /add-task
|
||||||
```
|
```
|
||||||
@ -416,11 +463,13 @@ Lists all `takt/`-prefixed branches with file change counts. For each branch you
|
|||||||
|
|
||||||
### Session Logs
|
### Session Logs
|
||||||
|
|
||||||
TAKT writes session logs incrementally to `.takt/logs/`. Logs are saved at workflow start, after each step, and at workflow end — so even if the process crashes mid-execution, partial logs are preserved.
|
TAKT writes session logs in NDJSON (`.jsonl`) format to `.takt/logs/`. Each record is appended atomically, so partial logs are preserved even if the process crashes mid-execution, and logs can be followed in real time with `tail -f`.
|
||||||
|
|
||||||
- `.takt/logs/latest.json` - Pointer to the current (or most recent) session
|
- `.takt/logs/latest.json` - Pointer to the current (or most recent) session
|
||||||
- `.takt/logs/previous.json` - Pointer to the previous session
|
- `.takt/logs/previous.json` - Pointer to the previous session
|
||||||
- `.takt/logs/{sessionId}.json` - Full session log with step history
|
- `.takt/logs/{sessionId}.jsonl` - NDJSON session log with step history
|
||||||
|
|
||||||
|
Record types: `workflow_start`, `step_start`, `step_complete`, `workflow_complete`, `workflow_abort`.
|
||||||
|
|
||||||
Agents can read `previous.json` to pick up context from a prior run. Session continuity is automatic — simply run `takt "task"` to continue where the previous session left off.
|
Agents can read `previous.json` to pick up context from a prior run. Session continuity is automatic — simply run `takt "task"` to continue where the previous session left off.
|
||||||
|
|
||||||
@ -430,57 +479,48 @@ Available variables in `instruction_template`:
|
|||||||
|
|
||||||
| Variable | Description |
|
| Variable | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `{task}` | Original user request |
|
| `{task}` | Original user request (auto-injected if not in template) |
|
||||||
| `{iteration}` | Workflow-wide turn count (total steps executed) |
|
| `{iteration}` | Workflow-wide turn count (total steps executed) |
|
||||||
| `{max_iterations}` | Maximum iterations allowed |
|
| `{max_iterations}` | Maximum iterations allowed |
|
||||||
| `{step_iteration}` | Per-step iteration count (how many times THIS step has run) |
|
| `{step_iteration}` | Per-step iteration count (how many times THIS step has run) |
|
||||||
| `{previous_response}` | Previous step's output (requires `pass_previous_response: true`) |
|
| `{previous_response}` | Previous step's output (auto-injected if not in template) |
|
||||||
| `{user_inputs}` | Additional user inputs during workflow |
|
| `{user_inputs}` | Additional user inputs during workflow (auto-injected if not in template) |
|
||||||
| `{report_dir}` | Report directory name (e.g., `20250126-143052-task-summary`) |
|
| `{report_dir}` | Report directory name (e.g., `20250126-143052-task-summary`) |
|
||||||
|
|
||||||
### Designing Workflows
|
### Designing Workflows
|
||||||
|
|
||||||
Each workflow step requires three key elements:
|
Each workflow step requires:
|
||||||
|
|
||||||
**1. Agent** - A Markdown file containing the system prompt:
|
**1. Agent** - A Markdown file containing the system prompt:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
agent: ~/.takt/agents/default/coder.md # Path to agent prompt file
|
agent: ../agents/default/coder.md # Path to agent prompt file
|
||||||
agent_name: coder # Display name (optional)
|
agent_name: coder # Display name (optional)
|
||||||
```
|
```
|
||||||
|
|
||||||
**2. Status Rules** - Define how the agent signals completion. Agents output status markers like `[CODER:DONE]` or `[ARCHITECT:REJECT]` that TAKT detects to drive transitions:
|
**2. Rules** - Define how the step routes to the next step. The instruction builder auto-injects status output rules so agents know which tags to output:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
status_rules_prompt: |
|
rules:
|
||||||
Your final output MUST include a status tag:
|
- condition: "Implementation complete"
|
||||||
- `[CODER:DONE]` if implementation is complete
|
next: review
|
||||||
- `[CODER:BLOCKED]` if you cannot proceed
|
- condition: "Cannot proceed"
|
||||||
|
next: ABORT
|
||||||
```
|
```
|
||||||
|
|
||||||
**3. Transitions** - Route to the next step based on status:
|
Special `next` values: `COMPLETE` (success), `ABORT` (failure).
|
||||||
|
|
||||||
```yaml
|
**3. Step options:**
|
||||||
transitions:
|
|
||||||
- condition: done # Maps to status tag DONE
|
|
||||||
next_step: review # Go to review step
|
|
||||||
- condition: blocked # Maps to status tag BLOCKED
|
|
||||||
next_step: ABORT # End workflow with failure
|
|
||||||
```
|
|
||||||
|
|
||||||
Available transition conditions: `done`, `blocked`, `approved`, `rejected`, `improve`, `answer`, `always`.
|
|
||||||
Special next_step values: `COMPLETE` (success), `ABORT` (failure).
|
|
||||||
|
|
||||||
**Step options:**
|
|
||||||
|
|
||||||
| Option | Default | Description |
|
| Option | Default | Description |
|
||||||
|--------|---------|-------------|
|
|--------|---------|-------------|
|
||||||
|
| `edit` | - | Whether the step can edit project files (`true`/`false`) |
|
||||||
| `pass_previous_response` | `true` | Pass previous step's output to `{previous_response}` |
|
| `pass_previous_response` | `true` | Pass previous step's output to `{previous_response}` |
|
||||||
| `on_no_status` | - | Behavior when no status is detected: `complete`, `continue`, `stay` |
|
|
||||||
| `allowed_tools` | - | List of tools the agent can use (Read, Glob, Grep, Edit, Write, Bash, etc.) |
|
| `allowed_tools` | - | List of tools the agent can use (Read, Glob, Grep, Edit, Write, Bash, etc.) |
|
||||||
| `provider` | - | Override provider for this step (`claude` or `codex`) |
|
| `provider` | - | Override provider for this step (`claude` or `codex`) |
|
||||||
| `model` | - | Override model for this step |
|
| `model` | - | Override model for this step |
|
||||||
| `permission_mode` | `default` | Permission mode: `default`, `acceptEdits`, or `bypassPermissions` |
|
| `permission_mode` | `default` | Permission mode: `default`, `acceptEdits`, or `bypassPermissions` |
|
||||||
|
| `report` | - | Report file configuration (name, format) for auto-generated reports |
|
||||||
|
|
||||||
## API Usage
|
## API Usage
|
||||||
|
|
||||||
|
|||||||
@ -22,6 +22,9 @@ npm install -g takt
|
|||||||
# タスクを実行(ワークフロー選択プロンプトが表示されます)
|
# タスクを実行(ワークフロー選択プロンプトが表示されます)
|
||||||
takt "ログイン機能を追加して"
|
takt "ログイン機能を追加して"
|
||||||
|
|
||||||
|
# GitHub Issueをタスクとして実行
|
||||||
|
takt "#6"
|
||||||
|
|
||||||
# タスクをキューに追加
|
# タスクをキューに追加
|
||||||
takt /add-task "ログインのバグを修正"
|
takt /add-task "ログインのバグを修正"
|
||||||
|
|
||||||
@ -40,7 +43,7 @@ takt /switch
|
|||||||
|
|
||||||
### タスク実行の流れ
|
### タスク実行の流れ
|
||||||
|
|
||||||
`takt "ログイン機能を追加して"` を実行すると、以下の対話フローが表示されます:
|
`takt "ログイン機能を追加して"` を実行すると、以下の対話フローが表示されます:
|
||||||
|
|
||||||
**1. ワークフロー選択**
|
**1. ワークフロー選択**
|
||||||
|
|
||||||
@ -71,7 +74,7 @@ Select workflow:
|
|||||||
|
|
||||||
| ワークフロー | おすすめ用途 |
|
| ワークフロー | おすすめ用途 |
|
||||||
|------------|------------|
|
|------------|------------|
|
||||||
| `default` | 本格的な開発タスク。TAKT自身の開発で使用。修正ループ付きの多段階レビュー。 |
|
| `default` | 本格的な開発タスク。TAKT自身の開発で使用。アーキテクト+セキュリティの並列レビュー付き多段階レビュー。 |
|
||||||
| `simple` | README更新や小さな修正などの軽量タスク。レビューはあるが修正ループなし。 |
|
| `simple` | README更新や小さな修正などの軽量タスク。レビューはあるが修正ループなし。 |
|
||||||
| `expert-review` / `expert-cqrs` | Web開発プロジェクト。マルチエキスパートレビュー(CQRS、フロントエンド、セキュリティ、QA)。 |
|
| `expert-review` / `expert-cqrs` | Web開発プロジェクト。マルチエキスパートレビュー(CQRS、フロントエンド、セキュリティ、QA)。 |
|
||||||
| `research` | 調査・リサーチ。質問せずに自律的にリサーチを実行。 |
|
| `research` | 調査・リサーチ。質問せずに自律的にリサーチを実行。 |
|
||||||
@ -82,16 +85,175 @@ Select workflow:
|
|||||||
| コマンド | エイリアス | 説明 |
|
| コマンド | エイリアス | 説明 |
|
||||||
|---------|-----------|------|
|
|---------|-----------|------|
|
||||||
| `takt "タスク"` | | 現在のワークフローでタスクを実行(セッション自動継続) |
|
| `takt "タスク"` | | 現在のワークフローでタスクを実行(セッション自動継続) |
|
||||||
|
| `takt "#N"` | | GitHub Issue #Nをタスクとして実行 |
|
||||||
| `takt /run-tasks` | `/run` | `.takt/tasks/` の保留中タスクをすべて実行 |
|
| `takt /run-tasks` | `/run` | `.takt/tasks/` の保留中タスクをすべて実行 |
|
||||||
| `takt /watch` | | `.takt/tasks/` を監視してタスクを自動実行(常駐プロセス) |
|
| `takt /watch` | | `.takt/tasks/` を監視してタスクを自動実行(常駐プロセス) |
|
||||||
| `takt /add-task` | `/add` | 新しいタスクを対話的に追加(YAML形式、複数行対応) |
|
| `takt /add-task` | `/add` | 新しいタスクを対話的に追加(YAML形式、複数行対応) |
|
||||||
| `takt /list-tasks` | `/list` | タスクブランチ一覧(マージ・削除) |
|
| `takt /list-tasks` | `/list` | タスクブランチ一覧(マージ・削除) |
|
||||||
| `takt /switch` | `/sw` | ワークフローを対話的に切り替え |
|
| `takt /switch` | `/sw` | ワークフローを対話的に切り替え |
|
||||||
| `takt /clear` | | エージェントの会話セッションをクリア |
|
| `takt /clear` | | エージェントの会話セッションをクリア |
|
||||||
|
| `takt /eject` | | ビルトインのワークフロー/エージェントを`~/.takt/`にコピーしてカスタマイズ |
|
||||||
| `takt /refresh-builtin` | | ビルトインのエージェント/ワークフローを最新版に更新 |
|
| `takt /refresh-builtin` | | ビルトインのエージェント/ワークフローを最新版に更新 |
|
||||||
| `takt /config` | | パーミッションモードを設定 |
|
| `takt /config` | | パーミッションモードを設定 |
|
||||||
| `takt /help` | | ヘルプを表示 |
|
| `takt /help` | | ヘルプを表示 |
|
||||||
|
|
||||||
|
## ワークフロー
|
||||||
|
|
||||||
|
TAKTはYAMLベースのワークフロー定義とルールベースルーティングを使用します。ビルトインワークフローはパッケージに埋め込まれており、`~/.takt/workflows/` のユーザーワークフローが優先されます。`/eject` でビルトインを`~/.takt/`にコピーしてカスタマイズできます。
|
||||||
|
|
||||||
|
### ワークフローの例
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
name: default
|
||||||
|
max_iterations: 10
|
||||||
|
initial_step: plan
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: plan
|
||||||
|
agent: ../agents/default/planner.md
|
||||||
|
model: opus
|
||||||
|
edit: false
|
||||||
|
rules:
|
||||||
|
- condition: 計画完了
|
||||||
|
next: implement
|
||||||
|
instruction_template: |
|
||||||
|
リクエストを分析し、実装計画を作成してください。
|
||||||
|
|
||||||
|
- name: implement
|
||||||
|
agent: ../agents/default/coder.md
|
||||||
|
edit: true
|
||||||
|
permission_mode: acceptEdits
|
||||||
|
rules:
|
||||||
|
- condition: 実装完了
|
||||||
|
next: review
|
||||||
|
- condition: 進行不可
|
||||||
|
next: ABORT
|
||||||
|
instruction_template: |
|
||||||
|
計画に基づいて実装してください。
|
||||||
|
|
||||||
|
- name: review
|
||||||
|
agent: ../agents/default/architecture-reviewer.md
|
||||||
|
edit: false
|
||||||
|
rules:
|
||||||
|
- condition: 承認
|
||||||
|
next: COMPLETE
|
||||||
|
- condition: 修正が必要
|
||||||
|
next: implement
|
||||||
|
instruction_template: |
|
||||||
|
アーキテクチャとコード品質の観点で実装をレビューしてください。
|
||||||
|
```
|
||||||
|
|
||||||
|
### パラレルステップ
|
||||||
|
|
||||||
|
ステップ内でサブステップを並列実行し、集約条件で評価できます:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- name: reviewers
|
||||||
|
parallel:
|
||||||
|
- name: arch-review
|
||||||
|
agent: ../agents/default/architecture-reviewer.md
|
||||||
|
rules:
|
||||||
|
- condition: approved
|
||||||
|
- condition: needs_fix
|
||||||
|
instruction_template: |
|
||||||
|
アーキテクチャとコード品質をレビューしてください。
|
||||||
|
- name: security-review
|
||||||
|
agent: ../agents/default/security-reviewer.md
|
||||||
|
rules:
|
||||||
|
- condition: approved
|
||||||
|
- condition: needs_fix
|
||||||
|
instruction_template: |
|
||||||
|
セキュリティ脆弱性をレビューしてください。
|
||||||
|
rules:
|
||||||
|
- condition: all("approved")
|
||||||
|
next: supervise
|
||||||
|
- condition: any("needs_fix")
|
||||||
|
next: fix
|
||||||
|
```
|
||||||
|
|
||||||
|
- `all("X")`: すべてのサブステップが条件Xにマッチした場合にtrue
|
||||||
|
- `any("X")`: いずれかのサブステップが条件Xにマッチした場合にtrue
|
||||||
|
- サブステップの `rules` は可能な結果を定義しますが、`next` は省略可能(親が遷移を制御)
|
||||||
|
|
||||||
|
### ルール条件の種類
|
||||||
|
|
||||||
|
| 種類 | 構文 | 説明 |
|
||||||
|
|------|------|------|
|
||||||
|
| タグベース | `"条件テキスト"` | エージェントが `[STEP:N]` タグを出力し、インデックスでマッチ |
|
||||||
|
| AI判定 | `ai("条件テキスト")` | AIが条件をエージェント出力に対して評価 |
|
||||||
|
| 集約 | `all("X")` / `any("X")` | パラレルサブステップの結果を集約 |
|
||||||
|
|
||||||
|
## ビルトインワークフロー
|
||||||
|
|
||||||
|
TAKTには複数のビルトインワークフローが同梱されています:
|
||||||
|
|
||||||
|
| ワークフロー | 説明 |
|
||||||
|
|------------|------|
|
||||||
|
| `default` | フル開発ワークフロー: 計画 → 実装 → AIレビュー → 並列レビュー(アーキテクト+セキュリティ)→ スーパーバイザー承認。各レビュー段階に修正ループあり。 |
|
||||||
|
| `simple` | defaultの簡略版: 計画 → 実装 → アーキテクトレビュー → AIレビュー → スーパーバイザー。中間の修正ステップなし。 |
|
||||||
|
| `research` | リサーチワークフロー: プランナー → ディガー → スーパーバイザー。質問せずに自律的にリサーチを実行。 |
|
||||||
|
| `expert-review` | ドメインエキスパートによる包括的レビュー: CQRS+ES、フロントエンド、AI、セキュリティ、QAレビューと修正ループ。 |
|
||||||
|
| `expert-cqrs` | CQRS+ES、フロントエンド、AI、セキュリティ、QA専門のエキスパートレビュー。計画 → 実装 → マルチエキスパートレビュー → スーパーバイザー。 |
|
||||||
|
| `magi` | エヴァンゲリオンにインスパイアされた審議システム。3つのAIペルソナ(MELCHIOR、BALTHASAR、CASPER)が分析し投票。 |
|
||||||
|
|
||||||
|
`takt /switch` でワークフローを切り替えられます。
|
||||||
|
|
||||||
|
## ビルトインエージェント
|
||||||
|
|
||||||
|
- **coder** - 機能を実装しバグを修正
|
||||||
|
- **architect** - アーキテクチャとコード品質をレビュー、仕様準拠を検証
|
||||||
|
- **supervisor** - 最終検証、バリデーション、承認
|
||||||
|
- **planner** - タスク分析、仕様調査、実装計画
|
||||||
|
- **ai-reviewer** - AI生成コードの品質レビュー
|
||||||
|
- **security** - セキュリティ脆弱性の評価
|
||||||
|
|
||||||
|
## カスタムエージェント
|
||||||
|
|
||||||
|
`.takt/agents.yaml`でカスタムエージェントを定義:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
agents:
|
||||||
|
- name: my-reviewer
|
||||||
|
prompt_file: .takt/prompts/reviewer.md
|
||||||
|
allowed_tools: [Read, Glob, Grep]
|
||||||
|
provider: claude # オプション: claude または codex
|
||||||
|
model: opus # Claude: opus/sonnet/haiku、Codex: gpt-5.2-codex 等
|
||||||
|
```
|
||||||
|
|
||||||
|
またはMarkdownファイルでエージェントプロンプトを作成:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# ~/.takt/agents/my-agents/reviewer.md
|
||||||
|
|
||||||
|
あなたはセキュリティに特化したコードレビュアーです。
|
||||||
|
|
||||||
|
## 役割
|
||||||
|
- セキュリティ脆弱性をチェック
|
||||||
|
- 入力バリデーションを検証
|
||||||
|
- 認証ロジックをレビュー
|
||||||
|
```
|
||||||
|
|
||||||
|
## プロジェクト構造
|
||||||
|
|
||||||
|
```
|
||||||
|
~/.takt/
|
||||||
|
├── config.yaml # グローバル設定(プロバイダー、モデル、ワークフロー等)
|
||||||
|
├── workflows/ # ユーザーワークフロー定義(ビルトインを上書き)
|
||||||
|
└── agents/ # ユーザーエージェントプロンプトファイル
|
||||||
|
|
||||||
|
.takt/ # プロジェクトレベルの設定
|
||||||
|
├── agents.yaml # カスタムエージェント定義
|
||||||
|
├── tasks/ # 保留中のタスクファイル(.yaml, .md)
|
||||||
|
├── completed/ # 完了したタスクとレポート
|
||||||
|
├── reports/ # 実行レポート(自動生成)
|
||||||
|
└── logs/ # NDJSON形式のセッションログ
|
||||||
|
├── latest.json # 現在/最新セッションへのポインタ
|
||||||
|
├── previous.json # 前回セッションへのポインタ
|
||||||
|
└── {sessionId}.jsonl # ワークフロー実行ごとのNDJSONセッションログ
|
||||||
|
```
|
||||||
|
|
||||||
|
ビルトインリソースはnpmパッケージ(`dist/resources/`)に埋め込まれています。`~/.takt/` のユーザーファイルが優先されます。
|
||||||
|
|
||||||
## 実践的な使い方ガイド
|
## 実践的な使い方ガイド
|
||||||
|
|
||||||
### タスク管理
|
### タスク管理
|
||||||
@ -104,13 +266,16 @@ TAKTは`.takt/tasks/`内のタスクファイルによるバッチ処理をサ
|
|||||||
# クイック追加(隔離なし)
|
# クイック追加(隔離なし)
|
||||||
takt /add-task "認証機能を追加"
|
takt /add-task "認証機能を追加"
|
||||||
|
|
||||||
|
# GitHub Issueをタスクとして追加
|
||||||
|
takt /add-task "#6"
|
||||||
|
|
||||||
# 対話モード(隔離実行、ブランチ、ワークフローオプションを指定可能)
|
# 対話モード(隔離実行、ブランチ、ワークフローオプションを指定可能)
|
||||||
takt /add-task
|
takt /add-task
|
||||||
```
|
```
|
||||||
|
|
||||||
#### タスクファイルの形式
|
#### タスクファイルの形式
|
||||||
|
|
||||||
**YAML形式**(推奨、worktree/branch/workflowオプション対応):
|
**YAML形式**(推奨、worktree/branch/workflowオプション対応):
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# .takt/tasks/add-auth.yaml
|
# .takt/tasks/add-auth.yaml
|
||||||
@ -120,14 +285,14 @@ branch: "feat/add-auth" # ブランチ名(省略時は自動生成)
|
|||||||
workflow: "default" # ワークフロー指定(省略時は現在のもの)
|
workflow: "default" # ワークフロー指定(省略時は現在のもの)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Markdown形式**(シンプル、後方互換):
|
**Markdown形式**(シンプル、後方互換):
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
# .takt/tasks/add-login-feature.md
|
# .takt/tasks/add-login-feature.md
|
||||||
|
|
||||||
アプリケーションにログイン機能を追加する。
|
アプリケーションにログイン機能を追加する。
|
||||||
|
|
||||||
要件:
|
要件:
|
||||||
- ユーザー名とパスワードフィールド
|
- ユーザー名とパスワードフィールド
|
||||||
- フォームバリデーション
|
- フォームバリデーション
|
||||||
- 失敗時のエラーハンドリング
|
- 失敗時のエラーハンドリング
|
||||||
@ -135,7 +300,7 @@ workflow: "default" # ワークフロー指定(省略時は現在
|
|||||||
|
|
||||||
#### 共有クローンによる隔離実行
|
#### 共有クローンによる隔離実行
|
||||||
|
|
||||||
YAMLタスクファイルで`worktree`を指定すると、各タスクを`git clone --shared`で作成した隔離クローンで実行し、メインの作業ディレクトリをクリーンに保てます:
|
YAMLタスクファイルで`worktree`を指定すると、各タスクを`git clone --shared`で作成した隔離クローンで実行し、メインの作業ディレクトリをクリーンに保てます:
|
||||||
|
|
||||||
- `worktree: true` - 隣接ディレクトリ(または`worktree_dir`設定で指定した場所)に共有クローンを自動作成
|
- `worktree: true` - 隣接ディレクトリ(または`worktree_dir`設定で指定した場所)に共有クローンを自動作成
|
||||||
- `worktree: "/path/to/dir"` - 指定パスに作成
|
- `worktree: "/path/to/dir"` - 指定パスに作成
|
||||||
@ -162,7 +327,7 @@ takt /run-tasks
|
|||||||
takt /watch
|
takt /watch
|
||||||
```
|
```
|
||||||
|
|
||||||
ウォッチモードは`.takt/tasks/`をポーリングし、新しいタスクファイルが現れると自動実行します。`Ctrl+C`で停止する常駐プロセスです。以下のような場合に便利です:
|
ウォッチモードは`.takt/tasks/`をポーリングし、新しいタスクファイルが現れると自動実行します。`Ctrl+C`で停止する常駐プロセスです。以下のような場合に便利です:
|
||||||
- タスクファイルを生成するCI/CDパイプライン
|
- タスクファイルを生成するCI/CDパイプライン
|
||||||
- 外部プロセスがタスクを追加する自動化ワークフロー
|
- 外部プロセスがタスクを追加する自動化ワークフロー
|
||||||
- タスクを順次キューイングする長時間の開発セッション
|
- タスクを順次キューイングする長時間の開発セッション
|
||||||
@ -173,7 +338,7 @@ takt /watch
|
|||||||
takt /list-tasks
|
takt /list-tasks
|
||||||
```
|
```
|
||||||
|
|
||||||
`takt/`プレフィックスのブランチをファイル変更数とともに一覧表示します。各ブランチに対して以下の操作が可能です:
|
`takt/`プレフィックスのブランチをファイル変更数とともに一覧表示します。各ブランチに対して以下の操作が可能です:
|
||||||
- **Try merge** - mainにスカッシュマージ(変更をステージングのみ、コミットなし)
|
- **Try merge** - mainにスカッシュマージ(変更をステージングのみ、コミットなし)
|
||||||
- **Instruct** - 一時クローン経由で追加指示を与える
|
- **Instruct** - 一時クローン経由で追加指示を与える
|
||||||
- **Merge & cleanup** - マージしてブランチを削除
|
- **Merge & cleanup** - マージしてブランチを削除
|
||||||
@ -181,226 +346,119 @@ takt /list-tasks
|
|||||||
|
|
||||||
### セッションログ
|
### セッションログ
|
||||||
|
|
||||||
TAKTはセッションログを`.takt/logs/`にインクリメンタルに書き込みます。ログはワークフロー開始時、各ステップ完了後、ワークフロー終了時に保存されるため、プロセスが途中でクラッシュしても部分的なログが保持されます。
|
TAKTはセッションログをNDJSON(`.jsonl`)形式で`.takt/logs/`に書き込みます。各レコードはアトミックに追記されるため、プロセスが途中でクラッシュしても部分的なログが保持され、`tail -f`でリアルタイムに追跡できます。
|
||||||
|
|
||||||
- `.takt/logs/latest.json` - 現在(または最新の)セッションへのポインタ
|
- `.takt/logs/latest.json` - 現在(または最新の)セッションへのポインタ
|
||||||
- `.takt/logs/previous.json` - 前回セッションへのポインタ
|
- `.takt/logs/previous.json` - 前回セッションへのポインタ
|
||||||
- `.takt/logs/{sessionId}.json` - ワークフロー実行ごとの完全なセッションログ
|
- `.takt/logs/{sessionId}.jsonl` - ワークフロー実行ごとのNDJSONセッションログ
|
||||||
|
|
||||||
|
レコード種別: `workflow_start`, `step_start`, `step_complete`, `workflow_complete`, `workflow_abort`
|
||||||
|
|
||||||
エージェントは`previous.json`を読み取って前回の実行コンテキストを引き継ぐことができます。セッション継続は自動的に行われます — `takt "タスク"`を実行するだけで前回のセッションから続行されます。
|
エージェントは`previous.json`を読み取って前回の実行コンテキストを引き継ぐことができます。セッション継続は自動的に行われます — `takt "タスク"`を実行するだけで前回のセッションから続行されます。
|
||||||
|
|
||||||
### カスタムワークフローの追加
|
### カスタムワークフローの追加
|
||||||
|
|
||||||
`~/.takt/workflows/`にYAMLファイルを追加して独自のワークフローを作成できます:
|
`~/.takt/workflows/`にYAMLファイルを追加するか、`/eject`でビルトインをカスタマイズします:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# defaultワークフローを~/.takt/workflows/にコピーして編集
|
||||||
|
takt /eject default
|
||||||
|
```
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# ~/.takt/workflows/my-workflow.yaml
|
# ~/.takt/workflows/my-workflow.yaml
|
||||||
name: my-workflow
|
name: my-workflow
|
||||||
description: カスタムワークフロー
|
description: カスタムワークフロー
|
||||||
|
|
||||||
max_iterations: 5
|
max_iterations: 5
|
||||||
|
initial_step: analyze
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: analyze
|
- name: analyze
|
||||||
agent: ~/.takt/agents/my-agents/analyzer.md
|
agent: ~/.takt/agents/my-agents/analyzer.md
|
||||||
|
edit: false
|
||||||
|
rules:
|
||||||
|
- condition: 分析完了
|
||||||
|
next: implement
|
||||||
instruction_template: |
|
instruction_template: |
|
||||||
このリクエストを分析してください: {task}
|
このリクエストを徹底的に分析してください。
|
||||||
transitions:
|
|
||||||
- condition: done
|
|
||||||
next_step: implement
|
|
||||||
|
|
||||||
- name: implement
|
- name: implement
|
||||||
agent: ~/.takt/agents/default/coder.md
|
agent: ~/.takt/agents/default/coder.md
|
||||||
instruction_template: |
|
edit: true
|
||||||
分析に基づいて実装してください: {previous_response}
|
permission_mode: acceptEdits
|
||||||
pass_previous_response: true
|
pass_previous_response: true
|
||||||
transitions:
|
rules:
|
||||||
- condition: done
|
- condition: 完了
|
||||||
next_step: COMPLETE
|
next: COMPLETE
|
||||||
|
instruction_template: |
|
||||||
|
分析に基づいて実装してください。
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **Note**: `{task}`、`{previous_response}`、`{user_inputs}` は自動的にインストラクションに注入されます。テンプレート内での位置を制御したい場合のみ、明示的なプレースホルダーが必要です。
|
||||||
|
|
||||||
### エージェントをパスで指定する
|
### エージェントをパスで指定する
|
||||||
|
|
||||||
ワークフロー定義ではファイルパスを使ってエージェントを指定します:
|
ワークフロー定義ではファイルパスを使ってエージェントを指定します:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# ビルトインエージェントを使用
|
# ワークフローファイルからの相対パス
|
||||||
|
agent: ../agents/default/coder.md
|
||||||
|
|
||||||
|
# ホームディレクトリ
|
||||||
agent: ~/.takt/agents/default/coder.md
|
agent: ~/.takt/agents/default/coder.md
|
||||||
agent: ~/.takt/agents/magi/melchior.md
|
|
||||||
|
|
||||||
# プロジェクトローカルのエージェントを使用
|
# 絶対パス
|
||||||
agent: ./.takt/agents/my-reviewer.md
|
|
||||||
|
|
||||||
# 絶対パスを使用
|
|
||||||
agent: /path/to/custom/agent.md
|
agent: /path/to/custom/agent.md
|
||||||
```
|
```
|
||||||
|
|
||||||
カスタムエージェントプロンプトをMarkdownファイルとして作成:
|
|
||||||
|
|
||||||
```markdown
|
|
||||||
# ~/.takt/agents/my-agents/reviewer.md
|
|
||||||
|
|
||||||
あなたはセキュリティに特化したコードレビュアーです。
|
|
||||||
|
|
||||||
## 役割
|
|
||||||
- セキュリティ脆弱性をチェック
|
|
||||||
- 入力バリデーションを検証
|
|
||||||
- 認証ロジックをレビュー
|
|
||||||
|
|
||||||
## 出力形式
|
|
||||||
- [REVIEWER:APPROVE] コードが安全な場合
|
|
||||||
- [REVIEWER:REJECT] 問題が見つかった場合(問題点をリストアップ)
|
|
||||||
```
|
|
||||||
|
|
||||||
### ワークフロー変数
|
### ワークフロー変数
|
||||||
|
|
||||||
`instruction_template`で使用可能な変数:
|
`instruction_template`で使用可能な変数:
|
||||||
|
|
||||||
| 変数 | 説明 |
|
| 変数 | 説明 |
|
||||||
|------|------|
|
|------|------|
|
||||||
| `{task}` | 元のユーザーリクエスト |
|
| `{task}` | 元のユーザーリクエスト(テンプレートになければ自動注入) |
|
||||||
| `{iteration}` | ワークフロー全体のターン数(実行された全ステップ数) |
|
| `{iteration}` | ワークフロー全体のターン数(実行された全ステップ数) |
|
||||||
| `{max_iterations}` | 最大イテレーション数 |
|
| `{max_iterations}` | 最大イテレーション数 |
|
||||||
| `{step_iteration}` | ステップごとのイテレーション数(このステップが実行された回数) |
|
| `{step_iteration}` | ステップごとのイテレーション数(このステップが実行された回数) |
|
||||||
| `{previous_response}` | 前のステップの出力(`pass_previous_response: true`が必要) |
|
| `{previous_response}` | 前のステップの出力(テンプレートになければ自動注入) |
|
||||||
| `{user_inputs}` | ワークフロー中の追加ユーザー入力 |
|
| `{user_inputs}` | ワークフロー中の追加ユーザー入力(テンプレートになければ自動注入) |
|
||||||
| `{report_dir}` | レポートディレクトリ名(例:`20250126-143052-task-summary`) |
|
| `{report_dir}` | レポートディレクトリ名(例: `20250126-143052-task-summary`) |
|
||||||
|
|
||||||
### ワークフローの設計
|
### ワークフローの設計
|
||||||
|
|
||||||
各ワークフローステップには3つの重要な要素が必要です。
|
各ワークフローステップに必要な要素:
|
||||||
|
|
||||||
**1. エージェント** - システムプロンプトを含むMarkdownファイル:
|
**1. エージェント** - システムプロンプトを含むMarkdownファイル:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
agent: ~/.takt/agents/default/coder.md # エージェントプロンプトファイルのパス
|
agent: ../agents/default/coder.md # エージェントプロンプトファイルのパス
|
||||||
agent_name: coder # 表示名(オプション)
|
agent_name: coder # 表示名(オプション)
|
||||||
```
|
```
|
||||||
|
|
||||||
**2. ステータスルール** - エージェントが完了を通知する方法を定義。エージェントは`[CODER:DONE]`や`[ARCHITECT:REJECT]`のようなステータスマーカーを出力し、TAKTがそれを検出して遷移を駆動します:
|
**2. ルール** - ステップから次のステップへのルーティングを定義。インストラクションビルダーがステータス出力ルールを自動注入するため、エージェントはどのタグを出力すべきか把握できます:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
status_rules_prompt: |
|
rules:
|
||||||
最終出力には必ずステータスタグを含めてください:
|
- condition: "実装完了"
|
||||||
- `[CODER:DONE]` 実装が完了した場合
|
next: review
|
||||||
- `[CODER:BLOCKED]` 進行できない場合
|
- condition: "進行不可"
|
||||||
|
next: ABORT
|
||||||
```
|
```
|
||||||
|
|
||||||
**3. 遷移** - ステータスに基づいて次のステップにルーティング:
|
特殊な `next` 値: `COMPLETE`(成功)、`ABORT`(失敗)
|
||||||
|
|
||||||
```yaml
|
**3. ステップオプション:**
|
||||||
transitions:
|
|
||||||
- condition: done # ステータスタグDONEに対応
|
|
||||||
next_step: review # reviewステップへ遷移
|
|
||||||
- condition: blocked # ステータスタグBLOCKEDに対応
|
|
||||||
next_step: ABORT # ワークフローを失敗終了
|
|
||||||
```
|
|
||||||
|
|
||||||
使用可能な遷移条件:`done`、`blocked`、`approved`、`rejected`、`improve`、`answer`、`always`
|
|
||||||
特殊なnext_step値:`COMPLETE`(成功)、`ABORT`(失敗)
|
|
||||||
|
|
||||||
**ステップオプション:**
|
|
||||||
|
|
||||||
| オプション | デフォルト | 説明 |
|
| オプション | デフォルト | 説明 |
|
||||||
|-----------|-----------|------|
|
|-----------|-----------|------|
|
||||||
|
| `edit` | - | ステップがプロジェクトファイルを編集できるか(`true`/`false`) |
|
||||||
| `pass_previous_response` | `true` | 前のステップの出力を`{previous_response}`に渡す |
|
| `pass_previous_response` | `true` | 前のステップの出力を`{previous_response}`に渡す |
|
||||||
| `on_no_status` | - | ステータス未検出時の動作:`complete`、`continue`、`stay` |
|
|
||||||
| `allowed_tools` | - | エージェントが使用できるツール一覧(Read, Glob, Grep, Edit, Write, Bash等) |
|
| `allowed_tools` | - | エージェントが使用できるツール一覧(Read, Glob, Grep, Edit, Write, Bash等) |
|
||||||
| `provider` | - | このステップのプロバイダーを上書き(`claude`または`codex`) |
|
| `provider` | - | このステップのプロバイダーを上書き(`claude`または`codex`) |
|
||||||
| `model` | - | このステップのモデルを上書き |
|
| `model` | - | このステップのモデルを上書き |
|
||||||
| `permission_mode` | `default` | パーミッションモード:`default`、`acceptEdits`、`bypassPermissions` |
|
| `permission_mode` | `default` | パーミッションモード: `default`、`acceptEdits`、`bypassPermissions` |
|
||||||
|
| `report` | - | 自動生成レポートのファイル設定(name, format) |
|
||||||
## ワークフロー
|
|
||||||
|
|
||||||
TAKTはYAMLベースのワークフロー定義を使用します。以下に配置してください:
|
|
||||||
- `~/.takt/workflows/*.yaml`
|
|
||||||
|
|
||||||
### ワークフローの例
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
name: default
|
|
||||||
max_iterations: 10
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: implement
|
|
||||||
agent: coder
|
|
||||||
instruction_template: |
|
|
||||||
{task}
|
|
||||||
transitions:
|
|
||||||
- condition: done
|
|
||||||
next_step: review
|
|
||||||
- condition: blocked
|
|
||||||
next_step: ABORT
|
|
||||||
|
|
||||||
- name: review
|
|
||||||
agent: architect
|
|
||||||
transitions:
|
|
||||||
- condition: approved
|
|
||||||
next_step: COMPLETE
|
|
||||||
- condition: rejected
|
|
||||||
next_step: implement
|
|
||||||
```
|
|
||||||
|
|
||||||
## ビルトインワークフロー
|
|
||||||
|
|
||||||
TAKTには複数のビルトインワークフローが同梱されています:
|
|
||||||
|
|
||||||
| ワークフロー | 説明 |
|
|
||||||
|------------|------|
|
|
||||||
| `default` | フル開発ワークフロー:計画 → 実装 → アーキテクトレビュー → AIレビュー → セキュリティレビュー → スーパーバイザー承認。各レビュー段階に修正ループあり。 |
|
|
||||||
| `simple` | defaultの簡略版:計画 → 実装 → アーキテクトレビュー → AIレビュー → スーパーバイザー。中間の修正ステップなし。 |
|
|
||||||
| `research` | リサーチワークフロー:プランナー → ディガー → スーパーバイザー。質問せずに自律的にリサーチを実行。 |
|
|
||||||
| `expert-review` | ドメインエキスパートによる包括的レビュー:CQRS+ES、フロントエンド、AI、セキュリティ、QAレビューと修正ループ。 |
|
|
||||||
| `expert-cqrs` | CQRS+ES、フロントエンド、AI、セキュリティ、QA専門のエキスパートレビュー。計画 → 実装 → マルチエキスパートレビュー → スーパーバイザー。 |
|
|
||||||
| `magi` | エヴァンゲリオンにインスパイアされた審議システム。3つのAIペルソナ(MELCHIOR、BALTHASAR、CASPER)が分析し投票。 |
|
|
||||||
|
|
||||||
`takt /switch` でワークフローを切り替えられます。
|
|
||||||
|
|
||||||
## ビルトインエージェント
|
|
||||||
|
|
||||||
- **coder** - 機能を実装しバグを修正
|
|
||||||
- **architect** - コードをレビューしフィードバックを提供
|
|
||||||
- **supervisor** - 最終検証と承認
|
|
||||||
- **planner** - タスク分析と実装計画
|
|
||||||
- **ai-reviewer** - AI生成コードの品質レビュー
|
|
||||||
- **security** - セキュリティ脆弱性の評価
|
|
||||||
|
|
||||||
## カスタムエージェント
|
|
||||||
|
|
||||||
`.takt/agents.yaml`でカスタムエージェントを定義:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
agents:
|
|
||||||
- name: my-reviewer
|
|
||||||
prompt_file: .takt/prompts/reviewer.md
|
|
||||||
allowed_tools: [Read, Glob, Grep]
|
|
||||||
provider: claude # オプション:claude または codex
|
|
||||||
model: opus # Claude: opus/sonnet/haiku、Codex: gpt-5.2-codex 等
|
|
||||||
status_patterns:
|
|
||||||
approved: "\\[APPROVE\\]"
|
|
||||||
rejected: "\\[REJECT\\]"
|
|
||||||
```
|
|
||||||
|
|
||||||
## プロジェクト構造
|
|
||||||
|
|
||||||
```
|
|
||||||
~/.takt/
|
|
||||||
├── config.yaml # グローバル設定(プロバイダー、モデル、ワークフロー等)
|
|
||||||
├── workflows/ # ワークフロー定義
|
|
||||||
└── agents/ # エージェントプロンプトファイル
|
|
||||||
|
|
||||||
.takt/ # プロジェクトレベルの設定
|
|
||||||
├── agents.yaml # カスタムエージェント定義
|
|
||||||
├── tasks/ # 保留中のタスクファイル(.yaml, .md)
|
|
||||||
├── completed/ # 完了したタスクとレポート
|
|
||||||
├── worktree-meta/ # タスクブランチのメタデータ
|
|
||||||
├── worktree-sessions/ # クローンごとのエージェントセッション保存
|
|
||||||
├── reports/ # 実行レポート(自動生成)
|
|
||||||
└── logs/ # セッションログ(インクリメンタル)
|
|
||||||
├── latest.json # 現在/最新セッションへのポインタ
|
|
||||||
├── previous.json # 前回セッションへのポインタ
|
|
||||||
└── {sessionId}.json # ワークフロー実行ごとの完全なセッションログ
|
|
||||||
```
|
|
||||||
|
|
||||||
## API使用例
|
## API使用例
|
||||||
|
|
||||||
@ -434,7 +492,7 @@ await engine.run();
|
|||||||
|
|
||||||
## Docker サポート
|
## Docker サポート
|
||||||
|
|
||||||
他の環境でのテスト用にDocker環境が提供されています:
|
他の環境でのテスト用にDocker環境が提供されています:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Dockerイメージをビルド
|
# Dockerイメージをビルド
|
||||||
@ -458,7 +516,7 @@ docker compose run --rm build
|
|||||||
- [Agent Guide](./agents.md) - カスタムエージェントの設定
|
- [Agent Guide](./agents.md) - カスタムエージェントの設定
|
||||||
- [Changelog](../CHANGELOG.md) - バージョン履歴
|
- [Changelog](../CHANGELOG.md) - バージョン履歴
|
||||||
- [Security Policy](../SECURITY.md) - 脆弱性報告
|
- [Security Policy](../SECURITY.md) - 脆弱性報告
|
||||||
- [ブログ:TAKT - AIエージェントオーケストレーション](https://zenn.dev/nrs/articles/c6842288a526d7) - 設計思想と実践的な使い方ガイド
|
- [ブログ: TAKT - AIエージェントオーケストレーション](https://zenn.dev/nrs/articles/c6842288a526d7) - 設計思想と実践的な使い方ガイド
|
||||||
|
|
||||||
## ライセンス
|
## ライセンス
|
||||||
|
|
||||||
|
|||||||
143
src/__tests__/engine-blocked.test.ts
Normal file
143
src/__tests__/engine-blocked.test.ts
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/**
|
||||||
|
* WorkflowEngine integration tests: blocked handling scenarios.
|
||||||
|
*
|
||||||
|
* Covers:
|
||||||
|
* - Blocked without onUserInput callback (abort)
|
||||||
|
* - Blocked with onUserInput returning null (abort)
|
||||||
|
* - Blocked with onUserInput providing input (continue)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||||
|
import { existsSync, rmSync } from 'node:fs';
|
||||||
|
|
||||||
|
// --- Mock setup (must be before imports that use these modules) ---
|
||||||
|
|
||||||
|
// Swap runAgent for a bare mock; individual tests queue its return values.
vi.mock('../agents/runner.js', () => {
  return {
    runAgent: vi.fn(),
  };
});

// Rule detection is scripted per test rather than computed.
vi.mock('../workflow/rule-evaluator.js', () => {
  return {
    detectMatchedRule: vi.fn(),
  };
});

// Phase helpers: status judgment is reported as not needed, and the
// report / judgment phases resolve immediately with fixed values.
vi.mock('../workflow/phase-runner.js', () => {
  return {
    needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
    runReportPhase: vi.fn().mockResolvedValue(undefined),
    runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
  };
});

// The generated report directory name is pinned to a constant.
vi.mock('../utils/session.js', () => {
  return {
    generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
  };
});
|
||||||
|
|
||||||
|
// --- Imports (after mocks) ---
|
||||||
|
|
||||||
|
import { WorkflowEngine } from '../workflow/engine.js';
|
||||||
|
import {
|
||||||
|
makeResponse,
|
||||||
|
buildDefaultWorkflowConfig,
|
||||||
|
mockRunAgentSequence,
|
||||||
|
mockDetectMatchedRuleSequence,
|
||||||
|
createTestTmpDir,
|
||||||
|
applyDefaultMocks,
|
||||||
|
} from './engine-test-helpers.js';
|
||||||
|
|
||||||
|
describe('WorkflowEngine Integration: Blocked Handling', () => {
|
||||||
|
let tmpDir: string;
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Clear call history and stubbed behaviour left over from the previous test,
  vi.resetAllMocks();
  // then reinstall the shared defaults provided by the test helpers.
  applyDefaultMocks();
  // Working directory for this test; removed again in afterEach.
  tmpDir = createTestTmpDir();
});
|
||||||
|
|
||||||
|
afterEach(() => {
  // Nothing to clean up when the directory is not there.
  if (!existsSync(tmpDir)) {
    return;
  }
  rmSync(tmpDir, { force: true, recursive: true });
});
|
||||||
|
|
||||||
|
it('should abort when blocked and no onUserInput callback', async () => {
  // No options object is passed, so the engine has no onUserInput handler.
  const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

  // The only agent call comes back with a blocked status.
  const blockedResponse = makeResponse({
    agent: 'plan',
    status: 'blocked',
    content: 'Need clarification',
  });
  mockRunAgentSequence([blockedResponse]);
  mockDetectMatchedRuleSequence([{ index: 0, method: 'phase1_tag' }]);

  const onBlocked = vi.fn();
  const onAbort = vi.fn();
  engine.on('step:blocked', onBlocked);
  engine.on('workflow:abort', onAbort);

  const { status } = await engine.run();

  expect(status).toBe('aborted');
  expect(onBlocked).toHaveBeenCalledOnce();
  expect(onAbort).toHaveBeenCalledOnce();
});
|
||||||
|
|
||||||
|
it('should abort when blocked and onUserInput returns null', async () => {
  // The handler is present but always resolves to null.
  const onUserInput = vi.fn().mockResolvedValue(null);
  const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task', {
    onUserInput,
  });

  const blockedResponse = makeResponse({
    agent: 'plan',
    status: 'blocked',
    content: 'Need info',
  });
  mockRunAgentSequence([blockedResponse]);
  mockDetectMatchedRuleSequence([{ index: 0, method: 'phase1_tag' }]);

  const { status } = await engine.run();

  expect(status).toBe('aborted');
  expect(onUserInput).toHaveBeenCalledOnce();
});
|
||||||
|
|
||||||
|
it('should continue when blocked and onUserInput provides input', async () => {
  // The handler answers exactly once with the clarification text.
  const onUserInput = vi.fn().mockResolvedValueOnce('User provided clarification');
  const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task', {
    onUserInput,
  });

  const agentResponses = [
    // plan, first attempt: blocked
    makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }),
    // plan, second attempt: succeeds after the user input
    makeResponse({ agent: 'plan', content: 'Plan done with user input' }),
    makeResponse({ agent: 'implement', content: 'Impl done' }),
    makeResponse({ agent: 'ai_review', content: 'OK' }),
    makeResponse({ agent: 'arch-review', content: 'OK' }),
    makeResponse({ agent: 'security-review', content: 'OK' }),
    makeResponse({ agent: 'supervise', content: 'All passed' }),
  ];
  mockRunAgentSequence(agentResponses);

  mockDetectMatchedRuleSequence([
    // first plan call: a rule matches, but blocked handling takes over
    { index: 0, method: 'phase1_tag' },
    // second plan call onwards: the regular flow
    { index: 0, method: 'phase1_tag' }, // plan → implement
    { index: 0, method: 'phase1_tag' }, // implement → ai_review
    { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
    { index: 0, method: 'phase1_tag' }, // arch-review → approved
    { index: 0, method: 'phase1_tag' }, // security-review → approved
    { index: 0, method: 'aggregate' }, // reviewers → supervise
    { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
  ]);

  const onStepUserInput = vi.fn();
  engine.on('step:user_input', onStepUserInput);

  const finalState = await engine.run();

  expect(finalState.status).toBe('completed');
  expect(onUserInput).toHaveBeenCalledOnce();
  expect(onStepUserInput).toHaveBeenCalledOnce();
  expect(finalState.userInputs).toContain('User provided clarification');
});
|
||||||
|
});
|
||||||
222
src/__tests__/engine-error.test.ts
Normal file
222
src/__tests__/engine-error.test.ts
Normal file
@ -0,0 +1,222 @@
|
|||||||
|
/**
|
||||||
|
* WorkflowEngine integration tests: error handling scenarios.
|
||||||
|
*
|
||||||
|
* Covers:
|
||||||
|
* - No rule matched (abort)
|
||||||
|
* - runAgent throws (abort)
|
||||||
|
* - Loop detection (abort)
|
||||||
|
* - Iteration limit (abort and extend)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||||
|
import { existsSync, rmSync } from 'node:fs';
|
||||||
|
|
||||||
|
// --- Mock setup (must be before imports that use these modules) ---
|
||||||
|
|
||||||
|
// Swap runAgent for a bare mock; individual tests queue its return values.
vi.mock('../agents/runner.js', () => {
  return {
    runAgent: vi.fn(),
  };
});

// Rule detection is scripted per test rather than computed.
vi.mock('../workflow/rule-evaluator.js', () => {
  return {
    detectMatchedRule: vi.fn(),
  };
});

// Phase helpers: status judgment is reported as not needed, and the
// report / judgment phases resolve immediately with fixed values.
vi.mock('../workflow/phase-runner.js', () => {
  return {
    needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
    runReportPhase: vi.fn().mockResolvedValue(undefined),
    runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
  };
});

// The generated report directory name is pinned to a constant.
vi.mock('../utils/session.js', () => {
  return {
    generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
  };
});
|
||||||
|
|
||||||
|
// --- Imports (after mocks) ---
|
||||||
|
|
||||||
|
import { WorkflowEngine } from '../workflow/engine.js';
|
||||||
|
import { runAgent } from '../agents/runner.js';
|
||||||
|
import { detectMatchedRule } from '../workflow/rule-evaluator.js';
|
||||||
|
import {
|
||||||
|
makeResponse,
|
||||||
|
makeStep,
|
||||||
|
makeRule,
|
||||||
|
buildDefaultWorkflowConfig,
|
||||||
|
mockRunAgentSequence,
|
||||||
|
mockDetectMatchedRuleSequence,
|
||||||
|
createTestTmpDir,
|
||||||
|
applyDefaultMocks,
|
||||||
|
} from './engine-test-helpers.js';
|
||||||
|
|
||||||
|
describe('WorkflowEngine Integration: Error Handling', () => {
|
||||||
|
let tmpDir: string;
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Clear call history and stubbed behaviour left over from the previous test,
  vi.resetAllMocks();
  // then reinstall the shared defaults provided by the test helpers.
  applyDefaultMocks();
  // Working directory for this test; removed again in afterEach.
  tmpDir = createTestTmpDir();
});
|
||||||
|
|
||||||
|
afterEach(() => {
  // Nothing to clean up when the directory is not there.
  if (!existsSync(tmpDir)) {
    return;
  }
  rmSync(tmpDir, { force: true, recursive: true });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 1. No rule matched
|
||||||
|
// =====================================================
|
||||||
|
describe('No rule matched', () => {
  it('should abort when detectMatchedRule returns undefined', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    mockRunAgentSequence([makeResponse({ agent: 'plan', content: 'Unclear output' })]);
    // Rule detection yields undefined for the plan step.
    mockDetectMatchedRuleSequence([undefined]);

    const onAbort = vi.fn();
    engine.on('workflow:abort', onAbort);

    const { status } = await engine.run();

    expect(status).toBe('aborted');
    expect(onAbort).toHaveBeenCalledOnce();
    // The second argument handed to the abort listener is read as the reason text.
    const [, reason] = onAbort.mock.calls[0]!;
    expect(reason as string).toContain('plan');
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 2. runAgent throws
|
||||||
|
// =====================================================
|
||||||
|
describe('runAgent throws', () => {
  it('should abort when runAgent throws an error', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    // The very first agent invocation rejects.
    const failure = new Error('API connection failed');
    vi.mocked(runAgent).mockRejectedValueOnce(failure);

    const onAbort = vi.fn();
    engine.on('workflow:abort', onAbort);

    const { status } = await engine.run();

    expect(status).toBe('aborted');
    expect(onAbort).toHaveBeenCalledOnce();
    // The error message is expected to surface in the abort reason.
    const [, reason] = onAbort.mock.calls[0]!;
    expect(reason as string).toContain('API connection failed');
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 3. Loop detection
|
||||||
|
// =====================================================
|
||||||
|
describe('Loop detection', () => {
  it('should abort when loop detected with action: abort', async () => {
    // A single step whose only rule routes back to itself. maxIterations is
    // set far above the loop threshold of 3.
    const config = buildDefaultWorkflowConfig({
      maxIterations: 100,
      loopDetection: { maxConsecutiveSameStep: 3, action: 'abort' },
      initialStep: 'loop-step',
      steps: [makeStep('loop-step', { rules: [makeRule('continue', 'loop-step')] })],
    });

    const engine = new WorkflowEngine(config, tmpDir, 'test task');

    // Queue five rounds of (agent response, matched rule).
    const runAgentMock = vi.mocked(runAgent);
    const detectMock = vi.mocked(detectMatchedRule);
    for (const i of [0, 1, 2, 3, 4]) {
      runAgentMock.mockResolvedValueOnce(makeResponse({ content: `iteration ${i}` }));
      detectMock.mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
    }

    const onAbort = vi.fn();
    engine.on('workflow:abort', onAbort);

    const { status } = await engine.run();

    expect(status).toBe('aborted');
    expect(onAbort).toHaveBeenCalledOnce();
    const [, reason] = onAbort.mock.calls[0]!;
    expect(reason as string).toContain('Loop detected');
    expect(reason as string).toContain('loop-step');
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 4. Iteration limit
|
||||||
|
// =====================================================
|
||||||
|
describe('Iteration limit', () => {
  it('should abort when max iterations reached without onIterationLimit callback', async () => {
    const engine = new WorkflowEngine(
      buildDefaultWorkflowConfig({ maxIterations: 2 }),
      tmpDir,
      'test task',
    );

    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' }, // plan → implement
      { index: 0, method: 'phase1_tag' }, // implement → ai_review
      { index: 0, method: 'phase1_tag' }, // ai_review → reviewers (won't be reached)
    ]);

    const onLimit = vi.fn();
    const onAbort = vi.fn();
    engine.on('iteration:limit', onLimit);
    engine.on('workflow:abort', onAbort);

    const { status } = await engine.run();

    expect(status).toBe('aborted');
    expect(onLimit).toHaveBeenCalledWith(2, 2);
    expect(onAbort).toHaveBeenCalledOnce();
    const [, reason] = onAbort.mock.calls[0]!;
    expect(reason as string).toContain('Max iterations');
  });

  it('should extend iterations when onIterationLimit provides additional iterations', async () => {
    // The callback grants 10 more iterations the one time it is consulted.
    const onIterationLimit = vi.fn().mockResolvedValueOnce(10);
    const engine = new WorkflowEngine(
      buildDefaultWorkflowConfig({ maxIterations: 2 }),
      tmpDir,
      'test task',
      { onIterationLimit },
    );

    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      // the limit is hit at iteration 2; onIterationLimit then extends it to 12
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' }, // plan → implement
      { index: 0, method: 'phase1_tag' }, // implement → ai_review
      { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
      { index: 0, method: 'phase1_tag' }, // arch-review → approved
      { index: 0, method: 'phase1_tag' }, // security-review → approved
      { index: 0, method: 'aggregate' }, // reviewers → supervise
      { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
    ]);

    const { status } = await engine.run();

    expect(status).toBe('completed');
    expect(onIterationLimit).toHaveBeenCalledOnce();
  });
});
|
||||||
|
});
|
||||||
344
src/__tests__/engine-happy-path.test.ts
Normal file
344
src/__tests__/engine-happy-path.test.ts
Normal file
@ -0,0 +1,344 @@
|
|||||||
|
/**
|
||||||
|
* WorkflowEngine integration tests: happy path and normal flow scenarios.
|
||||||
|
*
|
||||||
|
* Covers:
|
||||||
|
* - Full happy path (plan → implement → ai_review → reviewers → supervise → COMPLETE)
|
||||||
|
* - Review reject and fix loop
|
||||||
|
* - AI review reject and fix
|
||||||
|
* - ABORT transition
|
||||||
|
* - Event emissions
|
||||||
|
* - Step output tracking
|
||||||
|
* - Config validation
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||||
|
import { existsSync, rmSync } from 'node:fs';
|
||||||
|
import type { WorkflowConfig, WorkflowStep } from '../models/types.js';
|
||||||
|
|
||||||
|
// --- Mock setup (must be before imports that use these modules) ---
|
||||||
|
|
||||||
|
// Swap runAgent for a bare mock; individual tests queue its return values.
vi.mock('../agents/runner.js', () => {
  return {
    runAgent: vi.fn(),
  };
});

// Rule detection is scripted per test rather than computed.
vi.mock('../workflow/rule-evaluator.js', () => {
  return {
    detectMatchedRule: vi.fn(),
  };
});

// Phase helpers: status judgment is reported as not needed, and the
// report / judgment phases resolve immediately with fixed values.
vi.mock('../workflow/phase-runner.js', () => {
  return {
    needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
    runReportPhase: vi.fn().mockResolvedValue(undefined),
    runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
  };
});

// The generated report directory name is pinned to a constant.
vi.mock('../utils/session.js', () => {
  return {
    generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
  };
});
|
||||||
|
|
||||||
|
// --- Imports (after mocks) ---
|
||||||
|
|
||||||
|
import { WorkflowEngine } from '../workflow/engine.js';
|
||||||
|
import { runAgent } from '../agents/runner.js';
|
||||||
|
import {
|
||||||
|
makeResponse,
|
||||||
|
makeStep,
|
||||||
|
makeRule,
|
||||||
|
buildDefaultWorkflowConfig,
|
||||||
|
mockRunAgentSequence,
|
||||||
|
mockDetectMatchedRuleSequence,
|
||||||
|
createTestTmpDir,
|
||||||
|
applyDefaultMocks,
|
||||||
|
} from './engine-test-helpers.js';
|
||||||
|
|
||||||
|
describe('WorkflowEngine Integration: Happy Path', () => {
|
||||||
|
let tmpDir: string;
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Clear call history and stubbed behaviour left over from the previous test,
  vi.resetAllMocks();
  // then reinstall the shared defaults provided by the test helpers.
  applyDefaultMocks();
  // Working directory for this test; removed again in afterEach.
  tmpDir = createTestTmpDir();
});
|
||||||
|
|
||||||
|
afterEach(() => {
  // Nothing to clean up when the directory is not there.
  if (!existsSync(tmpDir)) {
    return;
  }
  rmSync(tmpDir, { force: true, recursive: true });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 1. Happy Path
|
||||||
|
// =====================================================
|
||||||
|
describe('Happy path', () => {
  it('should complete: plan → implement → ai_review → reviewers(all approved) → supervise → COMPLETE', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    // One response per agent invocation, in the order the engine requests them.
    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan complete' }),
      makeResponse({ agent: 'implement', content: 'Implementation done' }),
      makeResponse({ agent: 'ai_review', content: 'No issues' }),
      makeResponse({ agent: 'arch-review', content: 'Architecture OK' }),
      makeResponse({ agent: 'security-review', content: 'Security OK' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' }, // plan → implement
      { index: 0, method: 'phase1_tag' }, // implement → ai_review
      { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
      { index: 0, method: 'phase1_tag' }, // arch-review → approved
      { index: 0, method: 'phase1_tag' }, // security-review → approved
      { index: 0, method: 'aggregate' }, // reviewers(all approved) → supervise
      { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
    ]);

    const onComplete = vi.fn();
    engine.on('workflow:complete', onComplete);

    const finalState = await engine.run();

    expect(finalState.status).toBe('completed');
    // iterations: plan, implement, ai_review, reviewers, supervise
    expect(finalState.iteration).toBe(5);
    expect(onComplete).toHaveBeenCalledOnce();
    // agent calls: 4 normal steps + 2 parallel sub-steps
    expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6);
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 2. Review reject and fix loop
|
||||||
|
// =====================================================
|
||||||
|
describe('Review reject and fix loop', () => {
  it('should handle: reviewers(needs_fix) → fix → reviewers(all approved) → supervise → COMPLETE', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'No issues' }),
      // reviewers, round 1: arch approves, security asks for a fix
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'Vulnerability found' }),
      // the fix step
      makeResponse({ agent: 'fix', content: 'Fixed security issue' }),
      // reviewers, round 2: both approve
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'Security OK now' }),
      // final supervision
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' }, // plan → implement
      { index: 0, method: 'phase1_tag' }, // implement → ai_review
      { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
      { index: 0, method: 'phase1_tag' }, // arch-review → approved
      { index: 1, method: 'phase1_tag' }, // security-review → needs_fix
      { index: 1, method: 'aggregate' }, // reviewers: any(needs_fix) → fix
      { index: 0, method: 'phase1_tag' }, // fix → reviewers
      { index: 0, method: 'phase1_tag' }, // arch-review → approved
      { index: 0, method: 'phase1_tag' }, // security-review → approved
      { index: 0, method: 'aggregate' }, // reviewers: all(approved) → supervise
      { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
    ]);

    const { status, iteration } = await engine.run();

    expect(status).toBe('completed');
    // plan, implement, ai_review, reviewers(1st), fix, reviewers(2nd), supervise = 7
    expect(iteration).toBe(7);
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 3. AI review reject and fix
|
||||||
|
// =====================================================
|
||||||
|
describe('AI review reject and fix', () => {
  it('should handle: ai_review(issues) → ai_fix → reviewers → supervise → COMPLETE', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'AI issues found' }),
      makeResponse({ agent: 'ai_fix', content: 'Issues fixed' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' }, // plan → implement
      { index: 0, method: 'phase1_tag' }, // implement → ai_review
      { index: 1, method: 'phase1_tag' }, // ai_review → ai_fix (issues found)
      { index: 0, method: 'phase1_tag' }, // ai_fix → reviewers
      { index: 0, method: 'phase1_tag' }, // arch-review → approved
      { index: 0, method: 'phase1_tag' }, // security-review → approved
      { index: 0, method: 'aggregate' }, // reviewers → supervise
      { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
    ]);

    const { status, iteration } = await engine.run();

    expect(status).toBe('completed');
    // plan, implement, ai_review, ai_fix, reviewers, supervise = 6
    expect(iteration).toBe(6);
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 4. ABORT transition
|
||||||
|
// =====================================================
|
||||||
|
describe('ABORT transition', () => {
  it('should abort when step transitions to ABORT', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    mockRunAgentSequence([makeResponse({ agent: 'plan', content: 'Requirements unclear' })]);
    // Rule index 1 of the plan step routes to ABORT.
    mockDetectMatchedRuleSequence([{ index: 1, method: 'phase1_tag' }]);

    const onAbort = vi.fn();
    engine.on('workflow:abort', onAbort);

    const { status } = await engine.run();

    expect(status).toBe('aborted');
    expect(onAbort).toHaveBeenCalledOnce();
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 5. Event emissions
|
||||||
|
// =====================================================
|
||||||
|
describe('Event emissions', () => {
  it('should emit step:start and step:complete for each step', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'aggregate' },
      { index: 0, method: 'phase1_tag' },
    ]);

    const onStepStart = vi.fn();
    const onStepComplete = vi.fn();
    engine.on('step:start', onStepStart);
    engine.on('step:complete', onStepComplete);

    await engine.run();

    // Five steps run: plan, implement, ai_review, reviewers, supervise.
    expect(onStepStart).toHaveBeenCalledTimes(5);
    expect(onStepComplete).toHaveBeenCalledTimes(5);

    // The first listener argument is the step that was started.
    const startedSteps = onStepStart.mock.calls.map(([step]) => (step as WorkflowStep).name);
    expect(startedSteps).toEqual(['plan', 'implement', 'ai_review', 'reviewers', 'supervise']);
  });

  it('should emit iteration:limit when max iterations reached', async () => {
    const engine = new WorkflowEngine(
      buildDefaultWorkflowConfig({ maxIterations: 1 }),
      tmpDir,
      'test task',
    );

    mockRunAgentSequence([makeResponse({ agent: 'plan', content: 'Plan' })]);
    mockDetectMatchedRuleSequence([{ index: 0, method: 'phase1_tag' }]);

    const onLimit = vi.fn();
    engine.on('iteration:limit', onLimit);

    await engine.run();

    expect(onLimit).toHaveBeenCalledWith(1, 1);
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 6. Step output tracking
|
||||||
|
// =====================================================
|
||||||
|
describe('Step output tracking', () => {
  it('should store outputs for all executed steps', async () => {
    const engine = new WorkflowEngine(buildDefaultWorkflowConfig(), tmpDir, 'test task');

    // Every step gets its own content so the stored outputs can be told apart.
    const agentResponses = [
      makeResponse({ agent: 'plan', content: 'Plan output' }),
      makeResponse({ agent: 'implement', content: 'Implement output' }),
      makeResponse({ agent: 'ai_review', content: 'AI review output' }),
      makeResponse({ agent: 'arch-review', content: 'Arch output' }),
      makeResponse({ agent: 'security-review', content: 'Sec output' }),
      makeResponse({ agent: 'supervise', content: 'Supervise output' }),
    ];
    mockRunAgentSequence(agentResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'aggregate' },
      { index: 0, method: 'phase1_tag' },
    ]);

    const { stepOutputs } = await engine.run();

    expect(stepOutputs.get('plan')!.content).toBe('Plan output');
    expect(stepOutputs.get('implement')!.content).toBe('Implement output');
    expect(stepOutputs.get('ai_review')!.content).toBe('AI review output');
    expect(stepOutputs.get('supervise')!.content).toBe('Supervise output');
  });
});
|
||||||
|
|
||||||
|
// =====================================================
|
||||||
|
// 7. Config validation
|
||||||
|
// =====================================================
|
||||||
|
describe('Config validation', () => {
  it('should throw when initial step does not exist', () => {
    const config = buildDefaultWorkflowConfig({ initialStep: 'nonexistent' });

    // Construction alone is expected to throw; run() is never called.
    const construct = () => new WorkflowEngine(config, tmpDir, 'test task');

    expect(construct).toThrow('Unknown step: nonexistent');
  });

  it('should throw when rule references nonexistent step', () => {
    // step1's only rule points at a step that is not defined in the workflow.
    const danglingRule = makeRule('done', 'nonexistent_step');
    const config: WorkflowConfig = {
      name: 'test',
      maxIterations: 10,
      initialStep: 'step1',
      steps: [makeStep('step1', { rules: [danglingRule] })],
    };

    const construct = () => new WorkflowEngine(config, tmpDir, 'test task');

    expect(construct).toThrow('nonexistent_step');
  });
});
|
||||||
|
});
|
||||||
162
src/__tests__/engine-parallel.test.ts
Normal file
162
src/__tests__/engine-parallel.test.ts
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
/**
|
||||||
|
* WorkflowEngine integration tests: parallel step aggregation.
|
||||||
|
*
|
||||||
|
* Covers:
|
||||||
|
* - Aggregated output format (## headers and --- separators)
|
||||||
|
* - Individual sub-step output storage
|
||||||
|
* - Concurrent execution of sub-steps
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||||
|
import { existsSync, rmSync } from 'node:fs';
|
||||||
|
|
||||||
|
// --- Mock setup (must be before imports that use these modules) ---
|
||||||
|
|
||||||
|
// Stub the agent runner; each test queues its own responses.
vi.mock('../agents/runner.js', () => {
  return { runAgent: vi.fn() };
});

// Stub rule detection; each test queues its own rule matches.
vi.mock('../workflow/rule-evaluator.js', () => {
  return { detectMatchedRule: vi.fn() };
});

// Phase-runner stubs: no status-judgment phase, report phase resolves to
// undefined, status judgment resolves to an empty string.
vi.mock('../workflow/phase-runner.js', () => {
  return {
    needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
    runReportPhase: vi.fn().mockResolvedValue(undefined),
    runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
  };
});

// Fixed report directory name, matching the directory created by createTestTmpDir().
vi.mock('../utils/session.js', () => {
  return { generateReportDir: vi.fn().mockReturnValue('test-report-dir') };
});
|
||||||
|
|
||||||
|
// --- Imports (after mocks) ---
|
||||||
|
|
||||||
|
import { WorkflowEngine } from '../workflow/engine.js';
|
||||||
|
import { runAgent } from '../agents/runner.js';
|
||||||
|
import {
|
||||||
|
makeResponse,
|
||||||
|
buildDefaultWorkflowConfig,
|
||||||
|
mockRunAgentSequence,
|
||||||
|
mockDetectMatchedRuleSequence,
|
||||||
|
createTestTmpDir,
|
||||||
|
applyDefaultMocks,
|
||||||
|
} from './engine-test-helpers.js';
|
||||||
|
|
||||||
|
describe('WorkflowEngine Integration: Parallel Step Aggregation', () => {
  let tmpDir: string;

  beforeEach(() => {
    // Reset every mock, then re-apply the phase-runner/session defaults
    // and start from a fresh temporary project directory.
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });

  afterEach(() => {
    if (!existsSync(tmpDir)) {
      return;
    }
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it('should aggregate sub-step outputs with ## headers and --- separators', async () => {
    const workflow = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(workflow, tmpDir, 'test task');

    const queuedResponses = [
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'Architecture review content' }),
      makeResponse({ agent: 'security-review', content: 'Security review content' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ];
    mockRunAgentSequence(queuedResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' }, // arch-review
      { index: 0, method: 'phase1_tag' }, // security-review
      { index: 0, method: 'aggregate' }, // reviewers
      { index: 0, method: 'phase1_tag' },
    ]);

    const finalState = await engine.run();

    expect(finalState.status).toBe('completed');

    const aggregated = finalState.stepOutputs.get('reviewers');
    expect(aggregated).toBeDefined();

    // Each sub-step contributes a "## <name>" header plus its content,
    // and the sections are joined with a "---" separator.
    const expectedFragments = [
      '## arch-review',
      'Architecture review content',
      '---',
      '## security-review',
      'Security review content',
    ];
    for (const fragment of expectedFragments) {
      expect(aggregated!.content).toContain(fragment);
    }
    expect(aggregated!.matchedRuleMethod).toBe('aggregate');
  });

  it('should store individual sub-step outputs in stepOutputs', async () => {
    const workflow = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(workflow, tmpDir, 'test task');

    const queuedResponses = [
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'Arch content' }),
      makeResponse({ agent: 'security-review', content: 'Sec content' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ];
    mockRunAgentSequence(queuedResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' }, // arch-review
      { index: 0, method: 'phase1_tag' }, // security-review
      { index: 0, method: 'aggregate' }, // reviewers
      { index: 0, method: 'phase1_tag' },
    ]);

    const { stepOutputs } = await engine.run();

    // Both sub-steps and the parent parallel step get their own entry.
    for (const stepName of ['arch-review', 'security-review', 'reviewers']) {
      expect(stepOutputs.has(stepName)).toBe(true);
    }
    expect(stepOutputs.get('arch-review')!.content).toBe('Arch content');
    expect(stepOutputs.get('security-review')!.content).toBe('Sec content');
  });

  it('should execute sub-steps concurrently (both runAgent calls happen)', async () => {
    const workflow = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(workflow, tmpDir, 'test task');

    const queuedResponses = [
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ];
    mockRunAgentSequence(queuedResponses);

    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' }, // arch-review
      { index: 0, method: 'phase1_tag' }, // security-review
      { index: 0, method: 'aggregate' }, // reviewers
      { index: 0, method: 'phase1_tag' },
    ]);

    await engine.run();

    const runAgentMock = vi.mocked(runAgent);

    // 6 total: 4 normal + 2 parallel sub-steps
    expect(runAgentMock).toHaveBeenCalledTimes(6);

    // The first argument of each runAgent call is the agent path.
    const agentPaths = runAgentMock.mock.calls.map(([agentPath]) => agentPath);
    expect(agentPaths).toContain('../agents/arch-review.md');
    expect(agentPaths).toContain('../agents/security-review.md');
  });
});
|
||||||
175
src/__tests__/engine-test-helpers.ts
Normal file
175
src/__tests__/engine-test-helpers.ts
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
/**
|
||||||
|
* Shared helpers for WorkflowEngine integration tests.
|
||||||
|
*
|
||||||
|
* Provides mock setup, factory functions, and a default workflow config
|
||||||
|
* matching the parallel reviewers structure (plan → implement → ai_review → reviewers → supervise).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { vi } from 'vitest';
|
||||||
|
import { mkdirSync } from 'node:fs';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { randomUUID } from 'node:crypto';
|
||||||
|
import type { WorkflowConfig, WorkflowStep, AgentResponse, WorkflowRule } from '../models/types.js';
|
||||||
|
|
||||||
|
// --- Mock imports (consumers must call vi.mock before importing this) ---
|
||||||
|
|
||||||
|
import { runAgent } from '../agents/runner.js';
|
||||||
|
import { detectMatchedRule } from '../workflow/rule-evaluator.js';
|
||||||
|
import type { RuleMatch } from '../workflow/rule-evaluator.js';
|
||||||
|
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../workflow/phase-runner.js';
|
||||||
|
import { generateReportDir } from '../utils/session.js';
|
||||||
|
|
||||||
|
// --- Factory functions ---
|
||||||
|
|
||||||
|
/**
 * Build an AgentResponse for tests.
 *
 * Defaults to agent 'test-agent', status 'done', content 'test response',
 * the current time, and a freshly generated `session-<uuid>` id.
 *
 * @param overrides Fields that replace the corresponding defaults.
 * @returns A new response object with the overrides applied last.
 */
export function makeResponse(overrides: Partial<AgentResponse> = {}): AgentResponse {
  const defaults: AgentResponse = {
    agent: 'test-agent',
    status: 'done',
    content: 'test response',
    timestamp: new Date(),
    sessionId: `session-${randomUUID()}`,
  };
  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
/**
 * Build a WorkflowRule for tests.
 *
 * @param condition Rule condition text.
 * @param next Name of the step (or terminal marker) the rule routes to.
 * @param extra Additional rule fields; these win over `condition`/`next` on conflict.
 * @returns A new rule object.
 */
export function makeRule(condition: string, next: string, extra: Partial<WorkflowRule> = {}): WorkflowRule {
  const base: WorkflowRule = { condition, next };
  return { ...base, ...extra };
}
|
||||||
|
|
||||||
|
/**
 * Build a WorkflowStep for tests.
 *
 * The agent path (`../agents/<name>.md`), display name and instruction
 * template (`Run <name>`) are all derived from `name`;
 * `passPreviousResponse` defaults to true.
 *
 * @param name Step name.
 * @param overrides Fields that replace the derived defaults (e.g. `rules`, `parallel`).
 * @returns A new step object with the overrides applied last.
 */
export function makeStep(name: string, overrides: Partial<WorkflowStep> = {}): WorkflowStep {
  const agentPath = `../agents/${name}.md`;
  const instruction = `Run ${name}`;
  const defaults: WorkflowStep = {
    name,
    agent: agentPath,
    agentDisplayName: name,
    instructionTemplate: instruction,
    passPreviousResponse: true,
  };
  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
/**
 * Build a workflow config mirroring the default.yaml parallel reviewers layout:
 * plan → implement → ai_review → (ai_fix↔) → reviewers(parallel) → (fix↔) → supervise
 *
 * @param overrides Top-level config fields that replace the defaults
 *   (for example `initialStep` or `steps`).
 * @returns A new WorkflowConfig named 'test-default' with maxIterations 30.
 */
export function buildDefaultWorkflowConfig(overrides: Partial<WorkflowConfig> = {}): WorkflowConfig {
  // Both reviewer sub-steps use the same approved / needs_fix rule pair.
  const reviewerSubStep = (name: string): WorkflowStep =>
    makeStep(name, {
      rules: [makeRule('approved', 'COMPLETE'), makeRule('needs_fix', 'fix')],
    });

  // Rules on the parallel parent step are flagged as aggregate conditions,
  // written as all("…") / any("…").
  const aggregateRule = (kind: 'all' | 'any', text: string, next: string): WorkflowRule =>
    makeRule(`${kind}("${text}")`, next, {
      isAggregateCondition: true,
      aggregateType: kind,
      aggregateConditionText: text,
    });

  const reviewerSubSteps = [reviewerSubStep('arch-review'), reviewerSubStep('security-review')];

  const steps: WorkflowStep[] = [
    makeStep('plan', {
      rules: [
        makeRule('Requirements are clear', 'implement'),
        makeRule('Requirements unclear', 'ABORT'),
      ],
    }),
    makeStep('implement', {
      rules: [
        makeRule('Implementation complete', 'ai_review'),
        makeRule('Cannot proceed', 'plan'),
      ],
    }),
    makeStep('ai_review', {
      rules: [
        makeRule('No AI-specific issues', 'reviewers'),
        makeRule('AI-specific issues found', 'ai_fix'),
      ],
    }),
    makeStep('ai_fix', {
      rules: [
        makeRule('AI issues fixed', 'reviewers'),
        makeRule('Cannot proceed', 'plan'),
      ],
    }),
    makeStep('reviewers', {
      parallel: reviewerSubSteps,
      rules: [
        aggregateRule('all', 'approved', 'supervise'),
        aggregateRule('any', 'needs_fix', 'fix'),
      ],
    }),
    makeStep('fix', {
      rules: [
        makeRule('Fix complete', 'reviewers'),
        makeRule('Cannot proceed', 'plan'),
      ],
    }),
    makeStep('supervise', {
      rules: [
        makeRule('All checks passed', 'COMPLETE'),
        makeRule('Requirements unmet', 'plan'),
      ],
    }),
  ];

  const defaults: WorkflowConfig = {
    name: 'test-default',
    description: 'Test workflow',
    maxIterations: 30,
    initialStep: 'plan',
    steps,
  };
  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
// --- Mock sequence helpers ---
|
||||||
|
|
||||||
|
/**
 * Queue responses on the mocked runAgent.
 *
 * Each entry is registered with mockResolvedValueOnce, so successive
 * runAgent calls resolve to the responses in the order given.
 *
 * @param responses Responses to hand out, first call first.
 */
export function mockRunAgentSequence(responses: AgentResponse[]): void {
  const runAgentMock = vi.mocked(runAgent);
  responses.forEach((queued) => {
    runAgentMock.mockResolvedValueOnce(queued);
  });
}
|
||||||
|
|
||||||
|
/**
 * Queue rule matches on the mocked detectMatchedRule.
 *
 * Each entry is registered with mockResolvedValueOnce, so successive
 * detectMatchedRule calls resolve to the matches in the order given.
 *
 * @param matches Matches to hand out, first call first; `undefined` entries are allowed.
 */
export function mockDetectMatchedRuleSequence(matches: (RuleMatch | undefined)[]): void {
  const detectMock = vi.mocked(detectMatchedRule);
  matches.forEach((queued) => {
    detectMock.mockResolvedValueOnce(queued);
  });
}
|
||||||
|
|
||||||
|
// --- Test environment setup ---
|
||||||
|
|
||||||
|
/**
 * Create a uniquely named temporary directory under the OS temp dir,
 * including the `.takt/reports/test-report-dir` sub-tree.
 *
 * @returns Path of the new temporary directory. The caller must remove it.
 */
export function createTestTmpDir(): string {
  const root = join(tmpdir(), `takt-engine-test-${randomUUID()}`);
  const reportDir = join(root, '.takt', 'reports', 'test-report-dir');

  for (const dir of [root, reportDir]) {
    mkdirSync(dir, { recursive: true });
  }

  return root;
}
|
||||||
|
|
||||||
|
/**
 * Restore the default behaviour of the phase-runner and session mocks.
 *
 * Intended to be called after vi.resetAllMocks(): no status-judgment phase,
 * report phase resolving to undefined, status judgment resolving to an empty
 * string, and 'test-report-dir' as the report directory.
 */
export function applyDefaultMocks(): void {
  const needsJudgment = vi.mocked(needsStatusJudgmentPhase);
  const reportPhase = vi.mocked(runReportPhase);
  const judgmentPhase = vi.mocked(runStatusJudgmentPhase);
  const reportDir = vi.mocked(generateReportDir);

  needsJudgment.mockReturnValue(false);
  reportPhase.mockResolvedValue(undefined);
  judgmentPhase.mockResolvedValue('');
  reportDir.mockReturnValue('test-report-dir');
}
|
||||||
@ -60,4 +60,22 @@ describe('determineNextStepByRules', () => {
|
|||||||
|
|
||||||
expect(determineNextStepByRules(step, 0)).toBe('COMPLETE');
|
expect(determineNextStepByRules(step, 0)).toBe('COMPLETE');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should return null when rule exists but next is undefined', () => {
  // Parallel sub-step rules may omit `next` (optional field)
  const rulesWithoutNext = [{ condition: 'approved' }, { condition: 'needs_fix' }];
  const subStep: WorkflowStep = {
    name: 'sub-step',
    agent: 'test-agent',
    agentDisplayName: 'Test Agent',
    instructionTemplate: '{task}',
    passPreviousResponse: false,
    rules: rulesWithoutNext,
  };

  // Every rule index resolves to null because no rule declares a target.
  rulesWithoutNext.forEach((_rule, ruleIndex) => {
    expect(determineNextStepByRules(subStep, ruleIndex)).toBeNull();
  });
});
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user