From 4e7c3d0afbdd97e8ca6955d9ea7404a5f82d1d62 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 21:01:00 +0900 Subject: [PATCH 1/3] update README --- CLAUDE.md | 204 ++++++++++++++++++++++++++++++++------------ README.md | 248 +++++++++++++++++++++++++++++++----------------------- 2 files changed, 295 insertions(+), 157 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5699f00..4993e9d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -TAKT (Task Agent Koordination Tool) is a multi-agent orchestration system for Claude Code. It enables YAML-based workflow definitions that coordinate multiple AI agents through state machine transitions. +TAKT (Task Agent Koordination Tool) is a multi-agent orchestration system for Claude Code. It enables YAML-based workflow definitions that coordinate multiple AI agents through state machine transitions with rule-based routing. ## Development Commands @@ -25,35 +25,75 @@ TAKT (Task Agent Koordination Tool) is a multi-agent orchestration system for Cl | `takt /watch` | | Watch `.takt/tasks/` and auto-execute tasks (resident process) | | `takt /add-task` | `/add` | Add a new task interactively (YAML format, multiline supported) | | `takt /list-tasks` | `/list` | List task branches (try merge, merge & cleanup, or delete) | -| `takt /switch` | | Switch workflow interactively | +| `takt /switch` | `/sw` | Switch workflow interactively | | `takt /clear` | | Clear agent conversation sessions (reset state) | +| `takt /eject` | | Copy builtin workflow/agents to `~/.takt/` for customization | | `takt /refresh-builtin` | | Update builtin resources from `resources/` to `~/.takt/` | | `takt /help` | | Show help message | | `takt /config` | | Display current configuration | +GitHub issue references: `takt #6` fetches issue #6 and executes it as a task. 
+ ## Architecture ### Core Flow ``` CLI (cli.ts) - → Slash commands (/run-tasks, /watch, /add-task, /list-tasks, /switch, /clear, /refresh-builtin, /help, /config) - → or executeTask() + → Slash commands or executeTask() → WorkflowEngine (workflow/engine.ts) - → runAgent() (agents/runner.ts) - → callClaude() (claude/client.ts) - → executeClaudeCli() (claude/process.ts) - → ClaudeProcess (claude-agent-sdk) + → Per step: 3-phase execution + Phase 1: runAgent() → main work + Phase 2: runReportPhase() → report output (if step.report defined) + Phase 3: runStatusJudgmentPhase() → status tag output (if tag-based rules) + → detectMatchedRule() → rule evaluation → determineNextStep() + → Parallel steps: Promise.all() for sub-steps, aggregate evaluation ``` +### Three-Phase Step Execution + +Each step executes in up to 3 phases (session is resumed across phases): + +| Phase | Purpose | Tools | When | +|-------|---------|-------|------| +| Phase 1 | Main work (coding, review, etc.) | Step's allowed_tools (Write excluded if report defined) | Always | +| Phase 2 | Report output | Write only | When `step.report` is defined | +| Phase 3 | Status judgment | None (judgment only) | When step has tag-based rules | + +Phase 2/3 are implemented in `src/workflow/phase-runner.ts`. The session is resumed so the agent retains context from Phase 1. + +### Rule Evaluation (5-Stage Fallback) + +After step execution, rules are evaluated to determine the next step. Evaluation order (first match wins): + +1. **Aggregate** (`all()`/`any()`) - For parallel parent steps +2. **Phase 3 tag** - `[STEP:N]` tag from status judgment output +3. **Phase 1 tag** - `[STEP:N]` tag from main execution output (fallback) +4. **AI judge (ai() only)** - AI evaluates `ai("condition text")` rules +5. **AI judge fallback** - AI evaluates ALL conditions as final resort + +Implemented in `src/workflow/rule-evaluator.ts`. The matched method is tracked as `RuleMatchMethod` type. 
+ ### Key Components **WorkflowEngine** (`src/workflow/engine.ts`) - State machine that orchestrates agent execution via EventEmitter -- Manages step transitions based on agent response status +- Manages step transitions based on rule evaluation results - Emits events: `step:start`, `step:complete`, `step:blocked`, `step:loop_detected`, `workflow:complete`, `workflow:abort`, `iteration:limit` - Supports loop detection (`LoopDetector`) and iteration limits - Maintains agent sessions per step for conversation continuity +- Parallel step execution via `runParallelStep()` with `Promise.all()` + +**Instruction Builder** (`src/workflow/instruction-builder.ts`) +- Auto-injects standard sections into every instruction (no need for `{task}` or `{previous_response}` placeholders in templates): + 1. Execution context (working dir, edit permission rules) + 2. Workflow context (iteration counts, report dir) + 3. User request (`{task}` — auto-injected unless placeholder present) + 4. Previous response (auto-injected if `pass_previous_response: true`) + 5. User inputs (auto-injected unless `{user_inputs}` placeholder present) + 6. `instruction_template` content + 7. 
Status output rules (auto-injected for tag-based rules) +- Localized for `en` and `ja` **Agent Runner** (`src/agents/runner.ts`) - Resolves agent specs (name or path) to agent configurations @@ -63,17 +103,16 @@ CLI (cli.ts) - `supervisor`: Read/Glob/Grep/Bash/WebSearch/WebFetch - `planner`: Read/Glob/Grep/Bash/WebSearch/WebFetch - Custom agents via `.takt/agents.yaml` or prompt files (.md) -- Supports Claude Code agents (`claudeAgent`) and skills (`claudeSkill`) **Claude Integration** (`src/claude/`) -- `client.ts` - High-level API: `callClaude()`, `callClaudeCustom()`, `callClaudeAgent()`, `callClaudeSkill()`, status detection via regex patterns -- `process.ts` - SDK wrapper with `ClaudeProcess` class, re-exports query management +- `client.ts` - High-level API: `callClaude()`, `callClaudeCustom()`, `callClaudeAgent()`, `callClaudeSkill()` +- `process.ts` - SDK wrapper with `ClaudeProcess` class - `executor.ts` - Query execution using `@anthropic-ai/claude-agent-sdk` - `query-manager.ts` - Concurrent query tracking with query IDs **Configuration** (`src/config/`) - `loader.ts` - Custom agent loading from `.takt/agents.yaml` -- `workflowLoader.ts` - YAML workflow parsing with Zod validation (loads from `~/.takt/workflows/` only) +- `workflowLoader.ts` - YAML workflow parsing with Zod validation; resolves user workflows (`~/.takt/workflows/`) with builtin fallback (`resources/global/{lang}/workflows/`) - `agentLoader.ts` - Agent prompt file loading - `paths.ts` - Directory structure (`.takt/`, `~/.takt/`), session management @@ -82,74 +121,123 @@ CLI (cli.ts) - `watcher.ts` - TaskWatcher class for polling and auto-executing tasks (used by `/watch`) - `index.ts` - Task operations (getNextTask, completeTask, addTask) +**GitHub Integration** (`src/github/issue.ts`) +- Fetches issues via `gh` CLI, formats as task text with title/body/labels/comments + ### Data Flow -1. User provides task or slash command → CLI -2. 
CLI loads workflow from `~/.takt/workflows/{name}.yaml` -3. WorkflowEngine starts at `initialStep` -4. Each step: `buildInstruction()` → `runStep()` → `runAgent()` → `callClaude()` → detect status → `determineNextStep()` -5. Status patterns (regex in `statusPatterns`) determine next step via `transitions` +1. User provides task (text or `#N` issue reference) or slash command → CLI +2. CLI loads workflow: user `~/.takt/workflows/` → builtin `resources/global/{lang}/workflows/` fallback +3. WorkflowEngine starts at `initial_step` +4. Each step: `buildInstruction()` → Phase 1 (main) → Phase 2 (report) → Phase 3 (status) → `detectMatchedRule()` → `determineNextStep()` +5. Rule evaluation determines next step name 6. Special transitions: `COMPLETE` ends workflow successfully, `ABORT` ends with failure -### Status Detection - -Agents output status markers (e.g., `[CODER:DONE]`) that are matched against `GENERIC_STATUS_PATTERNS` in `src/models/schemas.ts`. Common statuses: `done`, `blocked`, `approved`, `rejected`, `improve`, `in_progress`, `interrupted`. - ## Directory Structure ``` ~/.takt/ # Global user config (created on first run) config.yaml # Trusted dirs, default workflow, log level, language - workflows/ # Workflow YAML files (required location) - agents/ # Agent prompt files (.md) + workflows/ # User workflow YAML files (override builtins) + agents/ # User agent prompt files (.md) .takt/ # Project-level config agents.yaml # Custom agent definitions tasks/ # Task files for /run-tasks reports/ # Execution reports (auto-generated) - logs/ # Session logs (gitignored) + logs/ # Session logs in NDJSON format (gitignored) -resources/ # Bundled defaults (copied to ~/.takt on init) +resources/ # Bundled defaults (builtin, read from dist/ at runtime) global/ en/ # English agents and workflows ja/ # Japanese agents and workflows ``` +Builtin resources are embedded in the npm package (`dist/resources/`). User files in `~/.takt/` take priority. 
Use `/eject` to copy builtins to `~/.takt/` for customization. + ## Workflow YAML Schema ```yaml name: workflow-name description: Optional description -max_iterations: 10 # snake_case in YAML +max_iterations: 10 +initial_step: plan # First step to execute steps: + # Normal step - name: step-name - agent: ~/.takt/agents/default/coder.md # Path to agent prompt - agent_name: coder # Display name (optional) - provider: codex # claude|codex (optional) - model: opus # Model name (optional) + agent: ../agents/default/coder.md # Path to agent prompt + agent_name: coder # Display name (optional) + provider: codex # claude|codex (optional) + model: opus # Model name (optional) + edit: true # Whether step can edit files + permission_mode: acceptEdits # Tool permission mode (optional) instruction_template: | - {task} - {previous_response} - pass_previous_response: true # Default: true - transitions: - - condition: done - next_step: next-step + Custom instructions for this step. + {task}, {previous_response} are auto-injected if not present as placeholders. + pass_previous_response: true # Default: true + report: + name: 01-plan.md # Report file name + format: | # Report format template + # Plan Report + ... + rules: + - condition: "Human-readable condition" + next: next-step-name + - condition: ai("AI evaluates this condition text") + next: other-step - condition: blocked - next_step: ABORT - on_no_status: complete # complete|continue|stay + next: ABORT + + # Parallel step (sub-steps execute concurrently) + - name: reviewers + parallel: + - name: arch-review + agent: ../agents/default/architecture-reviewer.md + rules: + - condition: approved # next is optional for sub-steps + - condition: needs_fix + instruction_template: | + Review architecture... + - name: security-review + agent: ../agents/default/security-reviewer.md + rules: + - condition: approved + - condition: needs_fix + instruction_template: | + Review security... 
+ rules: # Parent rules use aggregate conditions + - condition: all("approved") + next: supervise + - condition: any("needs_fix") + next: fix ``` +Key points about parallel steps: +- Sub-step `rules` define possible outcomes but `next` is ignored (parent handles routing) +- Parent `rules` use `all("X")`/`any("X")` to aggregate sub-step results +- `all("X")`: true if ALL sub-steps matched condition X +- `any("X")`: true if ANY sub-step matched condition X + +### Rule Condition Types + +| Type | Syntax | Evaluation | +|------|--------|------------| +| Tag-based | `"condition text"` | Agent outputs `[STEP:N]` tag, matched by index | +| AI judge | `ai("condition text")` | AI evaluates condition against agent output | +| Aggregate | `all("X")` / `any("X")` | Aggregates parallel sub-step matched conditions | + ### Template Variables | Variable | Description | |----------|-------------| -| `{task}` | Original user request | -| `{iteration}` | Current iteration number | -| `{max_iterations}` | Maximum iterations | -| `{previous_response}` | Previous step output (requires `pass_previous_response: true`) | -| `{user_inputs}` | Accumulated user inputs during workflow | -| `{report_dir}` | Report directory name (e.g., `20250126-143052-task-summary`) | +| `{task}` | Original user request (auto-injected if not in template) | +| `{iteration}` | Workflow-wide iteration count | +| `{max_iterations}` | Maximum iterations allowed | +| `{step_iteration}` | Per-step iteration count | +| `{previous_response}` | Previous step output (auto-injected if not in template) | +| `{user_inputs}` | Accumulated user inputs (auto-injected if not in template) | +| `{report_dir}` | Report directory name | ### Model Resolution @@ -166,24 +254,34 @@ provider: claude model: opus # Default model for all steps (unless overridden) ``` +## NDJSON Session Logging + +Session logs use NDJSON (`.jsonl`) format for real-time append-only writes. 
Record types: + +| Record | Description | +|--------|-------------| +| `workflow_start` | Workflow initialization with task, workflow name | +| `step_start` | Step execution start | +| `step_complete` | Step result with status, content, matched rule info | +| `workflow_complete` | Successful completion | +| `workflow_abort` | Abort with reason | + +Files: `.takt/logs/{sessionId}.jsonl`, with `latest.json` pointer. Legacy `.json` format is still readable via `loadSessionLog()`. + ## TypeScript Notes - ESM modules with `.js` extensions in imports - Strict TypeScript with `noUncheckedIndexedAccess` -- Zod schemas (v4 syntax) for runtime validation (`src/models/schemas.ts`) +- Zod schemas for runtime validation (`src/models/schemas.ts`) - Uses `@anthropic-ai/claude-agent-sdk` for Claude integration ## Design Principles **Keep commands minimal.** One command per concept. Use arguments/modes instead of multiple similar commands. Before adding a new command, consider if existing commands can be extended. -**Do NOT expand schemas carelessly.** The `TransitionConditionSchema` defines allowed condition values for workflow transitions. Do NOT add new values without strong justification. Use existing values creatively: -- `done` - Task completed (minor fixes, successful completion) -- `blocked` - Cannot proceed (needs plan rework) -- `approved` - Review passed -- `rejected` - Review failed, needs major rework -- `improve` - Needs improvement (security concerns, quality issues) -- `always` - Unconditional transition +**Do NOT expand schemas carelessly.** Rule conditions are free-form text (not enum-restricted). However, the engine's behavior depends on specific patterns (`ai()`, `all()`, `any()`). Do not add new special syntax without updating the loader's regex parsing in `workflowLoader.ts`. + +**Instruction auto-injection over explicit placeholders.** The instruction builder auto-injects `{task}`, `{previous_response}`, `{user_inputs}`, and status rules. 
Templates should contain only step-specific instructions, not boilerplate. ## Isolated Execution (Shared Clone) diff --git a/README.md b/README.md index daa2c9d..4394d0e 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ npm install -g takt # Run a task (will prompt for workflow selection and optional isolated clone) takt "Add a login feature" +# Run a GitHub issue as a task +takt "#6" + # Add a task to the queue takt /add-task "Fix the login bug" @@ -75,7 +78,7 @@ Choose `y` to run in a `git clone --shared` isolated environment, keeping your w | Workflow | Best for | |----------|----------| -| `default` | Full development tasks. Used for TAKT's own development. Multi-stage review with fix loops. | +| `default` | Full development tasks. Used for TAKT's own development. Multi-stage review with parallel architect + security review. | | `simple` | Lightweight tasks like README updates or small fixes. Reviews without fix loops. | | `expert-review` / `expert-cqrs` | Web development projects. Multi-expert review (CQRS, Frontend, Security, QA). | | `research` | Research and investigation. Autonomous research without asking questions. 
| @@ -86,67 +89,111 @@ Choose `y` to run in a `git clone --shared` isolated environment, keeping your w | Command | Alias | Description | |---------|-------|-------------| | `takt "task"` | | Execute task with current workflow (session auto-continued) | +| `takt "#N"` | | Execute GitHub issue #N as a task | | `takt /run-tasks` | `/run` | Run all pending tasks from `.takt/tasks/` | | `takt /watch` | | Watch `.takt/tasks/` and auto-execute tasks (stays resident) | | `takt /add-task` | `/add` | Add a new task interactively (YAML format, multiline supported) | | `takt /list-tasks` | `/list` | List task branches (try merge, merge & cleanup, or delete) | | `takt /switch` | `/sw` | Switch workflow interactively | | `takt /clear` | | Clear agent conversation sessions | +| `takt /eject` | | Copy builtin workflow/agents to `~/.takt/` for customization | | `takt /refresh-builtin` | | Update builtin agents/workflows to latest version | | `takt /config` | | Configure permission mode | | `takt /help` | | Show help | ## Workflows -TAKT uses YAML-based workflow definitions. Place them in: -- `~/.takt/workflows/*.yaml` +TAKT uses YAML-based workflow definitions with rule-based routing. Builtin workflows are embedded in the package; user workflows in `~/.takt/workflows/` take priority. Use `/eject` to copy a builtin to `~/.takt/` for customization. ### Example Workflow ```yaml name: default max_iterations: 10 +initial_step: plan steps: - name: plan - agent: planner - provider: claude # Optional: claude or codex - model: opus # Claude: opus/sonnet/haiku, Codex: gpt-5.2-codex/gpt-5.1-codex + agent: ../agents/default/planner.md + model: opus + edit: false + rules: + - condition: Plan complete + next: implement instruction_template: | - {task} - transitions: - - condition: done - next_step: implement + Analyze the request and create an implementation plan. 
- name: implement - agent: coder - provider: codex - model: gpt-5.2-codex # Codex model example + agent: ../agents/default/coder.md + edit: true + permission_mode: acceptEdits + rules: + - condition: Implementation complete + next: review + - condition: Cannot proceed + next: ABORT instruction_template: | - {task} - transitions: - - condition: done - next_step: review - - condition: blocked - next_step: ABORT + Implement based on the plan. - name: review - agent: architect - model: sonnet # Model alias (no provider = uses global default) - transitions: - - condition: approved - next_step: COMPLETE - - condition: rejected - next_step: implement + agent: ../agents/default/architecture-reviewer.md + edit: false + rules: + - condition: Approved + next: COMPLETE + - condition: Needs fix + next: implement + instruction_template: | + Review the implementation for architecture and code quality. ``` +### Parallel Steps + +Steps can execute sub-steps concurrently with aggregate evaluation: + +```yaml + - name: reviewers + parallel: + - name: arch-review + agent: ../agents/default/architecture-reviewer.md + rules: + - condition: approved + - condition: needs_fix + instruction_template: | + Review architecture and code quality. + - name: security-review + agent: ../agents/default/security-reviewer.md + rules: + - condition: approved + - condition: needs_fix + instruction_template: | + Review for security vulnerabilities. 
+ rules: + - condition: all("approved") + next: supervise + - condition: any("needs_fix") + next: fix +``` + +- `all("X")`: true if ALL sub-steps matched condition X +- `any("X")`: true if ANY sub-step matched condition X +- Sub-step `rules` define possible outcomes; `next` is optional (parent handles routing) + +### Rule Condition Types + +| Type | Syntax | Description | +|------|--------|-------------| +| Tag-based | `"condition text"` | Agent outputs `[STEP:N]` tag, matched by index | +| AI judge | `ai("condition text")` | AI evaluates the condition against agent output | +| Aggregate | `all("X")` / `any("X")` | Aggregates parallel sub-step results | + ## Built-in Workflows TAKT ships with several built-in workflows: | Workflow | Description | |----------|-------------| -| `default` | Full development workflow: plan → implement → architect review → AI review → security review → supervisor approval. Includes fix loops for each review stage. | +| `default` | Full development workflow: plan → implement → AI review → parallel reviewers (architect + security) → supervisor approval. Includes fix loops for each review stage. | | `simple` | Simplified version of default: plan → implement → architect review → AI review → supervisor. No intermediate fix steps. | | `research` | Research workflow: planner → digger → supervisor. Autonomously researches topics without asking questions. | | `expert-review` | Comprehensive review with domain experts: CQRS+ES, Frontend, AI, Security, QA reviews with fix loops. | @@ -158,9 +205,9 @@ Switch between workflows with `takt /switch`. 
## Built-in Agents - **coder** - Implements features and fixes bugs -- **architect** - Reviews code and provides feedback -- **supervisor** - Final verification and approval -- **planner** - Task analysis and implementation planning +- **architect** - Reviews architecture and code quality, verifies spec compliance +- **supervisor** - Final verification, validation, and approval +- **planner** - Task analysis, spec investigation, and implementation planning - **ai-reviewer** - AI-generated code quality review - **security** - Security vulnerability assessment @@ -175,14 +222,19 @@ agents: allowed_tools: [Read, Glob, Grep] provider: claude # Optional: claude or codex model: opus # Claude: opus/sonnet/haiku or full name (claude-opus-4-5-20251101) - status_patterns: - approved: "\\[APPROVE\\]" - rejected: "\\[REJECT\\]" +``` - - name: my-codex-agent - prompt_file: .takt/prompts/analyzer.md - provider: codex - model: gpt-5.2-codex # Codex: gpt-5.2-codex, gpt-5.1-codex, etc. +Or create agent prompt files as Markdown: + +```markdown +# ~/.takt/agents/my-agents/reviewer.md + +You are a code reviewer focused on security. + +## Your Role +- Check for security vulnerabilities +- Verify input validation +- Review authentication logic ``` ## Model Selection @@ -217,22 +269,22 @@ Available Codex models: ``` ~/.takt/ ├── config.yaml # Global config (provider, model, workflows, etc.) 
-├── workflows/ # Workflow definitions -└── agents/ # Agent prompt files +├── workflows/ # User workflow definitions (override builtins) +└── agents/ # User agent prompt files .takt/ # Project-level config ├── agents.yaml # Custom agent definitions ├── tasks/ # Pending task files (.yaml, .md) ├── completed/ # Completed tasks with reports -├── worktree-meta/ # Metadata for task branches -├── worktree-sessions/ # Per-clone agent session storage ├── reports/ # Execution reports (auto-generated) -└── logs/ # Session logs (incremental) +└── logs/ # Session logs in NDJSON format ├── latest.json # Pointer to current/latest session ├── previous.json # Pointer to previous session - └── {sessionId}.json # Full session log per workflow run + └── {sessionId}.jsonl # NDJSON session log per workflow run ``` +Builtin resources are embedded in the npm package (`dist/resources/`). User files in `~/.takt/` take priority. + ### Global Configuration Configure default provider and model in `~/.takt/config.yaml`: @@ -268,67 +320,59 @@ This interactive flow ensures each task runs with the right workflow and isolati ### Adding Custom Workflows -Create your own workflow by adding YAML files to `~/.takt/workflows/`: +Create your own workflow by adding YAML files to `~/.takt/workflows/`, or use `/eject` to customize a builtin: + +```bash +# Copy the default workflow to ~/.takt/workflows/ for editing +takt /eject default +``` ```yaml # ~/.takt/workflows/my-workflow.yaml name: my-workflow description: My custom workflow - max_iterations: 5 +initial_step: analyze steps: - name: analyze agent: ~/.takt/agents/my-agents/analyzer.md + edit: false + rules: + - condition: Analysis complete + next: implement instruction_template: | - Analyze this request: {task} - transitions: - - condition: done - next_step: implement + Analyze this request thoroughly. 
- name: implement agent: ~/.takt/agents/default/coder.md - instruction_template: | - Implement based on the analysis: {previous_response} + edit: true + permission_mode: acceptEdits pass_previous_response: true - transitions: - - condition: done - next_step: COMPLETE + rules: + - condition: Done + next: COMPLETE + instruction_template: | + Implement based on the analysis. ``` +> **Note**: `{task}`, `{previous_response}`, and `{user_inputs}` are auto-injected into instructions. You only need explicit placeholders if you want to control their position in the template. + ### Specifying Agents by Path Agents are specified using file paths in workflow definitions: ```yaml -# Use built-in agents +# Relative to workflow file directory +agent: ../agents/default/coder.md + +# Home directory agent: ~/.takt/agents/default/coder.md -agent: ~/.takt/agents/magi/melchior.md -# Use project-local agents -agent: ./.takt/agents/my-reviewer.md - -# Use absolute paths +# Absolute paths agent: /path/to/custom/agent.md ``` -Create custom agent prompts as Markdown files: - -```markdown -# ~/.takt/agents/my-agents/reviewer.md - -You are a code reviewer focused on security. - -## Your Role -- Check for security vulnerabilities -- Verify input validation -- Review authentication logic - -## Output Format -- [REVIEWER:APPROVE] if code is secure -- [REVIEWER:REJECT] if issues found (list them) -``` - ### Task Management TAKT supports batch task processing through task files in `.takt/tasks/`. Both `.yaml`/`.yml` and `.md` file formats are supported. @@ -339,6 +383,9 @@ TAKT supports batch task processing through task files in `.takt/tasks/`. Both ` # Quick add (no isolation) takt /add-task "Add authentication feature" +# Add a GitHub issue as a task +takt /add-task "#6" + # Interactive mode (prompts for isolation, branch, workflow options) takt /add-task ``` @@ -416,11 +463,13 @@ Lists all `takt/`-prefixed branches with file change counts. 
For each branch you ### Session Logs -TAKT writes session logs incrementally to `.takt/logs/`. Logs are saved at workflow start, after each step, and at workflow end — so even if the process crashes mid-execution, partial logs are preserved. +TAKT writes session logs in NDJSON (`.jsonl`) format to `.takt/logs/`. Each record is appended atomically, so even if the process crashes mid-execution, partial logs are preserved and logs can be tailed in real-time with `tail -f`. - `.takt/logs/latest.json` - Pointer to the current (or most recent) session - `.takt/logs/previous.json` - Pointer to the previous session -- `.takt/logs/{sessionId}.json` - Full session log with step history +- `.takt/logs/{sessionId}.jsonl` - NDJSON session log with step history + +Record types: `workflow_start`, `step_start`, `step_complete`, `workflow_complete`, `workflow_abort`. Agents can read `previous.json` to pick up context from a prior run. Session continuity is automatic — simply run `takt "task"` to continue where the previous session left off. 
@@ -430,57 +479,48 @@ Available variables in `instruction_template`: | Variable | Description | |----------|-------------| -| `{task}` | Original user request | +| `{task}` | Original user request (auto-injected if not in template) | | `{iteration}` | Workflow-wide turn count (total steps executed) | | `{max_iterations}` | Maximum iterations allowed | | `{step_iteration}` | Per-step iteration count (how many times THIS step has run) | -| `{previous_response}` | Previous step's output (requires `pass_previous_response: true`) | -| `{user_inputs}` | Additional user inputs during workflow | +| `{previous_response}` | Previous step's output (auto-injected if not in template) | +| `{user_inputs}` | Additional user inputs during workflow (auto-injected if not in template) | | `{report_dir}` | Report directory name (e.g., `20250126-143052-task-summary`) | ### Designing Workflows -Each workflow step requires three key elements: +Each workflow step requires: **1. Agent** - A Markdown file containing the system prompt: ```yaml -agent: ~/.takt/agents/default/coder.md # Path to agent prompt file -agent_name: coder # Display name (optional) +agent: ../agents/default/coder.md # Path to agent prompt file +agent_name: coder # Display name (optional) ``` -**2. Status Rules** - Define how the agent signals completion. Agents output status markers like `[CODER:DONE]` or `[ARCHITECT:REJECT]` that TAKT detects to drive transitions: +**2. Rules** - Define how the step routes to the next step. The instruction builder auto-injects status output rules so agents know what tags to output: ```yaml -status_rules_prompt: | - Your final output MUST include a status tag: - - `[CODER:DONE]` if implementation is complete - - `[CODER:BLOCKED]` if you cannot proceed +rules: + - condition: "Implementation complete" + next: review + - condition: "Cannot proceed" + next: ABORT ``` -**3. 
Transitions** - Route to the next step based on status: +Special `next` values: `COMPLETE` (success), `ABORT` (failure). -```yaml -transitions: - - condition: done # Maps to status tag DONE - next_step: review # Go to review step - - condition: blocked # Maps to status tag BLOCKED - next_step: ABORT # End workflow with failure -``` - -Available transition conditions: `done`, `blocked`, `approved`, `rejected`, `improve`, `answer`, `always`. -Special next_step values: `COMPLETE` (success), `ABORT` (failure). - -**Step options:** +**3. Step options:** | Option | Default | Description | |--------|---------|-------------| +| `edit` | - | Whether the step can edit project files (`true`/`false`) | | `pass_previous_response` | `true` | Pass previous step's output to `{previous_response}` | -| `on_no_status` | - | Behavior when no status is detected: `complete`, `continue`, `stay` | | `allowed_tools` | - | List of tools the agent can use (Read, Glob, Grep, Edit, Write, Bash, etc.) | | `provider` | - | Override provider for this step (`claude` or `codex`) | | `model` | - | Override model for this step | | `permission_mode` | `default` | Permission mode: `default`, `acceptEdits`, or `bypassPermissions` | +| `report` | - | Report file configuration (name, format) for auto-generated reports | ## API Usage From e657211591b278f7c7451b888eba1d5a93725824 Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 21:01:38 +0900 Subject: [PATCH 2/3] update ja --- docs/README.ja.md | 394 ++++++++++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 168 deletions(-) diff --git a/docs/README.ja.md b/docs/README.ja.md index 9a0bb3d..f04bb38 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -22,6 +22,9 @@ npm install -g takt # タスクを実行(ワークフロー選択プロンプトが表示されます) takt "ログイン機能を追加して" +# GitHub Issueをタスクとして実行 +takt "#6" + # タスクをキューに追加 takt /add-task "ログインのバグを修正" @@ -40,7 +43,7 @@ takt /switch ### タスク実行の流れ -`takt "ログイン機能を追加して"` 
を実行すると、以下の対話フローが表示されます: +`takt "ログイン機能を追加して"` を実行すると、以下の対話フローが表示されます: **1. ワークフロー選択** @@ -71,7 +74,7 @@ Select workflow: | ワークフロー | おすすめ用途 | |------------|------------| -| `default` | 本格的な開発タスク。TAKT自身の開発で使用。修正ループ付きの多段階レビュー。 | +| `default` | 本格的な開発タスク。TAKT自身の開発で使用。アーキテクト+セキュリティの並列レビュー付き多段階レビュー。 | | `simple` | README更新や小さな修正などの軽量タスク。レビューはあるが修正ループなし。 | | `expert-review` / `expert-cqrs` | Web開発プロジェクト。マルチエキスパートレビュー(CQRS、フロントエンド、セキュリティ、QA)。 | | `research` | 調査・リサーチ。質問せずに自律的にリサーチを実行。 | @@ -82,16 +85,175 @@ Select workflow: | コマンド | エイリアス | 説明 | |---------|-----------|------| | `takt "タスク"` | | 現在のワークフローでタスクを実行(セッション自動継続) | +| `takt "#N"` | | GitHub Issue #Nをタスクとして実行 | | `takt /run-tasks` | `/run` | `.takt/tasks/` の保留中タスクをすべて実行 | | `takt /watch` | | `.takt/tasks/` を監視してタスクを自動実行(常駐プロセス) | | `takt /add-task` | `/add` | 新しいタスクを対話的に追加(YAML形式、複数行対応) | | `takt /list-tasks` | `/list` | タスクブランチ一覧(マージ・削除) | | `takt /switch` | `/sw` | ワークフローを対話的に切り替え | | `takt /clear` | | エージェントの会話セッションをクリア | +| `takt /eject` | | ビルトインのワークフロー/エージェントを`~/.takt/`にコピーしてカスタマイズ | | `takt /refresh-builtin` | | ビルトインのエージェント/ワークフローを最新版に更新 | | `takt /config` | | パーミッションモードを設定 | | `takt /help` | | ヘルプを表示 | +## ワークフロー + +TAKTはYAMLベースのワークフロー定義とルールベースルーティングを使用します。ビルトインワークフローはパッケージに埋め込まれており、`~/.takt/workflows/` のユーザーワークフローが優先されます。`/eject` でビルトインを`~/.takt/`にコピーしてカスタマイズできます。 + +### ワークフローの例 + +```yaml +name: default +max_iterations: 10 +initial_step: plan + +steps: + - name: plan + agent: ../agents/default/planner.md + model: opus + edit: false + rules: + - condition: 計画完了 + next: implement + instruction_template: | + リクエストを分析し、実装計画を作成してください。 + + - name: implement + agent: ../agents/default/coder.md + edit: true + permission_mode: acceptEdits + rules: + - condition: 実装完了 + next: review + - condition: 進行不可 + next: ABORT + instruction_template: | + 計画に基づいて実装してください。 + + - name: review + agent: ../agents/default/architecture-reviewer.md + edit: false + rules: + - condition: 承認 + next: COMPLETE + - condition: 修正が必要 + 
next: implement + instruction_template: | + アーキテクチャとコード品質の観点で実装をレビューしてください。 +``` + +### パラレルステップ + +ステップ内でサブステップを並列実行し、集約条件で評価できます: + +```yaml + - name: reviewers + parallel: + - name: arch-review + agent: ../agents/default/architecture-reviewer.md + rules: + - condition: approved + - condition: needs_fix + instruction_template: | + アーキテクチャとコード品質をレビューしてください。 + - name: security-review + agent: ../agents/default/security-reviewer.md + rules: + - condition: approved + - condition: needs_fix + instruction_template: | + セキュリティ脆弱性をレビューしてください。 + rules: + - condition: all("approved") + next: supervise + - condition: any("needs_fix") + next: fix +``` + +- `all("X")`: すべてのサブステップが条件Xにマッチした場合にtrue +- `any("X")`: いずれかのサブステップが条件Xにマッチした場合にtrue +- サブステップの `rules` は可能な結果を定義しますが、`next` は省略可能(親が遷移を制御) + +### ルール条件の種類 + +| 種類 | 構文 | 説明 | +|------|------|------| +| タグベース | `"条件テキスト"` | エージェントが `[STEP:N]` タグを出力し、インデックスでマッチ | +| AI判定 | `ai("条件テキスト")` | AIが条件をエージェント出力に対して評価 | +| 集約 | `all("X")` / `any("X")` | パラレルサブステップの結果を集約 | + +## ビルトインワークフロー + +TAKTには複数のビルトインワークフローが同梱されています: + +| ワークフロー | 説明 | +|------------|------| +| `default` | フル開発ワークフロー: 計画 → 実装 → AIレビュー → 並列レビュー(アーキテクト+セキュリティ)→ スーパーバイザー承認。各レビュー段階に修正ループあり。 | +| `simple` | defaultの簡略版: 計画 → 実装 → アーキテクトレビュー → AIレビュー → スーパーバイザー。中間の修正ステップなし。 | +| `research` | リサーチワークフロー: プランナー → ディガー → スーパーバイザー。質問せずに自律的にリサーチを実行。 | +| `expert-review` | ドメインエキスパートによる包括的レビュー: CQRS+ES、フロントエンド、AI、セキュリティ、QAレビューと修正ループ。 | +| `expert-cqrs` | CQRS+ES、フロントエンド、AI、セキュリティ、QA専門のエキスパートレビュー。計画 → 実装 → マルチエキスパートレビュー → スーパーバイザー。 | +| `magi` | エヴァンゲリオンにインスパイアされた審議システム。3つのAIペルソナ(MELCHIOR、BALTHASAR、CASPER)が分析し投票。 | + +`takt /switch` でワークフローを切り替えられます。 + +## ビルトインエージェント + +- **coder** - 機能を実装しバグを修正 +- **architect** - アーキテクチャとコード品質をレビュー、仕様準拠を検証 +- **supervisor** - 最終検証、バリデーション、承認 +- **planner** - タスク分析、仕様調査、実装計画 +- **ai-reviewer** - AI生成コードの品質レビュー +- **security** - セキュリティ脆弱性の評価 + +## カスタムエージェント + +`.takt/agents.yaml`でカスタムエージェントを定義: + +```yaml +agents: + - name: my-reviewer + 
prompt_file: .takt/prompts/reviewer.md + allowed_tools: [Read, Glob, Grep] + provider: claude # オプション: claude または codex + model: opus # Claude: opus/sonnet/haiku、Codex: gpt-5.2-codex 等 +``` + +またはMarkdownファイルでエージェントプロンプトを作成: + +```markdown +# ~/.takt/agents/my-agents/reviewer.md + +あなたはセキュリティに特化したコードレビュアーです。 + +## 役割 +- セキュリティ脆弱性をチェック +- 入力バリデーションを検証 +- 認証ロジックをレビュー +``` + +## プロジェクト構造 + +``` +~/.takt/ +├── config.yaml # グローバル設定(プロバイダー、モデル、ワークフロー等) +├── workflows/ # ユーザーワークフロー定義(ビルトインを上書き) +└── agents/ # ユーザーエージェントプロンプトファイル + +.takt/ # プロジェクトレベルの設定 +├── agents.yaml # カスタムエージェント定義 +├── tasks/ # 保留中のタスクファイル(.yaml, .md) +├── completed/ # 完了したタスクとレポート +├── reports/ # 実行レポート(自動生成) +└── logs/ # NDJSON形式のセッションログ + ├── latest.json # 現在/最新セッションへのポインタ + ├── previous.json # 前回セッションへのポインタ + └── {sessionId}.jsonl # ワークフロー実行ごとのNDJSONセッションログ +``` + +ビルトインリソースはnpmパッケージ(`dist/resources/`)に埋め込まれています。`~/.takt/` のユーザーファイルが優先されます。 + ## 実践的な使い方ガイド ### タスク管理 @@ -104,13 +266,16 @@ TAKTは`.takt/tasks/`内のタスクファイルによるバッチ処理をサ # クイック追加(隔離なし) takt /add-task "認証機能を追加" +# GitHub Issueをタスクとして追加 +takt /add-task "#6" + # 対話モード(隔離実行、ブランチ、ワークフローオプションを指定可能) takt /add-task ``` #### タスクファイルの形式 -**YAML形式**(推奨、worktree/branch/workflowオプション対応): +**YAML形式**(推奨、worktree/branch/workflowオプション対応): ```yaml # .takt/tasks/add-auth.yaml @@ -120,14 +285,14 @@ branch: "feat/add-auth" # ブランチ名(省略時は自動生成) workflow: "default" # ワークフロー指定(省略時は現在のもの) ``` -**Markdown形式**(シンプル、後方互換): +**Markdown形式**(シンプル、後方互換): ```markdown # .takt/tasks/add-login-feature.md アプリケーションにログイン機能を追加する。 -要件: +要件: - ユーザー名とパスワードフィールド - フォームバリデーション - 失敗時のエラーハンドリング @@ -135,7 +300,7 @@ workflow: "default" # ワークフロー指定(省略時は現在 #### 共有クローンによる隔離実行 -YAMLタスクファイルで`worktree`を指定すると、各タスクを`git clone --shared`で作成した隔離クローンで実行し、メインの作業ディレクトリをクリーンに保てます: +YAMLタスクファイルで`worktree`を指定すると、各タスクを`git clone --shared`で作成した隔離クローンで実行し、メインの作業ディレクトリをクリーンに保てます: - `worktree: true` - 隣接ディレクトリ(または`worktree_dir`設定で指定した場所)に共有クローンを自動作成 - `worktree: "/path/to/dir"` - 指定パスに作成 @@ -162,7 +327,7 @@ takt 
/run-tasks takt /watch ``` -ウォッチモードは`.takt/tasks/`をポーリングし、新しいタスクファイルが現れると自動実行します。`Ctrl+C`で停止する常駐プロセスです。以下のような場合に便利です: +ウォッチモードは`.takt/tasks/`をポーリングし、新しいタスクファイルが現れると自動実行します。`Ctrl+C`で停止する常駐プロセスです。以下のような場合に便利です: - タスクファイルを生成するCI/CDパイプライン - 外部プロセスがタスクを追加する自動化ワークフロー - タスクを順次キューイングする長時間の開発セッション @@ -173,7 +338,7 @@ takt /watch takt /list-tasks ``` -`takt/`プレフィックスのブランチをファイル変更数とともに一覧表示します。各ブランチに対して以下の操作が可能です: +`takt/`プレフィックスのブランチをファイル変更数とともに一覧表示します。各ブランチに対して以下の操作が可能です: - **Try merge** - mainにスカッシュマージ(変更をステージングのみ、コミットなし) - **Instruct** - 一時クローン経由で追加指示を与える - **Merge & cleanup** - マージしてブランチを削除 @@ -181,226 +346,119 @@ takt /list-tasks ### セッションログ -TAKTはセッションログを`.takt/logs/`にインクリメンタルに書き込みます。ログはワークフロー開始時、各ステップ完了後、ワークフロー終了時に保存されるため、プロセスが途中でクラッシュしても部分的なログが保持されます。 +TAKTはセッションログをNDJSON(`.jsonl`)形式で`.takt/logs/`に書き込みます。各レコードはアトミックに追記されるため、プロセスが途中でクラッシュしても部分的なログが保持され、`tail -f`でリアルタイムに追跡できます。 - `.takt/logs/latest.json` - 現在(または最新の)セッションへのポインタ - `.takt/logs/previous.json` - 前回セッションへのポインタ -- `.takt/logs/{sessionId}.json` - ワークフロー実行ごとの完全なセッションログ +- `.takt/logs/{sessionId}.jsonl` - ワークフロー実行ごとのNDJSONセッションログ + +レコード種別: `workflow_start`, `step_start`, `step_complete`, `workflow_complete`, `workflow_abort` エージェントは`previous.json`を読み取って前回の実行コンテキストを引き継ぐことができます。セッション継続は自動的に行われます — `takt "タスク"`を実行するだけで前回のセッションから続行されます。 ### カスタムワークフローの追加 -`~/.takt/workflows/`にYAMLファイルを追加して独自のワークフローを作成できます: +`~/.takt/workflows/`にYAMLファイルを追加するか、`/eject`でビルトインをカスタマイズします: + +```bash +# defaultワークフローを~/.takt/workflows/にコピーして編集 +takt /eject default +``` ```yaml # ~/.takt/workflows/my-workflow.yaml name: my-workflow description: カスタムワークフロー - max_iterations: 5 +initial_step: analyze steps: - name: analyze agent: ~/.takt/agents/my-agents/analyzer.md + edit: false + rules: + - condition: 分析完了 + next: implement instruction_template: | - このリクエストを分析してください: {task} - transitions: - - condition: done - next_step: implement + このリクエストを徹底的に分析してください。 - name: implement agent: ~/.takt/agents/default/coder.md - instruction_template: | - 
分析に基づいて実装してください: {previous_response} + edit: true + permission_mode: acceptEdits pass_previous_response: true - transitions: - - condition: done - next_step: COMPLETE + rules: + - condition: 完了 + next: COMPLETE + instruction_template: | + 分析に基づいて実装してください。 ``` +> **Note**: `{task}`、`{previous_response}`、`{user_inputs}` は自動的にインストラクションに注入されます。テンプレート内での位置を制御したい場合のみ、明示的なプレースホルダーが必要です。 + ### エージェントをパスで指定する -ワークフロー定義ではファイルパスを使ってエージェントを指定します: +ワークフロー定義ではファイルパスを使ってエージェントを指定します: ```yaml -# ビルトインエージェントを使用 +# ワークフローファイルからの相対パス +agent: ../agents/default/coder.md + +# ホームディレクトリ agent: ~/.takt/agents/default/coder.md -agent: ~/.takt/agents/magi/melchior.md -# プロジェクトローカルのエージェントを使用 -agent: ./.takt/agents/my-reviewer.md - -# 絶対パスを使用 +# 絶対パス agent: /path/to/custom/agent.md ``` -カスタムエージェントプロンプトをMarkdownファイルとして作成: - -```markdown -# ~/.takt/agents/my-agents/reviewer.md - -あなたはセキュリティに特化したコードレビュアーです。 - -## 役割 -- セキュリティ脆弱性をチェック -- 入力バリデーションを検証 -- 認証ロジックをレビュー - -## 出力形式 -- [REVIEWER:APPROVE] コードが安全な場合 -- [REVIEWER:REJECT] 問題が見つかった場合(問題点をリストアップ) -``` - ### ワークフロー変数 -`instruction_template`で使用可能な変数: +`instruction_template`で使用可能な変数: | 変数 | 説明 | |------|------| -| `{task}` | 元のユーザーリクエスト | +| `{task}` | 元のユーザーリクエスト(テンプレートになければ自動注入) | | `{iteration}` | ワークフロー全体のターン数(実行された全ステップ数) | | `{max_iterations}` | 最大イテレーション数 | | `{step_iteration}` | ステップごとのイテレーション数(このステップが実行された回数) | -| `{previous_response}` | 前のステップの出力(`pass_previous_response: true`が必要) | -| `{user_inputs}` | ワークフロー中の追加ユーザー入力 | -| `{report_dir}` | レポートディレクトリ名(例:`20250126-143052-task-summary`) | +| `{previous_response}` | 前のステップの出力(テンプレートになければ自動注入) | +| `{user_inputs}` | ワークフロー中の追加ユーザー入力(テンプレートになければ自動注入) | +| `{report_dir}` | レポートディレクトリ名(例: `20250126-143052-task-summary`) | ### ワークフローの設計 -各ワークフローステップには3つの重要な要素が必要です。 +各ワークフローステップに必要な要素: -**1. エージェント** - システムプロンプトを含むMarkdownファイル: +**1. 
エージェント** - システムプロンプトを含むMarkdownファイル: ```yaml -agent: ~/.takt/agents/default/coder.md # エージェントプロンプトファイルのパス -agent_name: coder # 表示名(オプション) +agent: ../agents/default/coder.md # エージェントプロンプトファイルのパス +agent_name: coder # 表示名(オプション) ``` -**2. ステータスルール** - エージェントが完了を通知する方法を定義。エージェントは`[CODER:DONE]`や`[ARCHITECT:REJECT]`のようなステータスマーカーを出力し、TAKTがそれを検出して遷移を駆動します: +**2. ルール** - ステップから次のステップへのルーティングを定義。インストラクションビルダーがステータス出力ルールを自動注入するため、エージェントはどのタグを出力すべきか把握できます: ```yaml -status_rules_prompt: | - 最終出力には必ずステータスタグを含めてください: - - `[CODER:DONE]` 実装が完了した場合 - - `[CODER:BLOCKED]` 進行できない場合 +rules: + - condition: "実装完了" + next: review + - condition: "進行不可" + next: ABORT ``` -**3. 遷移** - ステータスに基づいて次のステップにルーティング: +特殊な `next` 値: `COMPLETE`(成功)、`ABORT`(失敗) -```yaml -transitions: - - condition: done # ステータスタグDONEに対応 - next_step: review # reviewステップへ遷移 - - condition: blocked # ステータスタグBLOCKEDに対応 - next_step: ABORT # ワークフローを失敗終了 -``` - -使用可能な遷移条件:`done`、`blocked`、`approved`、`rejected`、`improve`、`answer`、`always` -特殊なnext_step値:`COMPLETE`(成功)、`ABORT`(失敗) - -**ステップオプション:** +**3. 
ステップオプション:** | オプション | デフォルト | 説明 | |-----------|-----------|------| +| `edit` | - | ステップがプロジェクトファイルを編集できるか(`true`/`false`) | | `pass_previous_response` | `true` | 前のステップの出力を`{previous_response}`に渡す | -| `on_no_status` | - | ステータス未検出時の動作:`complete`、`continue`、`stay` | | `allowed_tools` | - | エージェントが使用できるツール一覧(Read, Glob, Grep, Edit, Write, Bash等) | | `provider` | - | このステップのプロバイダーを上書き(`claude`または`codex`) | | `model` | - | このステップのモデルを上書き | -| `permission_mode` | `default` | パーミッションモード:`default`、`acceptEdits`、`bypassPermissions` | - -## ワークフロー - -TAKTはYAMLベースのワークフロー定義を使用します。以下に配置してください: -- `~/.takt/workflows/*.yaml` - -### ワークフローの例 - -```yaml -name: default -max_iterations: 10 - -steps: - - name: implement - agent: coder - instruction_template: | - {task} - transitions: - - condition: done - next_step: review - - condition: blocked - next_step: ABORT - - - name: review - agent: architect - transitions: - - condition: approved - next_step: COMPLETE - - condition: rejected - next_step: implement -``` - -## ビルトインワークフロー - -TAKTには複数のビルトインワークフローが同梱されています: - -| ワークフロー | 説明 | -|------------|------| -| `default` | フル開発ワークフロー:計画 → 実装 → アーキテクトレビュー → AIレビュー → セキュリティレビュー → スーパーバイザー承認。各レビュー段階に修正ループあり。 | -| `simple` | defaultの簡略版:計画 → 実装 → アーキテクトレビュー → AIレビュー → スーパーバイザー。中間の修正ステップなし。 | -| `research` | リサーチワークフロー:プランナー → ディガー → スーパーバイザー。質問せずに自律的にリサーチを実行。 | -| `expert-review` | ドメインエキスパートによる包括的レビュー:CQRS+ES、フロントエンド、AI、セキュリティ、QAレビューと修正ループ。 | -| `expert-cqrs` | CQRS+ES、フロントエンド、AI、セキュリティ、QA専門のエキスパートレビュー。計画 → 実装 → マルチエキスパートレビュー → スーパーバイザー。 | -| `magi` | エヴァンゲリオンにインスパイアされた審議システム。3つのAIペルソナ(MELCHIOR、BALTHASAR、CASPER)が分析し投票。 | - -`takt /switch` でワークフローを切り替えられます。 - -## ビルトインエージェント - -- **coder** - 機能を実装しバグを修正 -- **architect** - コードをレビューしフィードバックを提供 -- **supervisor** - 最終検証と承認 -- **planner** - タスク分析と実装計画 -- **ai-reviewer** - AI生成コードの品質レビュー -- **security** - セキュリティ脆弱性の評価 - -## カスタムエージェント - -`.takt/agents.yaml`でカスタムエージェントを定義: - -```yaml -agents: - - name: my-reviewer - prompt_file: 
.takt/prompts/reviewer.md - allowed_tools: [Read, Glob, Grep] - provider: claude # オプション:claude または codex - model: opus # Claude: opus/sonnet/haiku、Codex: gpt-5.2-codex 等 - status_patterns: - approved: "\\[APPROVE\\]" - rejected: "\\[REJECT\\]" -``` - -## プロジェクト構造 - -``` -~/.takt/ -├── config.yaml # グローバル設定(プロバイダー、モデル、ワークフロー等) -├── workflows/ # ワークフロー定義 -└── agents/ # エージェントプロンプトファイル - -.takt/ # プロジェクトレベルの設定 -├── agents.yaml # カスタムエージェント定義 -├── tasks/ # 保留中のタスクファイル(.yaml, .md) -├── completed/ # 完了したタスクとレポート -├── worktree-meta/ # タスクブランチのメタデータ -├── worktree-sessions/ # クローンごとのエージェントセッション保存 -├── reports/ # 実行レポート(自動生成) -└── logs/ # セッションログ(インクリメンタル) - ├── latest.json # 現在/最新セッションへのポインタ - ├── previous.json # 前回セッションへのポインタ - └── {sessionId}.json # ワークフロー実行ごとの完全なセッションログ -``` +| `permission_mode` | `default` | パーミッションモード: `default`、`acceptEdits`、`bypassPermissions` | +| `report` | - | 自動生成レポートのファイル設定(name, format) | ## API使用例 @@ -434,7 +492,7 @@ await engine.run(); ## Docker サポート -他の環境でのテスト用にDocker環境が提供されています: +他の環境でのテスト用にDocker環境が提供されています: ```bash # Dockerイメージをビルド @@ -458,7 +516,7 @@ docker compose run --rm build - [Agent Guide](./agents.md) - カスタムエージェントの設定 - [Changelog](../CHANGELOG.md) - バージョン履歴 - [Security Policy](../SECURITY.md) - 脆弱性報告 -- [ブログ:TAKT - AIエージェントオーケストレーション](https://zenn.dev/nrs/articles/c6842288a526d7) - 設計思想と実践的な使い方ガイド +- [ブログ: TAKT - AIエージェントオーケストレーション](https://zenn.dev/nrs/articles/c6842288a526d7) - 設計思想と実践的な使い方ガイド ## ライセンス From cd67a2355a5b1cae730c3be0ddd0ef38eb9bc83c Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 21:11:41 +0900 Subject: [PATCH 3/3] =?UTF-8?q?feat:=20WorkflowEngine=E3=81=AE=E3=83=A2?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=A4=E3=83=B3=E3=83=86=E3=82=B0=E3=83=AC?= =?UTF-8?q?=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20(#17)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
runAgentをモックし、ワークフロー全体の状態遷移を検証する インテグレーションテストを追加。 テストケース: - 正常フロー(Happy Path) - 差し戻しフロー(review reject → fix → re-review) - AI review差し戻し(ai_review → ai_fix → ai_review) - エラー: ルール未マッチ、runAgent例外 - ループ検出 - イテレーション上限 - blockedハンドリング(onUserInputあり/なし) - パラレルステップ集約(all/any条件) - rulesのnextがundefinedのケース --- src/__tests__/engine-blocked.test.ts | 143 ++++++++++ src/__tests__/engine-error.test.ts | 222 +++++++++++++++ src/__tests__/engine-happy-path.test.ts | 344 ++++++++++++++++++++++++ src/__tests__/engine-parallel.test.ts | 162 +++++++++++ src/__tests__/engine-test-helpers.ts | 175 ++++++++++++ src/__tests__/transitions.test.ts | 18 ++ 6 files changed, 1064 insertions(+) create mode 100644 src/__tests__/engine-blocked.test.ts create mode 100644 src/__tests__/engine-error.test.ts create mode 100644 src/__tests__/engine-happy-path.test.ts create mode 100644 src/__tests__/engine-parallel.test.ts create mode 100644 src/__tests__/engine-test-helpers.ts diff --git a/src/__tests__/engine-blocked.test.ts b/src/__tests__/engine-blocked.test.ts new file mode 100644 index 0000000..9ee707a --- /dev/null +++ b/src/__tests__/engine-blocked.test.ts @@ -0,0 +1,143 @@ +/** + * WorkflowEngine integration tests: blocked handling scenarios. 
+ * + * Covers: + * - Blocked without onUserInput callback (abort) + * - Blocked with onUserInput returning null (abort) + * - Blocked with onUserInput providing input (continue) + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { + makeResponse, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Blocked Handling', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should abort when blocked and no onUserInput callback', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', status: 'blocked', content: 'Need clarification' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + ]); + + const blockedFn = vi.fn(); + const abortFn = vi.fn(); + engine.on('step:blocked', blockedFn); + engine.on('workflow:abort', 
abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(blockedFn).toHaveBeenCalledOnce(); + expect(abortFn).toHaveBeenCalledOnce(); + }); + + it('should abort when blocked and onUserInput returns null', async () => { + const config = buildDefaultWorkflowConfig(); + const onUserInput = vi.fn().mockResolvedValue(null); + const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput }); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(onUserInput).toHaveBeenCalledOnce(); + }); + + it('should continue when blocked and onUserInput provides input', async () => { + const config = buildDefaultWorkflowConfig(); + const onUserInput = vi.fn().mockResolvedValueOnce('User provided clarification'); + const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput }); + + mockRunAgentSequence([ + // First: plan is blocked + makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }), + // Second: plan succeeds after user input + makeResponse({ agent: 'plan', content: 'Plan done with user input' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + // First plan call: blocked, rule matched but blocked handling takes over + { index: 0, method: 'phase1_tag' }, + // Second plan call: success + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, 
method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const userInputFn = vi.fn(); + engine.on('step:user_input', userInputFn); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(onUserInput).toHaveBeenCalledOnce(); + expect(userInputFn).toHaveBeenCalledOnce(); + expect(state.userInputs).toContain('User provided clarification'); + }); +}); diff --git a/src/__tests__/engine-error.test.ts b/src/__tests__/engine-error.test.ts new file mode 100644 index 0000000..7e9006b --- /dev/null +++ b/src/__tests__/engine-error.test.ts @@ -0,0 +1,222 @@ +/** + * WorkflowEngine integration tests: error handling scenarios. + * + * Covers: + * - No rule matched (abort) + * - runAgent throws (abort) + * - Loop detection (abort) + * - Iteration limit (abort and extend) + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { runAgent } from '../agents/runner.js'; +import { detectMatchedRule } from '../workflow/rule-evaluator.js'; +import { + makeResponse, + makeStep, + makeRule, + buildDefaultWorkflowConfig, + 
mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Error Handling', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + // ===================================================== + // 1. No rule matched + // ===================================================== + describe('No rule matched', () => { + it('should abort when detectMatchedRule returns undefined', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Unclear output' }), + ]); + + mockDetectMatchedRuleSequence([undefined]); + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('plan'); + }); + }); + + // ===================================================== + // 2. 
runAgent throws + // ===================================================== + describe('runAgent throws', () => { + it('should abort when runAgent throws an error', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + vi.mocked(runAgent).mockRejectedValueOnce(new Error('API connection failed')); + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('API connection failed'); + }); + }); + + // ===================================================== + // 3. Loop detection + // ===================================================== + describe('Loop detection', () => { + it('should abort when loop detected with action: abort', async () => { + const config = buildDefaultWorkflowConfig({ + maxIterations: 100, + loopDetection: { maxConsecutiveSameStep: 3, action: 'abort' }, + initialStep: 'loop-step', + steps: [ + makeStep('loop-step', { + rules: [makeRule('continue', 'loop-step')], + }), + ], + }); + + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + for (let i = 0; i < 5; i++) { + vi.mocked(runAgent).mockResolvedValueOnce( + makeResponse({ content: `iteration ${i}` }) + ); + vi.mocked(detectMatchedRule).mockResolvedValueOnce( + { index: 0, method: 'phase1_tag' } + ); + } + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('Loop detected'); + expect(reason).toContain('loop-step'); + }); + }); + + // ===================================================== + // 4. 
Iteration limit + // ===================================================== + describe('Iteration limit', () => { + it('should abort when max iterations reached without onIterationLimit callback', async () => { + const config = buildDefaultWorkflowConfig({ maxIterations: 2 }); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers (won't be reached) + ]); + + const limitFn = vi.fn(); + const abortFn = vi.fn(); + engine.on('iteration:limit', limitFn); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(limitFn).toHaveBeenCalledWith(2, 2); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('Max iterations'); + }); + + it('should extend iterations when onIterationLimit provides additional iterations', async () => { + const config = buildDefaultWorkflowConfig({ maxIterations: 2 }); + + const onIterationLimit = vi.fn().mockResolvedValueOnce(10); + + const engine = new WorkflowEngine(config, tmpDir, 'test task', { + onIterationLimit, + }); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + // After hitting limit at iteration 2, onIterationLimit extends to 12 + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + 
mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(onIterationLimit).toHaveBeenCalledOnce(); + }); + }); +}); diff --git a/src/__tests__/engine-happy-path.test.ts b/src/__tests__/engine-happy-path.test.ts new file mode 100644 index 0000000..8292b69 --- /dev/null +++ b/src/__tests__/engine-happy-path.test.ts @@ -0,0 +1,344 @@ +/** + * WorkflowEngine integration tests: happy path and normal flow scenarios. + * + * Covers: + * - Full happy path (plan → implement → ai_review → reviewers → supervise → COMPLETE) + * - Review reject and fix loop + * - AI review reject and fix + * - ABORT transition + * - Event emissions + * - Step output tracking + * - Config validation + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; +import type { WorkflowConfig, WorkflowStep } from '../models/types.js'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) 
--- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { runAgent } from '../agents/runner.js'; +import { + makeResponse, + makeStep, + makeRule, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Happy Path', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + // ===================================================== + // 1. Happy Path + // ===================================================== + describe('Happy path', () => { + it('should complete: plan → implement → ai_review → reviewers(all approved) → supervise → COMPLETE', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan complete' }), + makeResponse({ agent: 'implement', content: 'Implementation done' }), + makeResponse({ agent: 'ai_review', content: 'No issues' }), + makeResponse({ agent: 'arch-review', content: 'Architecture OK' }), + makeResponse({ agent: 'security-review', content: 'Security OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers(all approved) → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const completeFn = vi.fn(); + 
engine.on('workflow:complete', completeFn); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(5); // plan, implement, ai_review, reviewers, supervise + expect(completeFn).toHaveBeenCalledOnce(); + expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6); // 4 normal + 2 parallel sub-steps + }); + }); + + // ===================================================== + // 2. Review reject and fix loop + // ===================================================== + describe('Review reject and fix loop', () => { + it('should handle: reviewers(needs_fix) → fix → reviewers(all approved) → supervise → COMPLETE', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'No issues' }), + // Round 1 reviewers: arch approved, security needs fix + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'Vulnerability found' }), + // fix step + makeResponse({ agent: 'fix', content: 'Fixed security issue' }), + // Round 2 reviewers: both approved + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'Security OK now' }), + // supervise + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 1, method: 'phase1_tag' }, // security-review → needs_fix + { index: 1, method: 'aggregate' }, // reviewers: any(needs_fix) → fix + { index: 0, method: 'phase1_tag' }, // fix → 
reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers: all(approved) → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + // plan, implement, ai_review, reviewers(1st), fix, reviewers(2nd), supervise = 7 + expect(state.iteration).toBe(7); + }); + }); + + // ===================================================== + // 3. AI review reject and fix + // ===================================================== + describe('AI review reject and fix', () => { + it('should handle: ai_review(issues) → ai_fix → reviewers → supervise → COMPLETE', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'AI issues found' }), + makeResponse({ agent: 'ai_fix', content: 'Issues fixed' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 1, method: 'phase1_tag' }, // ai_review → ai_fix (issues found) + { index: 0, method: 'phase1_tag' }, // ai_fix → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const state = await engine.run(); + + 
expect(state.status).toBe('completed'); + // plan, implement, ai_review, ai_fix, reviewers, supervise = 6 + expect(state.iteration).toBe(6); + }); + }); + + // ===================================================== + // 4. ABORT transition + // ===================================================== + describe('ABORT transition', () => { + it('should abort when step transitions to ABORT', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Requirements unclear' }), + ]); + + // plan rule index 1 → ABORT + mockDetectMatchedRuleSequence([ + { index: 1, method: 'phase1_tag' }, + ]); + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + }); + }); + + // ===================================================== + // 5. Event emissions + // ===================================================== + describe('Event emissions', () => { + it('should emit step:start and step:complete for each step', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + makeResponse({ agent: 'implement', content: 'Impl' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'Pass' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + const startFn 
= vi.fn(); + const completeFn = vi.fn(); + engine.on('step:start', startFn); + engine.on('step:complete', completeFn); + + await engine.run(); + + // 5 steps: plan, implement, ai_review, reviewers, supervise + expect(startFn).toHaveBeenCalledTimes(5); + expect(completeFn).toHaveBeenCalledTimes(5); + + const startedSteps = startFn.mock.calls.map(call => (call[0] as WorkflowStep).name); + expect(startedSteps).toEqual(['plan', 'implement', 'ai_review', 'reviewers', 'supervise']); + }); + + it('should emit iteration:limit when max iterations reached', async () => { + const config = buildDefaultWorkflowConfig({ maxIterations: 1 }); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + ]); + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + ]); + + const limitFn = vi.fn(); + engine.on('iteration:limit', limitFn); + + await engine.run(); + + expect(limitFn).toHaveBeenCalledWith(1, 1); + }); + }); + + // ===================================================== + // 6. 
Step output tracking + // ===================================================== + describe('Step output tracking', () => { + it('should store outputs for all executed steps', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan output' }), + makeResponse({ agent: 'implement', content: 'Implement output' }), + makeResponse({ agent: 'ai_review', content: 'AI review output' }), + makeResponse({ agent: 'arch-review', content: 'Arch output' }), + makeResponse({ agent: 'security-review', content: 'Sec output' }), + makeResponse({ agent: 'supervise', content: 'Supervise output' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.stepOutputs.get('plan')!.content).toBe('Plan output'); + expect(state.stepOutputs.get('implement')!.content).toBe('Implement output'); + expect(state.stepOutputs.get('ai_review')!.content).toBe('AI review output'); + expect(state.stepOutputs.get('supervise')!.content).toBe('Supervise output'); + }); + }); + + // ===================================================== + // 7. 
Config validation + // ===================================================== + describe('Config validation', () => { + it('should throw when initial step does not exist', () => { + const config = buildDefaultWorkflowConfig({ initialStep: 'nonexistent' }); + + expect(() => { + new WorkflowEngine(config, tmpDir, 'test task'); + }).toThrow('Unknown step: nonexistent'); + }); + + it('should throw when rule references nonexistent step', () => { + const config: WorkflowConfig = { + name: 'test', + maxIterations: 10, + initialStep: 'step1', + steps: [ + makeStep('step1', { + rules: [makeRule('done', 'nonexistent_step')], + }), + ], + }; + + expect(() => { + new WorkflowEngine(config, tmpDir, 'test task'); + }).toThrow('nonexistent_step'); + }); + }); +}); diff --git a/src/__tests__/engine-parallel.test.ts b/src/__tests__/engine-parallel.test.ts new file mode 100644 index 0000000..7e11741 --- /dev/null +++ b/src/__tests__/engine-parallel.test.ts @@ -0,0 +1,162 @@ +/** + * WorkflowEngine integration tests: parallel step aggregation. 
+ * + * Covers: + * - Aggregated output format (## headers and --- separators) + * - Individual sub-step output storage + * - Concurrent execution of sub-steps + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { runAgent } from '../agents/runner.js'; +import { + makeResponse, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Parallel Step Aggregation', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should aggregate sub-step outputs with ## headers and --- separators', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'Architecture review content' 
}), + makeResponse({ agent: 'security-review', content: 'Security review content' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, // arch-review + { index: 0, method: 'phase1_tag' }, // security-review + { index: 0, method: 'aggregate' }, // reviewers + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + + const reviewersOutput = state.stepOutputs.get('reviewers'); + expect(reviewersOutput).toBeDefined(); + expect(reviewersOutput!.content).toContain('## arch-review'); + expect(reviewersOutput!.content).toContain('Architecture review content'); + expect(reviewersOutput!.content).toContain('---'); + expect(reviewersOutput!.content).toContain('## security-review'); + expect(reviewersOutput!.content).toContain('Security review content'); + expect(reviewersOutput!.matchedRuleMethod).toBe('aggregate'); + }); + + it('should store individual sub-step outputs in stepOutputs', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + makeResponse({ agent: 'implement', content: 'Impl' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'Arch content' }), + makeResponse({ agent: 'security-review', content: 'Sec content' }), + makeResponse({ agent: 'supervise', content: 'Pass' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + 
const state = await engine.run(); + + expect(state.stepOutputs.has('arch-review')).toBe(true); + expect(state.stepOutputs.has('security-review')).toBe(true); + expect(state.stepOutputs.has('reviewers')).toBe(true); + expect(state.stepOutputs.get('arch-review')!.content).toBe('Arch content'); + expect(state.stepOutputs.get('security-review')!.content).toBe('Sec content'); + }); + + it('should execute sub-steps concurrently (both runAgent calls happen)', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + makeResponse({ agent: 'implement', content: 'Impl' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'Pass' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + await engine.run(); + + // 6 total: 4 normal + 2 parallel sub-steps + expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6); + + const calledAgents = vi.mocked(runAgent).mock.calls.map(call => call[0]); + expect(calledAgents).toContain('../agents/arch-review.md'); + expect(calledAgents).toContain('../agents/security-review.md'); + }); +}); diff --git a/src/__tests__/engine-test-helpers.ts b/src/__tests__/engine-test-helpers.ts new file mode 100644 index 0000000..9a79f11 --- /dev/null +++ b/src/__tests__/engine-test-helpers.ts @@ -0,0 +1,175 @@ +/** + * Shared helpers for WorkflowEngine integration tests. 
+ *
+ * Provides mock setup, factory functions, and a default workflow config
+ * matching the parallel reviewers structure (plan → implement → ai_review → reviewers → supervise).
+ */
+
+import { vi } from 'vitest';
+import { mkdirSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID } from 'node:crypto';
+import type { WorkflowConfig, WorkflowStep, AgentResponse, WorkflowRule } from '../models/types.js';
+
+// --- Mock imports (consumers must call vi.mock before importing this) ---
+
+import { runAgent } from '../agents/runner.js';
+import { detectMatchedRule } from '../workflow/rule-evaluator.js';
+import type { RuleMatch } from '../workflow/rule-evaluator.js';
+import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../workflow/phase-runner.js';
+import { generateReportDir } from '../utils/session.js';
+
+// --- Factory functions ---
+
+export function makeResponse(overrides: Partial<AgentResponse> = {}): AgentResponse {
+  return {
+    agent: 'test-agent',
+    status: 'done',
+    content: 'test response',
+    timestamp: new Date(),
+    sessionId: `session-${randomUUID()}`,
+    ...overrides,
+  };
+}
+
+export function makeRule(condition: string, next: string, extra: Partial<WorkflowRule> = {}): WorkflowRule {
+  return { condition, next, ...extra };
+}
+
+export function makeStep(name: string, overrides: Partial<WorkflowStep> = {}): WorkflowStep {
+  return {
+    name,
+    agent: `../agents/${name}.md`,
+    agentDisplayName: name,
+    instructionTemplate: `Run ${name}`,
+    passPreviousResponse: true,
+    ...overrides,
+  };
+}
+
+/**
+ * Build a workflow config matching the default.yaml parallel reviewers structure:
+ * plan → implement → ai_review → (ai_fix↔) → reviewers(parallel) → (fix↔) → supervise
+ */
+export function buildDefaultWorkflowConfig(overrides: Partial<WorkflowConfig> = {}): WorkflowConfig {
+  const archReviewSubStep = makeStep('arch-review', {
+    rules: [
+      makeRule('approved', 'COMPLETE'),
+      makeRule('needs_fix', 'fix'),
+    ],
+  });
+
+  const 
securityReviewSubStep = makeStep('security-review', { + rules: [ + makeRule('approved', 'COMPLETE'), + makeRule('needs_fix', 'fix'), + ], + }); + + return { + name: 'test-default', + description: 'Test workflow', + maxIterations: 30, + initialStep: 'plan', + steps: [ + makeStep('plan', { + rules: [ + makeRule('Requirements are clear', 'implement'), + makeRule('Requirements unclear', 'ABORT'), + ], + }), + makeStep('implement', { + rules: [ + makeRule('Implementation complete', 'ai_review'), + makeRule('Cannot proceed', 'plan'), + ], + }), + makeStep('ai_review', { + rules: [ + makeRule('No AI-specific issues', 'reviewers'), + makeRule('AI-specific issues found', 'ai_fix'), + ], + }), + makeStep('ai_fix', { + rules: [ + makeRule('AI issues fixed', 'reviewers'), + makeRule('Cannot proceed', 'plan'), + ], + }), + makeStep('reviewers', { + parallel: [archReviewSubStep, securityReviewSubStep], + rules: [ + makeRule('all("approved")', 'supervise', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'approved', + }), + makeRule('any("needs_fix")', 'fix', { + isAggregateCondition: true, + aggregateType: 'any', + aggregateConditionText: 'needs_fix', + }), + ], + }), + makeStep('fix', { + rules: [ + makeRule('Fix complete', 'reviewers'), + makeRule('Cannot proceed', 'plan'), + ], + }), + makeStep('supervise', { + rules: [ + makeRule('All checks passed', 'COMPLETE'), + makeRule('Requirements unmet', 'plan'), + ], + }), + ], + ...overrides, + }; +} + +// --- Mock sequence helpers --- + +/** + * Configure runAgent mock to return a sequence of responses. + */ +export function mockRunAgentSequence(responses: AgentResponse[]): void { + const mock = vi.mocked(runAgent); + for (const response of responses) { + mock.mockResolvedValueOnce(response); + } +} + +/** + * Configure detectMatchedRule mock to return a sequence of rule matches. 
+ */ +export function mockDetectMatchedRuleSequence(matches: (RuleMatch | undefined)[]): void { + const mock = vi.mocked(detectMatchedRule); + for (const match of matches) { + mock.mockResolvedValueOnce(match); + } +} + +// --- Test environment setup --- + +/** + * Create a temporary directory with the required .takt/reports structure. + * Returns the tmpDir path. Caller is responsible for cleanup. + */ +export function createTestTmpDir(): string { + const tmpDir = join(tmpdir(), `takt-engine-test-${randomUUID()}`); + mkdirSync(tmpDir, { recursive: true }); + mkdirSync(join(tmpDir, '.takt', 'reports', 'test-report-dir'), { recursive: true }); + return tmpDir; +} + +/** + * Re-apply default mocks for phase-runner and session after vi.resetAllMocks(). + */ +export function applyDefaultMocks(): void { + vi.mocked(needsStatusJudgmentPhase).mockReturnValue(false); + vi.mocked(runReportPhase).mockResolvedValue(undefined); + vi.mocked(runStatusJudgmentPhase).mockResolvedValue(''); + vi.mocked(generateReportDir).mockReturnValue('test-report-dir'); +} diff --git a/src/__tests__/transitions.test.ts b/src/__tests__/transitions.test.ts index fcc8890..7ea899a 100644 --- a/src/__tests__/transitions.test.ts +++ b/src/__tests__/transitions.test.ts @@ -60,4 +60,22 @@ describe('determineNextStepByRules', () => { expect(determineNextStepByRules(step, 0)).toBe('COMPLETE'); }); + + it('should return null when rule exists but next is undefined', () => { + // Parallel sub-step rules may omit `next` (optional field) + const step: WorkflowStep = { + name: 'sub-step', + agent: 'test-agent', + agentDisplayName: 'Test Agent', + instructionTemplate: '{task}', + passPreviousResponse: false, + rules: [ + { condition: 'approved' }, + { condition: 'needs_fix' }, + ], + }; + + expect(determineNextStepByRules(step, 0)).toBeNull(); + expect(determineNextStepByRules(step, 1)).toBeNull(); + }); });