#!/usr/bin/env node
/**
 * KIS-TOiR Orchestrator — Multi-Agent Coordination Engine
 *
 * This tool uses the Claude Agent SDK to coordinate specialized subagents
 * for KIS-TOiR generation workflows. The orchestrator:
 *
 * 1. Runs discovery + contract freeze via a single orchestrator prompt
 * 2. Dispatches bounded generator tasks in parallel streams
 *    (Prisma / NestJS / React Admin) with graceful error isolation
 * 3. Collects per-stream outputs with write-zone + contract accountability
 * 4. Emits performance metrics for baseline-vs-optimized comparison
 *
 * The public entry point `orchestrate(prompt, options)` is preserved for
 * backward compatibility. A newer `generate(tasks, options)` entry point
 * exposes the structured graceful-error + parallel pipeline for callers
 * that want programmatic control over per-agent failure modes.
 *
 * USAGE:
 *   npm run orchestrate "Generate Prisma schema from domain/toir.api.dsl"
 */

import { query } from "@anthropic-ai/claude-agent-sdk";
import agents from "../agents/definitions.js";
import { PerformanceMonitor } from "./performance-monitor.js";
import { getProjectRoot } from "./lib/project-root.mjs";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";

const projectRoot = getProjectRoot();

// ---------------------------------------------------------------------------
// Types — graceful error handling contract
// ---------------------------------------------------------------------------

/** Overall pipeline outcome: all ok / only optional failures / any critical failure. */
export type GenerationStatus = "success" | "partial" | "failed";

/** One failed stage, flattened for reporting. */
export interface GenerationError {
  stage: string;
  agent: string;
  message: string;
  /** ISO-8601 time at which the error record was built (not when the stage failed). */
  timestamp: string;
  /**
   * Whether a partial artifact was still returned despite this error.
   * Reviewers use this flag to decide whether to attempt bounded repair.
   */
  partialOutputAvailable: boolean;
}

/** Outcome of a single generator task, successful or not. */
export interface AgentRunResult {
  agent: string;
  stage: string;
  /** Whether this stage is allowed to fail without stopping generation. */
  critical: boolean;
  ok: boolean;
  durationMs: number;
  /** Final text result of the stage; present only when `ok` is true. */
  output?: string;
  /** Failure message; present only when `ok` is false. */
  error?: string;
}

/** Full result of a `generate()` call, returned regardless of status. */
export interface GenerationOutput {
  status: GenerationStatus;
  runs: AgentRunResult[];
  errors: GenerationError[];
  metrics: ReturnType<PerformanceMonitor["getMetrics"]>;
  /** Wall-clock ms, end-to-end. */
  totalMs: number;
  /** Sum of per-stage durations — a conservative sequential baseline. */
  sequentialBaselineMs: number;
  /** Max per-stage duration — the best case for pure parallelism. */
  parallelLowerBoundMs: number;
  /** Percentage by which `totalMs` undercuts `sequentialBaselineMs` (0 when no time was recorded). */
  improvementPct: number;
}

/**
 * Description of one generator task. The orchestrator treats each task as a
 * self-contained query() invocation and records its success/failure in
 * isolation. Critical tasks block the pipeline on failure; optional ones
 * degrade to partial outputs.
 */
export interface GeneratorTask {
  stage: string;
  agent: keyof typeof agents;
  critical: boolean;
  /** Human-readable task description that will be embedded in the prompt. */
  task: string;
  /** The write-zones this agent is permitted to touch. */
  writeZones: string[];
}

// ---------------------------------------------------------------------------
// Logging helpers — 3 level logger with ISO timestamps, no external deps
// ---------------------------------------------------------------------------

type LogLevel = "INFO" | "WARN" | "ERROR";

/**
 * Write one timestamped line to the console stream matching `level`
 * (ERROR → stderr via console.error, WARN → console.warn, INFO → console.log).
 */
function log(level: LogLevel, message: string): void {
  const ts = new Date().toISOString();
  const line = `[${ts}] [${level}] ${message}`;
  if (level === "ERROR") {
    // eslint-disable-next-line no-console
    console.error(line);
  } else if (level === "WARN") {
    // eslint-disable-next-line no-console
    console.warn(line);
  } else {
    // eslint-disable-next-line no-console
    console.log(line);
  }
}

// ---------------------------------------------------------------------------
// Low-level: run a single query() stream and return its final text result
// ---------------------------------------------------------------------------

interface RunQueryOptions {
  prompt: string;
  verbose?: boolean;
}

/**
 * Execute one `query()` call end-to-end and return the final text result.
 *
 * We iterate the async message stream and capture the `result` message.
 * Exceptions raised by the stream propagate to the caller unchanged. In
 * addition, a final result message flagged `is_error` (for example when the
 * run hit its turn limit or failed during execution) is converted into a
 * thrown Error, so that the caller's try/catch classifies it as a stage
 * failure instead of recording an empty "success".
 *
 * @param prompt  Full prompt text handed to the main agent.
 * @param verbose When true, log thinking snippets and tool names as they stream.
 * @returns The text of the last message that carried a `result` field, or ""
 *          if no such message was seen.
 * @throws Error when the stream throws or ends with an error-flagged result.
 */
async function runQueryStream({
  prompt,
  verbose = false,
}: RunQueryOptions): Promise<string> {
  let finalResult = "";
  let failure: string | undefined;

  for await (const message of query({
    prompt,
    options: {
      // Agent tool must be in allowedTools for subagent delegation.
      allowedTools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep", "Agent"],
      agents,
    },
  })) {
    // The SDK streams a discriminated-union message type; we duck-type the
    // few shapes we care about via `unknown` to avoid coupling to SDK
    // internals while still keeping strict-mode safety.
    const raw = message as unknown as Record<string, unknown>;

    if (verbose && typeof raw.thinking === "string") {
      log("INFO", `💭 ${raw.thinking.substring(0, 160)}...`);
    }

    if (verbose && raw.toolUse && typeof raw.toolUse === "object") {
      const toolUse = raw.toolUse as { name?: string };
      log("INFO", `🔧 Tool: ${toolUse.name ?? "unknown"}`);
    }

    const hasResultText = "result" in raw;
    if (hasResultText) {
      finalResult = String(raw.result ?? "");
    }

    // Error-flagged result messages do not throw on their own; remember the
    // failure and raise it once the stream has been drained.
    if (raw.type === "result" && raw.is_error === true) {
      const subtype = typeof raw.subtype === "string" ? raw.subtype : "unknown";
      failure =
        hasResultText && finalResult.length > 0
          ? finalResult
          : `query ended with an error result (subtype=${subtype})`;
    }
  }

  if (failure !== undefined) {
    throw new Error(failure);
  }
  return finalResult;
}

// ---------------------------------------------------------------------------
// Per-task runner — wraps runQueryStream with timing + graceful error capture
// ---------------------------------------------------------------------------

/**
 * Render the bounded prompt for one generator task.
 *
 * Each task receives a prompt naming the target subagent, the
 * frozen-contract task text, and the explicit write-zones. The main agent
 * is expected to delegate to the matching subagent via the Agent tool.
 */
function buildAgentPrompt(task: GeneratorTask): string {
  return `You are the KIS-TOiR orchestrator running a single bounded generator task.

Target subagent: ${task.agent}
Stage: ${task.stage}
Write-zones (STRICT — violations must be rejected):
${task.writeZones.map((z) => `  - ${z}`).join("\n")}

Task (from frozen contract):
${task.task}

Delegation rules:
- Use the ${task.agent} agent via the Agent tool for all code generation.
- Do not touch files outside the declared write-zones.
- Report back a short summary of what was changed and any blockers.
- If you cannot complete the task, return a clear error message — do NOT fabricate partial success.`;
}

/**
 * Run one generator task and convert its outcome into an AgentRunResult.
 *
 * Never rejects: any exception from the query stream is caught, logged, and
 * returned as `ok: false` with the error message. The duration is recorded
 * on `monitor` under the stage name (suffixed with " (failed)" on failure).
 */
async function runTask(
  task: GeneratorTask,
  monitor: PerformanceMonitor,
  verbose: boolean
): Promise<AgentRunResult> {
  const started = Date.now();
  log("INFO", `▶️ stage=${task.stage} agent=${task.agent} critical=${task.critical}`);

  try {
    const output = await runQueryStream({
      prompt: buildAgentPrompt(task),
      verbose,
    });
    const durationMs = Date.now() - started;
    monitor.record(task.stage, durationMs);
    log("INFO", `✅ stage=${task.stage} completed in ${durationMs}ms`);
    return {
      agent: task.agent,
      stage: task.stage,
      critical: task.critical,
      ok: true,
      durationMs,
      output,
    };
  } catch (err) {
    const durationMs = Date.now() - started;
    monitor.record(`${task.stage} (failed)`, durationMs);
    const message = err instanceof Error ? err.message : String(err);
    log("ERROR", `❌ stage=${task.stage} failed after ${durationMs}ms: ${message}`);
    return {
      agent: task.agent,
      stage: task.stage,
      critical: task.critical,
      ok: false,
      durationMs,
      error: message,
    };
  }
}

// ---------------------------------------------------------------------------
// High-level: structured parallel generation with graceful error handling
// ---------------------------------------------------------------------------

export interface GenerateOptions {
  verbose?: boolean;
  /**
   * When true, run tasks sequentially instead of in parallel. Primarily used
   * to capture the baseline timing for speedup comparisons.
   */
  sequential?: boolean;
  /**
   * Destination for the metrics JSON report. Defaults to
   * `<projectRoot>/generation-metrics.json`.
   */
  metricsPath?: string;
}

/**
 * Structured generation pipeline with per-task failure isolation.
 *
 * Tasks run in parallel by default, or one after another when
 * `options.sequential` is set. In sequential mode a failed critical task
 * aborts the remaining tasks; in parallel mode every task runs to completion.
 * Either way, any failed critical task yields status "failed", failures of
 * optional tasks only yield "partial". Regardless of outcome, a metrics
 * report is written to disk (best-effort) and a full GenerationOutput is
 * returned so callers can run bounded repair or surface the failure.
 *
 * @param tasks   Generator tasks to execute.
 * @param options Verbosity, execution mode, and metrics report location.
 * @returns Status, per-task results, flattened errors, and timing figures.
 */
export async function generate(
  tasks: GeneratorTask[],
  options: GenerateOptions = {}
): Promise<GenerationOutput> {
  const { verbose = false, sequential = false } = options;
  const monitor = new PerformanceMonitor();
  const wallStart = Date.now();

  log(
    "INFO",
    `starting generation: tasks=${tasks.length} mode=${sequential ? "sequential" : "parallel"}`
  );

  let runs: AgentRunResult[];

  if (sequential) {
    // Baseline path — useful for "measure first" speedup comparisons.
    runs = [];
    for (const task of tasks) {
      // Short-circuit on critical failure to match real-world sequential behaviour.
      const result = await runTask(task, monitor, verbose);
      runs.push(result);
      if (!result.ok && result.critical) {
        log("WARN", `aborting sequential run after critical failure: ${result.stage}`);
        break;
      }
    }
  } else {
    // Parallel path — Promise.allSettled is mandatory here: Promise.all would
    // reject on the first failure and lose partial outputs from siblings.
    // Since each runTask already catches its own errors, all outcomes arrive
    // as fulfilled promises, but allSettled keeps us safe against bugs.
    const settled = await Promise.allSettled(
      tasks.map((task) => runTask(task, monitor, verbose))
    );
    runs = settled.map((s, i) => {
      if (s.status === "fulfilled") return s.value;
      const task = tasks[i];
      const message = s.reason instanceof Error ? s.reason.message : String(s.reason);
      log("ERROR", `unexpected rejection for stage=${task.stage}: ${message}`);
      return {
        agent: task.agent,
        stage: task.stage,
        critical: task.critical,
        ok: false,
        durationMs: 0,
        error: message,
      };
    });
  }

  const totalMs = Date.now() - wallStart;

  // ---- Classify outcome ------------------------------------------------
  const criticalFailures = runs.filter((r) => !r.ok && r.critical);
  const optionalFailures = runs.filter((r) => !r.ok && !r.critical);
  const anyFailure = criticalFailures.length + optionalFailures.length > 0;

  let status: GenerationStatus;
  if (criticalFailures.length > 0) {
    status = "failed";
  } else if (optionalFailures.length > 0) {
    status = "partial";
  } else {
    status = "success";
  }

  const errors: GenerationError[] = runs
    .filter((r) => !r.ok)
    .map((r) => ({
      stage: r.stage,
      agent: r.agent,
      message: r.error ?? "unknown error",
      timestamp: new Date().toISOString(),
      // Mirrors the status classification above: the flag is set for
      // optional failures and cleared for critical ones.
      partialOutputAvailable: !r.critical,
    }));

  // ---- Speedup accounting ---------------------------------------------
  // Sequential baseline = sum of per-stage durations (what this would have
  // cost if we ran each stage one after another).
  // Parallel lower bound = the longest single stage (best case for pure
  // parallel execution). `totalMs` is the actual wall-clock observed.
  const sequentialBaselineMs = runs.reduce((acc, r) => acc + r.durationMs, 0);
  const parallelLowerBoundMs = runs.reduce(
    (acc, r) => Math.max(acc, r.durationMs),
    0
  );
  const improvementPct =
    sequentialBaselineMs > 0
      ? ((sequentialBaselineMs - totalMs) / sequentialBaselineMs) * 100
      : 0;

  monitor.summary(sequential ? "Sequential baseline" : "Parallel generation");

  // ---- Persist metrics report -----------------------------------------
  const metricsPath =
    options.metricsPath ?? path.join(projectRoot, "generation-metrics.json");
  const metricsSnapshot = monitor.getMetrics();
  try {
    const report = {
      generatedAt: new Date().toISOString(),
      mode: sequential ? "sequential" : "parallel",
      status,
      totalMs,
      sequentialBaselineMs,
      parallelLowerBoundMs,
      improvementPct: Number(improvementPct.toFixed(2)),
      // Stage outputs can be large; the report keeps only the bookkeeping fields.
      runs: runs.map(({ output: _output, ...rest }) => rest),
      errors,
      metrics: metricsSnapshot,
    };
    // A caller-supplied metricsPath may point into a directory that does not
    // exist yet; create it so the report is not silently dropped.
    fs.mkdirSync(path.dirname(metricsPath), { recursive: true });
    fs.writeFileSync(metricsPath, JSON.stringify(report, null, 2));
    log("INFO", `metrics written to ${metricsPath}`);
  } catch (err) {
    // Reporting is best-effort: a failed write must not fail the generation.
    const message = err instanceof Error ? err.message : String(err);
    log("WARN", `could not write metrics report: ${message}`);
  }

  if (status === "success") {
    log("INFO", `🎉 generation completed successfully in ${totalMs}ms`);
  } else if (status === "partial") {
    log(
      "WARN",
      `generation completed with ${optionalFailures.length} optional failure(s) in ${totalMs}ms — partial outputs returned`
    );
  } else {
    log(
      "ERROR",
      `generation FAILED with ${criticalFailures.length} critical failure(s) in ${totalMs}ms`
    );
  }

  if (anyFailure && verbose) {
    for (const err of errors) {
      log("WARN", `  ${err.stage} (${err.agent}): ${err.message}`);
    }
  }

  return {
    status,
    runs,
    errors,
    metrics: metricsSnapshot,
    totalMs,
    sequentialBaselineMs,
    parallelLowerBoundMs,
    improvementPct,
  };
}

// ---------------------------------------------------------------------------
// Backward-compatible monolithic entry point
// ---------------------------------------------------------------------------

export interface OrchestrateOptions {
  verbose?: boolean;
}

/**
 * Single-prompt orchestration. This is the original entry point — it runs
 * one `query()` stream and lets the main Claude agent decide when to delegate
 * to subagents via the Agent tool. Preserved for backward compatibility with
 * existing scripts that call `orchestrate(prompt)`.
 *
 * Prefer `generate(tasks, options)` when you need programmatic per-agent
 * failure isolation, parallel execution, or structured metrics.
 *
 * @param mainPrompt The user task, appended to the master orchestrator prompt.
 * @param options    Optional verbosity flag.
 * @returns The final text result of the orchestration run (also printed).
 * @throws Re-throws any error from the query stream after logging it.
 */
export async function orchestrate(
  mainPrompt: string,
  options?: OrchestrateOptions
): Promise<string> {
  const verbose = options?.verbose ?? false;
  const monitor = new PerformanceMonitor();

  if (verbose) {
    log("INFO", `project root: ${projectRoot}`);
    log("INFO", `agents available: ${Object.keys(agents).length}`);
  }

  log("INFO", "🎯 orchestration task:");
  // eslint-disable-next-line no-console
  console.log(mainPrompt);
  log("INFO", "📡 starting agent coordination");

  monitor.mark("orchestrate-start");

  try {
    const result = await runQueryStream({
      prompt: `You are the KIS-TOiR master orchestrator for Claude Code.

Your role:
1. Read AGENTS.md, prompts/general-prompt.md, and relevant companion docs
2. Understand the current workspace state via the explorer agent if needed
3. Verify framework assumptions via docs_researcher if needed
4. Freeze a structured contract before specialized generation
5. Delegate bounded work to specialized agents
   (generator_prisma, generator_nest_resources, generator_react_admin_resources, generator_data_access, reviewer)
6. Accept or reject delegated outputs based on write-zone compliance and contract adherence
7. Integrate accepted outputs and run validation gates
8. Report completion only when both builds pass and evaluation gates succeed

Available agents (use by description match):
- explorer: for discovery and codebase exploration
- docs_researcher: for framework verification
- generator_prisma: for Prisma schema generation (after contract freeze)
- generator_nest_resources: for NestJS backend generation (after contract freeze)
- generator_react_admin_resources: for React Admin frontend generation (after contract freeze)
- generator_data_access: for frontend data-access integration (after contract freeze)
- reviewer: for final review (after validation)

Mandatory delegation rules:
- Do NOT generate anything yourself; use agents for specialized tasks
- Contract freeze must be explicit before generator delegation
- Accept/reject delegated outputs explicitly
- Write-zones are strictly enforced per agent (see .claude/CLAUDE.md)
- Validation gates are non-optional proof points

Your task:
${mainPrompt}`,
      verbose,
    });

    monitor.measure("orchestrate-total", "orchestrate-start");
    monitor.summary("Orchestration");

    // eslint-disable-next-line no-console
    console.log("\n=== ORCHESTRATION COMPLETE ===\n");
    // eslint-disable-next-line no-console
    console.log(result);

    return result;
  } catch (err) {
    monitor.measure("orchestrate-total (failed)", "orchestrate-start");
    const message = err instanceof Error ? err.message : String(err);
    log("ERROR", `orchestration failed: ${message}`);
    throw err;
  }
}

// ---------------------------------------------------------------------------
// CLI entry point
// ---------------------------------------------------------------------------

/**
 * CLI driver: joins all non-flag arguments into the task text, honours
 * `--verbose`, and exits with status 1 on a missing task or a failed run.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const verbose = args.includes("--verbose");
  const task = args
    .filter((a: string) => !a.startsWith("--"))
    .join(" ")
    .trim();

  // Validate the task text itself rather than the raw argument count, so an
  // invocation consisting only of flags (e.g. `--verbose`) is rejected too.
  if (task.length === 0) {
    // eslint-disable-next-line no-console
    console.error('Usage: orchestrator.ts "<task>" [--verbose]');
    // eslint-disable-next-line no-console
    console.error('Example: orchestrator.ts "Generate Prisma schema"');
    process.exit(1);
  }

  try {
    await orchestrate(task, { verbose });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    log("ERROR", `orchestration failed: ${message}`);
    process.exit(1);
  }
}

// Execute if this is the main module
if (
  (import.meta as unknown as { main?: boolean }).main ||
  process.argv[1] === fileURLToPath(import.meta.url)
) {
  void main();
}

export default orchestrate;