rebase generation

2026-04-07 19:40:41 +03:00
parent 73ddb1a948
commit aab7bfa691
180 changed files with 15512 additions and 364 deletions
--- a/tools/orchestrator.ts
+++ b/tools/orchestrator.ts
@@ -0,0 +1,527 @@
+#!/usr/bin/env node
+
+/**
+ * KIS-TOiR Orchestrator — Multi-Agent Coordination Engine
+ *
+ * This tool uses the Claude Agent SDK to coordinate specialized subagents
+ * for KIS-TOiR generation workflows. The orchestrator:
+ *
+ *   1. Runs discovery + contract freeze via a single orchestrator prompt
+ *   2. Dispatches bounded generator tasks in parallel streams
+ *      (Prisma / NestJS / React Admin) with graceful error isolation
+ *   3. Collects per-stream outputs with write-zone + contract accountability
+ *   4. Emits performance metrics for baseline-vs-optimized comparison
+ *
+ * The public entry point `orchestrate(prompt, options)` is preserved for
+ * backward compatibility. A new `generate(contract, options)` entry point
+ * exposes the structured graceful-error + parallel pipeline for callers
+ * that want programmatic control over per-agent failure modes.
+ *
+ * USAGE:
+ *   npm run orchestrate "Generate Prisma schema from domain/toir.api.dsl"
+ */
+
+import { query } from "@anthropic-ai/claude-agent-sdk";
+import agents from "../agents/definitions.js";
+import { PerformanceMonitor } from "./performance-monitor.js";
+import { getProjectRoot } from "./lib/project-root.mjs";
+import fs from "fs";
+import path from "path";
+import { fileURLToPath } from "url";
+
+const projectRoot = getProjectRoot();
+
+// ---------------------------------------------------------------------------
+// Types — graceful error handling contract
+// ---------------------------------------------------------------------------
+
+export type GenerationStatus = "success" | "partial" | "failed";
+
+export interface GenerationError {
+  stage: string;
+  agent: string;
+  message: string;
+  timestamp: string;
+  /**
+   * Whether a partial artifact was still returned despite this error.
+   * Reviewers use this flag to decide whether to attempt bounded repair.
+   */
+  partialOutputAvailable: boolean;
+}
+
+export interface AgentRunResult {
+  agent: string;
+  stage: string;
+  /** Whether this stage is allowed to fail without stopping generation. */
+  critical: boolean;
+  ok: boolean;
+  durationMs: number;
+  output?: string;
+  error?: string;
+}
+
+export interface GenerationOutput {
+  status: GenerationStatus;
+  runs: AgentRunResult[];
+  errors: GenerationError[];
+  metrics: ReturnType<PerformanceMonitor["getMetrics"]>;
+  /** Wall-clock ms, end-to-end. */
+  totalMs: number;
+  /** Sum of per-stage durations — a conservative sequential baseline. */
+  sequentialBaselineMs: number;
+  /** Max per-stage duration — the best case for pure parallelism. */
+  parallelLowerBoundMs: number;
+  improvementPct: number;
+}
+
+/**
+ * Description of one generator task. The orchestrator treats each task as a
+ * self-contained query() invocation and records its success/failure in
+ * isolation. Critical tasks block the pipeline on failure; optional ones
+ * degrade to partial outputs.
+ */
+export interface GeneratorTask {
+  stage: string;
+  agent: keyof typeof agents;
+  critical: boolean;
+  /** Human-readable task description that will be embedded in the prompt. */
+  task: string;
+  /** The write-zones this agent is permitted to touch. */
+  writeZones: string[];
+}
+
+// ---------------------------------------------------------------------------
+// Logging helpers — 3 level logger with ISO timestamps, no external deps
+// ---------------------------------------------------------------------------
+
+type LogLevel = "INFO" | "WARN" | "ERROR";
+
+function log(level: LogLevel, message: string): void {
+  const ts = new Date().toISOString();
+  const line = `[${ts}] [${level}] ${message}`;
+  if (level === "ERROR") {
+    // eslint-disable-next-line no-console
+    console.error(line);
+  } else if (level === "WARN") {
+    // eslint-disable-next-line no-console
+    console.warn(line);
+  } else {
+    // eslint-disable-next-line no-console
+    console.log(line);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Low-level: run a single query() stream and return its final text result
+// ---------------------------------------------------------------------------
+
+interface RunQueryOptions {
+  prompt: string;
+  verbose?: boolean;
+}
+
+/**
+ * Execute one `query()` call end-to-end and return the final text result.
+ *
+ * We iterate the async message stream and capture the `result` message. All
+ * errors — SDK errors, tool errors, transport errors — propagate as thrown
+ * exceptions so the caller's try/catch can classify them as stage failures.
+ */
+async function runQueryStream({
+  prompt,
+  verbose = false,
+}: RunQueryOptions): Promise<string> {
+  let finalResult = "";
+
+  for await (const message of query({
+    prompt,
+    options: {
+      // Agent tool must be in allowedTools for subagent delegation.
+      allowedTools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep", "Agent"],
+      agents,
+    },
+  })) {
+    // The SDK streams a discriminated-union message type; we duck-type the
+    // few shapes we care about via `unknown` to avoid coupling to SDK
+    // internals while still keeping strict-mode safety.
+    const raw = message as unknown as Record<string, unknown>;
+
+    if (verbose && typeof raw.thinking === "string") {
+      log("INFO", `💭 ${raw.thinking.substring(0, 160)}...`);
+    }
+
+    if (verbose && raw.toolUse && typeof raw.toolUse === "object") {
+      const toolUse = raw.toolUse as { name?: string };
+      log("INFO", `🔧 Tool: ${toolUse.name ?? "unknown"}`);
+    }
+
+    if ("result" in raw) {
+      finalResult = String(raw.result ?? "");
+    }
+  }
+
+  return finalResult;
+}
+
+// ---------------------------------------------------------------------------
+// Per-task runner — wraps runQueryStream with timing + graceful error capture
+// ---------------------------------------------------------------------------
+
+function buildAgentPrompt(task: GeneratorTask): string {
+  // Each task receives a bounded prompt naming the target subagent, the
+  // frozen-contract task text, and the explicit write-zones. The main agent
+  // is expected to delegate to the matching subagent via the Agent tool.
+  return `You are the KIS-TOiR orchestrator running a single bounded generator task.
+
+Target subagent: ${task.agent}
+Stage: ${task.stage}
+Write-zones (STRICT — violations must be rejected):
+${task.writeZones.map((z) => `  - ${z}`).join("\n")}
+
+Task (from frozen contract):
+${task.task}
+
+Delegation rules:
+- Use the ${task.agent} agent via the Agent tool for all code generation.
+- Do not touch files outside the declared write-zones.
+- Report back a short summary of what was changed and any blockers.
+- If you cannot complete the task, return a clear error message — do NOT
+  fabricate partial success.`;
+}
+
+async function runTask(
+  task: GeneratorTask,
+  monitor: PerformanceMonitor,
+  verbose: boolean
+): Promise<AgentRunResult> {
+  const started = Date.now();
+  log("INFO", `▶️  stage=${task.stage} agent=${task.agent} critical=${task.critical}`);
+
+  try {
+    const output = await runQueryStream({
+      prompt: buildAgentPrompt(task),
+      verbose,
+    });
+    const durationMs = Date.now() - started;
+    monitor.record(task.stage, durationMs);
+    log("INFO", `✅ stage=${task.stage} completed in ${durationMs}ms`);
+    return {
+      agent: task.agent,
+      stage: task.stage,
+      critical: task.critical,
+      ok: true,
+      durationMs,
+      output,
+    };
+  } catch (err) {
+    const durationMs = Date.now() - started;
+    monitor.record(`${task.stage} (failed)`, durationMs);
+    const message = err instanceof Error ? err.message : String(err);
+    log("ERROR", `❌ stage=${task.stage} failed after ${durationMs}ms: ${message}`);
+    return {
+      agent: task.agent,
+      stage: task.stage,
+      critical: task.critical,
+      ok: false,
+      durationMs,
+      error: message,
+    };
+  }
+}
+
+// ---------------------------------------------------------------------------
+// High-level: structured parallel generation with graceful error handling
+// ---------------------------------------------------------------------------
+
+export interface GenerateOptions {
+  verbose?: boolean;
+  /**
+   * When true, run tasks sequentially instead of in parallel. Primarily used
+   * to capture the baseline timing for speedup comparisons.
+   */
+  sequential?: boolean;
+  /**
+   * Destination for the metrics JSON report. Defaults to
+   * `<projectRoot>/generation-metrics.json`.
+   */
+  metricsPath?: string;
+}
+
+/**
+ * Structured parallel generation pipeline.
+ *
+ * Critical tasks (Prisma, NestJS) block the pipeline on failure; optional
+ * tasks (React Admin, data-access) degrade to partial outputs. Regardless of
+ * outcome, metrics are written to disk and a full GenerationOutput is
+ * returned so callers can run bounded repair or surface the failure.
+ */
+export async function generate(
+  tasks: GeneratorTask[],
+  options: GenerateOptions = {}
+): Promise<GenerationOutput> {
+  const { verbose = false, sequential = false } = options;
+  const monitor = new PerformanceMonitor();
+  const wallStart = Date.now();
+
+  log("INFO", `starting generation: tasks=${tasks.length} mode=${sequential ? "sequential" : "parallel"}`);
+
+  let runs: AgentRunResult[];
+
+  if (sequential) {
+    // Baseline path — useful for "measure first" speedup comparisons.
+    runs = [];
+    for (const task of tasks) {
+      // Short-circuit on critical failure to match real-world sequential behaviour.
+      const result = await runTask(task, monitor, verbose);
+      runs.push(result);
+      if (!result.ok && result.critical) {
+        log("WARN", `aborting sequential run after critical failure: ${result.stage}`);
+        break;
+      }
+    }
+  } else {
+    // Parallel path — Promise.allSettled is mandatory here: Promise.all would
+    // reject on the first failure and lose partial outputs from siblings.
+    // Since each runTask already catches its own errors, all outcomes arrive
+    // as fulfilled promises, but allSettled keeps us safe against bugs.
+    const settled = await Promise.allSettled(
+      tasks.map((task) => runTask(task, monitor, verbose))
+    );
+    runs = settled.map((s, i) => {
+      if (s.status === "fulfilled") return s.value;
+      const task = tasks[i];
+      const message = s.reason instanceof Error ? s.reason.message : String(s.reason);
+      log("ERROR", `unexpected rejection for stage=${task.stage}: ${message}`);
+      return {
+        agent: task.agent,
+        stage: task.stage,
+        critical: task.critical,
+        ok: false,
+        durationMs: 0,
+        error: message,
+      };
+    });
+  }
+
+  const totalMs = Date.now() - wallStart;
+
+  // ---- Classify outcome ------------------------------------------------
+  const criticalFailures = runs.filter((r) => !r.ok && r.critical);
+  const optionalFailures = runs.filter((r) => !r.ok && !r.critical);
+  const anyFailure = criticalFailures.length + optionalFailures.length > 0;
+
+  let status: GenerationStatus;
+  if (criticalFailures.length > 0) {
+    status = "failed";
+  } else if (optionalFailures.length > 0) {
+    status = "partial";
+  } else {
+    status = "success";
+  }
+
+  const errors: GenerationError[] = runs
+    .filter((r) => !r.ok)
+    .map((r) => ({
+      stage: r.stage,
+      agent: r.agent,
+      message: r.error ?? "unknown error",
+      timestamp: new Date().toISOString(),
+      partialOutputAvailable: !r.critical,
+    }));
+
+  // ---- Speedup accounting ---------------------------------------------
+  // Sequential baseline = sum of per-stage durations (what this would have
+  // cost if we ran each stage one after another).
+  // Parallel lower bound = the longest single stage (best case for pure
+  // parallel execution). `totalMs` is the actual wall-clock observed.
+  const sequentialBaselineMs = runs.reduce((acc, r) => acc + r.durationMs, 0);
+  const parallelLowerBoundMs = runs.reduce(
+    (acc, r) => Math.max(acc, r.durationMs),
+    0
+  );
+  const improvementPct =
+    sequentialBaselineMs > 0
+      ? ((sequentialBaselineMs - totalMs) / sequentialBaselineMs) * 100
+      : 0;
+
+  monitor.summary(sequential ? "Sequential baseline" : "Parallel generation");
+
+  // ---- Persist metrics report -----------------------------------------
+  const metricsPath =
+    options.metricsPath ?? path.join(projectRoot, "generation-metrics.json");
+  const metricsSnapshot = monitor.getMetrics();
+  try {
+    const report = {
+      generatedAt: new Date().toISOString(),
+      mode: sequential ? "sequential" : "parallel",
+      status,
+      totalMs,
+      sequentialBaselineMs,
+      parallelLowerBoundMs,
+      improvementPct: Number(improvementPct.toFixed(2)),
+      runs: runs.map(({ output: _output, ...rest }) => rest),
+      errors,
+      metrics: metricsSnapshot,
+    };
+    fs.writeFileSync(metricsPath, JSON.stringify(report, null, 2));
+    log("INFO", `metrics written to ${metricsPath}`);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    log("WARN", `could not write metrics report: ${message}`);
+  }
+
+  if (status === "success") {
+    log("INFO", `🎉 generation completed successfully in ${totalMs}ms`);
+  } else if (status === "partial") {
+    log(
+      "WARN",
+      `generation completed with ${optionalFailures.length} optional failure(s) in ${totalMs}ms — partial outputs returned`
+    );
+  } else {
+    log(
+      "ERROR",
+      `generation FAILED with ${criticalFailures.length} critical failure(s) in ${totalMs}ms`
+    );
+  }
+
+  if (anyFailure && verbose) {
+    for (const err of errors) {
+      log("WARN", `  ${err.stage} (${err.agent}): ${err.message}`);
+    }
+  }
+
+  return {
+    status,
+    runs,
+    errors,
+    metrics: metricsSnapshot,
+    totalMs,
+    sequentialBaselineMs,
+    parallelLowerBoundMs,
+    improvementPct,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Backward-compatible monolithic entry point
+// ---------------------------------------------------------------------------
+
+export interface OrchestrateOptions {
+  verbose?: boolean;
+}
+
+/**
+ * Single-prompt orchestration. This is the original entry point — it runs
+ * one `query()` stream and lets the main Claude agent decide when to delegate
+ * to subagents via the Agent tool. Preserved for backward compatibility with
+ * existing scripts that call `orchestrate(prompt)`.
+ *
+ * Prefer `generate(tasks, options)` when you need programmatic per-agent
+ * failure isolation, parallel execution, or structured metrics.
+ */
+export async function orchestrate(
+  mainPrompt: string,
+  options?: OrchestrateOptions
+): Promise<string> {
+  const verbose = options?.verbose ?? false;
+  const monitor = new PerformanceMonitor();
+
+  if (verbose) {
+    log("INFO", `project root: ${projectRoot}`);
+    log("INFO", `agents available: ${Object.keys(agents).length}`);
+  }
+
+  log("INFO", "🎯 orchestration task:");
+  // eslint-disable-next-line no-console
+  console.log(mainPrompt);
+  log("INFO", "📡 starting agent coordination");
+
+  monitor.mark("orchestrate-start");
+
+  try {
+    const result = await runQueryStream({
+      prompt: `You are the KIS-TOiR master orchestrator for Claude Code.
+
+Your role:
+1. Read AGENTS.md, prompts/general-prompt.md, and relevant companion docs
+2. Understand the current workspace state via the explorer agent if needed
+3. Verify framework assumptions via docs_researcher if needed
+4. Freeze a structured contract before specialized generation
+5. Delegate bounded work to specialized agents (generator_prisma, generator_nest_resources, generator_react_admin_resources, generator_data_access, reviewer)
+6. Accept or reject delegated outputs based on write-zone compliance and contract adherence
+7. Integrate accepted outputs and run validation gates
+8. Report completion only when both builds pass and evaluation gates succeed
+
+Available agents (use by description match):
+- explorer: for discovery and codebase exploration
+- docs_researcher: for framework verification
+- generator_prisma: for Prisma schema generation (after contract freeze)
+- generator_nest_resources: for NestJS backend generation (after contract freeze)
+- generator_react_admin_resources: for React Admin frontend generation (after contract freeze)
+- generator_data_access: for frontend data-access integration (after contract freeze)
+- reviewer: for final review (after validation)
+
+Mandatory delegation rules:
+- Do NOT generate anything yourself; use agents for specialized tasks
+- Contract freeze must be explicit before generator delegation
+- Accept/reject delegated outputs explicitly
+- Write-zones are strictly enforced per agent (see .claude/CLAUDE.md)
+- Validation gates are non-optional proof points
+
+Your task:
+${mainPrompt}`,
+      verbose,
+    });
+
+    monitor.measure("orchestrate-total", "orchestrate-start");
+    monitor.summary("Orchestration");
+
+    // eslint-disable-next-line no-console
+    console.log("\n=== ORCHESTRATION COMPLETE ===\n");
+    // eslint-disable-next-line no-console
+    console.log(result);
+    return result;
+  } catch (err) {
+    monitor.measure("orchestrate-total (failed)", "orchestrate-start");
+    const message = err instanceof Error ? err.message : String(err);
+    log("ERROR", `orchestration failed: ${message}`);
+    throw err;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// CLI entry point
+// ---------------------------------------------------------------------------
+
+async function main(): Promise<void> {
+  const args = process.argv.slice(2);
+
+  if (args.length === 0) {
+    // eslint-disable-next-line no-console
+    console.error("Usage: orchestrator.ts <task-description>");
+    // eslint-disable-next-line no-console
+    console.error('Example: orchestrator.ts "Generate Prisma schema"');
+    process.exit(1);
+  }
+
+  const task = args.filter((a: string) => !a.startsWith("--")).join(" ");
+  const verbose = args.includes("--verbose");
+
+  try {
+    await orchestrate(task, { verbose });
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    log("ERROR", `orchestration failed: ${message}`);
+    process.exit(1);
+  }
+}
+
+// Execute if this is the main module
+if (
+  (import.meta as unknown as { main?: boolean }).main ||
+  process.argv[1] === fileURLToPath(import.meta.url)
+) {
+  void main();
+}
+
+export default orchestrate;