February 8, 2026

Implementation Plan: The Agent Loop (Section 6)

The core execution engine. Every agent in the system runs the same perceive → reason → act → learn loop. This is the most critical code in Forge.


Overview

The agent loop is the beating heart of the system. It's where:

  • Context is gathered (perceive)
  • Decisions are made (reason)
  • Tools are executed (act)
  • Learning happens (learn)
  • Safety controls fire (circuit breakers)
  • Human intervention is requested (gates)

This document specifies the complete implementation of BaseAgent, the abstract class that all specialized agents (Planner, Implementer, Reviewer, Tester, Deployer) extend.


1. BaseAgent Class — Complete Implementation

1.1 Core Structure

typescript
// src/agents/base.ts
import { z } from 'zod';
import type {
  Agent,
  PhaseInput,
  PhaseOutput,
  AgentContext,
  Tool,
  ToolCall
} from '../core/types.ts';
import type { Memory } from '../memory/store.ts';

/**
 * BaseAgent — Abstract foundation for all agents in the system.
 *
 * Implements the core perceive → reason → act → learn loop.
 * All specialized agents (Planner, Implementer, etc.) extend this class.
 *
 * The loop runs until:
 * - Agent signals done (returns PhaseOutput)
 * - Circuit breaker trips
 * - Max iterations reached
 * - Stagnation detected
 * - Human escalation requested
 */
export abstract class BaseAgent implements Agent {
  abstract readonly id: string;
  abstract readonly type: AgentType;
  abstract readonly tools: Tool[];
  abstract readonly systemPrompt: string;

  // Configuration
  private readonly maxIterations: number;
  private readonly stagnationThreshold: number = 3;
  private readonly reflectionCostBudget: number = 0.50; // USD per phase
  private reflectionSpend: number = 0;                  // USD spent on reflection so far

  constructor(config?: AgentConfig) {
    this.maxIterations = config?.maxIterations ?? this.getDefaultMaxIterations();
  }

  /**
   * Main entry point: execute a phase of the pipeline.
   *
   * Runs the agent loop until completion or termination condition.
   */
  async execute(input: PhaseInput, ctx: AgentContext): Promise<PhaseOutput> {
    const startTime = Date.now();
    let iteration = 0;
    let stagnationCount = 0;
    let lastProgressHash = '';

    // Emit start event
    ctx.bus.emit({
      type: `${this.type}.started`,
      source: this.id,
      traceId: ctx.traceId,
      payload: { input }
    });

    // PERCEIVE: Gather initial context
    let workingMemory = await this.perceive(input, ctx);

    // Main loop
    while (true) {
      iteration++;

      // ── SAFETY: Circuit breaker check ──
      const breakerResult = await this.checkCircuitBreakers(ctx, iteration, startTime);
      if (breakerResult.shouldBreak) {
        return this.handleBreakerTrip(breakerResult, ctx, workingMemory);
      }

      // ── SAFETY: Stagnation detection ──
      const progressHash = this.hashWorkingMemory(workingMemory);
      if (progressHash === lastProgressHash) {
        stagnationCount++;
        if (stagnationCount >= this.stagnationThreshold) {
          return this.handleStagnation(ctx, workingMemory, iteration);
        }
      } else {
        stagnationCount = 0; // Reset on progress
        lastProgressHash = progressHash;
      }

      // Emit iteration event
      ctx.bus.emit({
        type: `${this.type}.iteration`,
        source: this.id,
        traceId: ctx.traceId,
        payload: { iteration, stagnationCount }
      });

      // ── REASON: Ask LLM what to do ──
      const decision = await this.reason(workingMemory, ctx);

      // ── Check if done ──
      if (decision.done) {
        const output = await this.finalize(decision, ctx, workingMemory);

        // Post-execution reflection
        await this.reflect(ctx, workingMemory, 'success', iteration);

        ctx.bus.emit({
          type: `${this.type}.completed`,
          source: this.id,
          traceId: ctx.traceId,
          payload: { output, iterations: iteration }
        });

        return output;
      }

      // ── Check if human input needed ──
      if (decision.needsHuman) {
        return this.escalateToHuman(
          ctx,
          workingMemory,
          decision.reason ?? 'Agent requested human input',
          iteration
        );
      }

      // ── ACT: Execute the tool ──
      // reason() guarantees a toolCall when not done and not escalating
      const toolResult = await this.act(decision.toolCall!, ctx);

      // ── LEARN: Update working memory ──
      workingMemory = this.learn(workingMemory, decision, toolResult, ctx);

      // ── Post-iteration reflection on errors ──
      if (toolResult.error) {
        await this.reflect(
          ctx, workingMemory, 'error', iteration,
          new Error(toolResult.message ?? 'Tool execution failed')
        );
      }
    }
  }

  // ─────────────────────────────────────────────────────────
  // PERCEIVE PHASE
  // ─────────────────────────────────────────────────────────

  /**
   * Gather context for this execution.
   *
   * Priority allocation (from research):
   * - 60% memories (relevant past learnings)
   * - 30% conversation (task description, previous iteration results)
   * - 10% environment (codebase state, file contents)
   */
  private async perceive(input: PhaseInput, ctx: AgentContext): Promise<WorkingMemory> {
    const tokenBudget = this.getTokenBudget(ctx);

    // Allocate token budget
    const memoryTokens = Math.floor(tokenBudget * 0.6);
    const conversationTokens = Math.floor(tokenBudget * 0.3);
    const environmentTokens = Math.floor(tokenBudget * 0.1);

    // 1. Recall relevant memories
    const memories = await ctx.memory.recall({
      context: this.buildMemoryQuery(input),
      type: ['semantic', 'procedural'], // Episodic memory for reflection only
      limit: this.estimateMemoryLimit(memoryTokens),
      agentType: this.type
    });

    // 2. Build initial prompt
    const initialPrompt = await this.buildInitialPrompt(input, memories, environmentTokens);

    // 3. Create working memory
    return {
      messages: [
        { role: 'user' as const, content: initialPrompt }
      ],
      tokenCount: this.estimateTokens(initialPrompt),
      tokenBudget,
      iteration: 0,
      context: {
        input,
        memories,
        relevantFiles: [] // Populated by tools during execution
      }
    };
  }

  /**
   * Build the initial user prompt that kicks off reasoning.
   */
  private async buildInitialPrompt(
    input: PhaseInput,
    memories: Memory[],
    environmentTokenBudget: number
  ): Promise<string> {
    const parts: string[] = [];

    // Task description
    parts.push(`# Task\n\n${input.task}`);

    // Context from previous phase (if any)
    if (input.previousPhaseOutput) {
      parts.push(`\n# Previous Phase Output\n\n${this.summarizePreviousOutput(input.previousPhaseOutput)}`);
    }

    // Relevant memories
    if (memories.length > 0) {
      parts.push('\n# Relevant Past Learnings\n');
      for (const memory of memories) {
        parts.push(`- ${memory.content} (confidence: ${memory.confidence.toFixed(2)})`);
      }
    }

    // Environment context (if within budget)
    const envContext = await this.gatherEnvironmentContext(input, environmentTokenBudget);
    if (envContext) {
      parts.push(`\n# Current Codebase Context\n\n${envContext}`);
    }

    // Instructions
    parts.push('\n# Instructions\n');
    parts.push(this.getPhaseSpecificInstructions(input));

    return parts.join('\n');
  }

  /**
   * Gather codebase context (file contents, directory structure, etc.)
   * within the allocated token budget.
   */
  protected async gatherEnvironmentContext(
    input: PhaseInput,
    tokenBudget: number
  ): Promise<string | null> {
    // Subclasses override this to gather phase-specific context,
    // e.g. Implementer might read relevant source files,
    // Reviewer might read the diff.
    return null;
  }

  /**
   * Phase-specific instructions appended to the initial prompt.
   */
  protected abstract getPhaseSpecificInstructions(input: PhaseInput): string;

  /**
   * Build a query string for memory recall based on the input.
   */
  protected buildMemoryQuery(input: PhaseInput): string {
    return `${this.type} phase: ${input.task}`;
  }

  // ─────────────────────────────────────────────────────────
  // REASON PHASE
  // ─────────────────────────────────────────────────────────

  /**
   * Call the LLM to decide what to do next.
   *
   * The LLM can:
   * - Use a tool (returns ToolCall)
   * - Signal done (returns result with done=true)
   * - Request human input (returns needsHuman=true)
   */
  private async reason(
    workingMemory: WorkingMemory,
    ctx: AgentContext
  ): Promise<Decision> {
    const startTime = Date.now();

    // Construct tool schemas for the LLM
    const toolSchemas = this.tools.map(tool => ({
      name: tool.name,
      description: tool.description,
      parameters: tool.schema.input
    }));

    // Check if we need to compress context
    if (workingMemory.tokenCount > workingMemory.tokenBudget * 0.9) {
      workingMemory = await this.compressWorkingMemory(workingMemory, ctx);
    }

    // Call LLM
    const response = await ctx.llm.chat({
      system: this.systemPrompt,
      messages: workingMemory.messages,
      tools: toolSchemas,
      temperature: 0.1, // Low temperature for consistency
      maxTokens: 4096
    });

    // Track cost
    const cost = response.cost;
    ctx.cost.add(cost);

    // Emit reasoning event
    ctx.bus.emit({
      type: `${this.type}.reasoned`,
      source: this.id,
      traceId: ctx.traceId,
      payload: {
        tokensUsed: response.usage.promptTokens + response.usage.completionTokens,
        cost
      },
      cost: {
        tokens: response.usage.promptTokens + response.usage.completionTokens,
        usd: cost
      },
      durationMs: Date.now() - startTime
    });

    // Parse response
    if (response.toolCalls && response.toolCalls.length > 0) {
      // Agent wants to use a tool
      return {
        done: false,
        needsHuman: false,
        toolCall: response.toolCalls[0], // Use first tool call
        reasoning: response.content
      };
    } else if (response.done && response.result) {
      // Agent signals completion
      return {
        done: true,
        needsHuman: false,
        result: response.result,
        reasoning: response.content
      };
    } else {
      // Check if LLM is requesting human help
      const needsHuman = this.detectHumanRequest(response.content);
      if (needsHuman) {
        return {
          done: false,
          needsHuman: true,
          reason: response.content,
          reasoning: response.content
        };
      }

      // LLM didn't return a valid decision — treat as error
      throw new AgentError(
        'LLM returned invalid decision: no tool call, no done signal, no human request',
        { response }
      );
    }
  }

  /**
   * Detect if the LLM is asking for human help.
   *
   * Looks for phrases like:
   * - "I need human input"
   * - "This requires human decision"
   * - "Escalating to human"
   */
  private detectHumanRequest(content: string): boolean {
    const patterns = [
      /need human/i,
      /require human/i,
      /escalat(e|ing) to human/i,
      /cannot proceed without/i,
      /human decision required/i
    ];
    return patterns.some(pattern => pattern.test(content));
  }

  /**
   * Compress working memory when approaching token limit.
   *
   * Strategy:
   * 1. Summarize older messages
   * 2. Keep recent messages in full
   * 3. Preserve critical context (task, learnings)
   */
  private async compressWorkingMemory(
    memory: WorkingMemory,
    ctx: AgentContext
  ): Promise<WorkingMemory> {
    // Keep first message (task + context) and last N messages
    const keepRecent = 3;
    const messages = memory.messages;

    if (messages.length <= keepRecent + 1) {
      // Already minimal
      return memory;
    }

    // Summarize middle messages
    const toSummarize = messages.slice(1, -keepRecent);
    const summary = await this.summarizeMessages(toSummarize, ctx);

    const compressed = [
      messages[0], // Keep task
      { role: 'assistant' as const, content: `[Previous iterations summarized: ${summary}]` },
      ...messages.slice(-keepRecent) // Keep recent
    ];

    // Emit compression event
    ctx.bus.emit({
      type: `${this.type}.memory_compressed`,
      source: this.id,
      traceId: ctx.traceId,
      payload: { before: messages.length, after: compressed.length }
    });

    return {
      ...memory,
      messages: compressed,
      tokenCount: this.estimateTokens(compressed)
    };
  }

  /**
   * Summarize a sequence of messages into a brief summary.
   */
  private async summarizeMessages(
    messages: Message[],
    ctx: AgentContext
  ): Promise<string> {
    const content = messages.map(m => `${m.role}: ${m.content}`).join('\n\n');

    const response = await ctx.llm.chat({
      system: 'Summarize the following conversation in 2-3 sentences, preserving key decisions and outcomes.',
      messages: [{ role: 'user', content }],
      temperature: 0,
      maxTokens: 200
    });

    ctx.cost.add(response.cost);
    return response.content;
  }

  // ─────────────────────────────────────────────────────────
  // ACT PHASE
  // ─────────────────────────────────────────────────────────

  /**
   * Execute a tool call.
   *
   * Steps:
   * 1. Find the tool
   * 2. Validate input (Zod)
   * 3. Check sandbox constraints
   * 4. Execute
   * 5. Capture result + metrics
   * 6. Handle errors (retry vs escalate vs fail)
   */
  private async act(toolCall: ToolCall, ctx: AgentContext): Promise<ToolResult> {
    const startTime = Date.now();

    // 1. Find tool
    const tool = this.tools.find(t => t.name === toolCall.name);
    if (!tool) {
      return {
        error: true,
        message: `Tool not found: ${toolCall.name}`,
        retryable: false
      };
    }

    // 2. Validate input
    let validatedInput: unknown;
    try {
      validatedInput = tool.schema.input.parse(toolCall.input);
    } catch (err) {
      return {
        error: true,
        message: `Input validation failed: ${err instanceof z.ZodError ? err.message : String(err)}`,
        retryable: true, // LLM can fix input and retry
        validationErrors: err instanceof z.ZodError ? err.errors : undefined
      };
    }

    // 3. Check sandbox (safety constraints)
    const sandboxCheck = await ctx.safety.checkToolExecution(tool, validatedInput);
    if (!sandboxCheck.allowed) {
      return {
        error: true,
        message: `Sandbox violation: ${sandboxCheck.reason}`,
        retryable: false
      };
    }

    // 4. Execute with retry logic
    const result = await this.executeWithRetry(tool, validatedInput, ctx);

    // 5. Emit event
    ctx.bus.emit({
      type: `${this.type}.tool_executed`,
      source: this.id,
      traceId: ctx.traceId,
      payload: {
        tool: tool.name,
        success: !result.error,
        durationMs: Date.now() - startTime
      },
      durationMs: Date.now() - startTime
    });

    return result;
  }

  /**
   * Execute a tool with retry logic.
   *
   * Retry strategy:
   * - Transient errors (network, timeout): retry with backoff
   * - Validation errors: no retry (LLM needs to fix input)
   * - Permission errors: no retry (sandbox violation)
   * - Unknown errors: retry once
   */
  private async executeWithRetry(
    tool: Tool,
    input: unknown,
    ctx: AgentContext,
    maxRetries: number = 3
  ): Promise<ToolResult> {
    let attempt = 0;
    let lastError: Error | undefined;

    while (attempt < maxRetries) {
      attempt++;
      try {
        const output = await tool.execute(input, {
          bus: ctx.bus,
          traceId: ctx.traceId,
          agentId: this.id
        });

        // Validate output
        tool.schema.output.parse(output);

        return { error: false, data: output, toolName: tool.name };
      } catch (err) {
        lastError = err instanceof Error ? err : new Error(String(err));

        // Classify error
        const classification = this.classifyError(lastError);
        if (!classification.retryable) {
          // Don't retry non-retryable errors
          return {
            error: true,
            message: lastError.message,
            retryable: false,
            source: classification.source
          };
        }

        // Wait before retry (exponential backoff)
        if (attempt < maxRetries) {
          const delay = Math.min(1000 * Math.pow(2, attempt - 1), 10000);
          await this.sleep(delay);
        }
      }
    }

    // Max retries exceeded
    return {
      error: true,
      message: `Tool execution failed after ${maxRetries} attempts: ${lastError?.message}`,
      retryable: false,
      source: 'tool'
    };
  }

  /**
   * Classify an error to determine if it's retryable.
   */
  private classifyError(error: Error): ErrorClassification {
    // Network/timeout errors → retryable
    if (error.message.includes('timeout') || error.message.includes('ECONNREFUSED')) {
      return { retryable: true, source: 'network', severity: 'medium' };
    }

    // Validation errors → not retryable (LLM needs to fix input)
    if (error instanceof z.ZodError) {
      return { retryable: false, source: 'validation', severity: 'low' };
    }

    // Permission errors → not retryable
    if (error.message.includes('permission denied') || error.message.includes('EACCES')) {
      return { retryable: false, source: 'permission', severity: 'high' };
    }

    // Tool-specific errors
    if (error.name === 'ToolError') {
      return { retryable: false, source: 'tool', severity: 'medium' };
    }

    // Unknown → retry once
    return { retryable: true, source: 'unknown', severity: 'medium' };
  }

  // ─────────────────────────────────────────────────────────
  // LEARN PHASE
  // ─────────────────────────────────────────────────────────

  /**
   * Update working memory after each iteration.
   *
   * Adds:
   * - The decision (what the LLM chose to do)
   * - The tool result (what happened)
   */
  private learn(
    memory: WorkingMemory,
    decision: Decision,
    result: ToolResult,
    ctx: AgentContext
  ): WorkingMemory {
    // Add assistant message (the decision)
    const assistantMessage: Message = {
      role: 'assistant',
      content: decision.reasoning || '',
      toolCalls: decision.toolCall ? [decision.toolCall] : undefined
    };

    // Add tool result message
    const resultMessage: Message = {
      role: 'tool',
      content: result.error
        ? `Error: ${result.message}`
        : JSON.stringify(result.data, null, 2),
      toolCallId: decision.toolCall?.id
    };

    const updatedMessages = [...memory.messages, assistantMessage, resultMessage];

    return {
      ...memory,
      messages: updatedMessages,
      tokenCount: this.estimateTokens(updatedMessages),
      iteration: memory.iteration + 1
    };
  }

  // ─────────────────────────────────────────────────────────
  // REFLECTION
  // ─────────────────────────────────────────────────────────

  /**
   * Post-execution reflection: extract learnings.
   *
   * When to reflect:
   * - On success: what worked well?
   * - On error: what went wrong? How to prevent?
   * - On human escalation: why couldn't the agent proceed?
   *
   * Skip reflection for:
   * - Very cheap operations (< $0.10)
   * - Low-complexity phases
   */
  private async reflect(
    ctx: AgentContext,
    memory: WorkingMemory,
    outcome: 'success' | 'error' | 'escalation',
    iterations: number,
    error?: Error
  ): Promise<void> {
    // Skip if the per-phase reflection budget is exhausted
    if (this.reflectionSpend >= this.reflectionCostBudget) {
      return;
    }

    // Skip for trivial operations
    if (ctx.cost.current < 0.10 && outcome === 'success') {
      return;
    }

    const reflectionPrompt = this.buildReflectionPrompt(outcome, memory, iterations, error);

    const response = await ctx.llm.chat({
      system: REFLECTION_PROMPT,
      messages: [{ role: 'user', content: reflectionPrompt }],
      temperature: 0.2,
      maxTokens: 1000
    });

    ctx.cost.add(response.cost);
    this.reflectionSpend += response.cost;

    // Parse learnings from response
    const learnings = this.parseLearnings(response.content);

    // Store each learning in memory
    for (const learning of learnings) {
      await ctx.memory.store({
        type: learning.type,
        content: learning.content,
        context: learning.context,
        confidence: learning.confidence,
        source: `${this.type}.reflection`,
        tags: [this.type, outcome, ...(learning.tags || [])]
      });
    }

    // Emit reflection event
    ctx.bus.emit({
      type: `${this.type}.reflected`,
      source: this.id,
      traceId: ctx.traceId,
      payload: { outcome, learningsCount: learnings.length }
    });
  }

  /**
   * Build the reflection prompt based on outcome.
   */
  private buildReflectionPrompt(
    outcome: 'success' | 'error' | 'escalation',
    memory: WorkingMemory,
    iterations: number,
    error?: Error
  ): string {
    const parts: string[] = [];

    parts.push(`# ${this.type} Phase Reflection\n`);
    parts.push(`Outcome: ${outcome}`);
    parts.push(`Iterations: ${iterations}`);
    if (error) {
      parts.push(`\nError: ${error.message}`);
    }

    parts.push('\n# Conversation History\n');
    parts.push(this.summarizeConversation(memory.messages));

    parts.push('\n# Reflection Questions\n');
    if (outcome === 'success') {
      parts.push('- What approach worked well here?');
      parts.push('- What patterns emerged that could be reused?');
      parts.push('- Were there any inefficiencies or wasted iterations?');
      parts.push('- What would make this faster next time?');
    } else if (outcome === 'error') {
      parts.push('- What was the root cause of the error?');
      parts.push('- Could this have been detected earlier?');
      parts.push('- What should be done differently next time?');
      parts.push('- Is there a pattern to prevent this class of error?');
    } else if (outcome === 'escalation') {
      parts.push("- Why couldn't the agent complete this task?");
      parts.push('- What information was missing?');
      parts.push('- What capability would have enabled success?');
      parts.push('- Was escalation appropriate, or could the agent have continued?');
    }

    parts.push('\nExtract 1-3 key learnings as structured JSON.');
    return parts.join('\n');
  }

  /**
   * Parse learnings from LLM reflection response.
   */
  private parseLearnings(content: string): Learning[] {
    // Attempt to extract JSON
    const jsonMatch =
      content.match(/```json\n([\s\S]*?)\n```/) || content.match(/\[[\s\S]*\]/);

    if (jsonMatch) {
      try {
        const parsed = JSON.parse(jsonMatch[1] || jsonMatch[0]);
        return Array.isArray(parsed) ? parsed : [parsed];
      } catch {
        // Fall through to fallback
      }
    }

    // Fallback: create a single learning from the text
    return [{
      type: 'procedural',
      content: content.slice(0, 500), // Truncate
      context: `${this.type} phase`,
      confidence: 0.5 // Low confidence for unparsed reflection
    }];
  }

  // ─────────────────────────────────────────────────────────
  // LOOP TERMINATION
  // ─────────────────────────────────────────────────────────

  /**
   * Check circuit breakers.
   *
   * Breakers that can trip:
   * 1. Iteration count exceeded
   * 2. Cost exceeded
   * 3. Time exceeded
   * 4. Error rate exceeded
   */
  private async checkCircuitBreakers(
    ctx: AgentContext,
    iteration: number,
    startTime: number
  ): Promise<BreakerResult> {
    const elapsed = Date.now() - startTime;

    // 1. Iteration breaker
    if (iteration >= this.maxIterations) {
      return {
        shouldBreak: true,
        reason: 'MAX_ITERATIONS_EXCEEDED',
        details: { iteration, max: this.maxIterations }
      };
    }

    // 2. Cost breaker
    const costResult = await ctx.safety.checkCost({
      current: ctx.cost.current,
      phase: this.type
    });
    if (costResult.shouldBreak) {
      return costResult;
    }

    // 3. Time breaker
    const timeResult = await ctx.safety.checkTime({ elapsed, phase: this.type });
    if (timeResult.shouldBreak) {
      return timeResult;
    }

    // 4. Error rate breaker
    const errorResult = await ctx.safety.checkErrorRate({
      phase: this.type,
      traceId: ctx.traceId
    });
    if (errorResult.shouldBreak) {
      return errorResult;
    }

    return { shouldBreak: false };
  }

  /**
   * Handle circuit breaker trip.
   */
  private async handleBreakerTrip(
    result: BreakerResult,
    ctx: AgentContext,
    memory: WorkingMemory
  ): Promise<PhaseOutput> {
    ctx.bus.emit({
      type: `${this.type}.breaker_tripped`,
      source: this.id,
      traceId: ctx.traceId,
      payload: result
    });

    // Attempt reflection on the breaker trip
    await this.reflect(ctx, memory, 'error', memory.iteration, new Error(result.reason));

    // Surface as an error the orchestrator can catch
    throw new CircuitBreakerError(result.reason ?? 'BREAKER_TRIPPED', result.details);
  }

  /**
   * Handle stagnation (no progress across multiple iterations).
   */
  private async handleStagnation(
    ctx: AgentContext,
    memory: WorkingMemory,
    iterations: number
  ): Promise<PhaseOutput> {
    ctx.bus.emit({
      type: `${this.type}.stagnation_detected`,
      source: this.id,
      traceId: ctx.traceId,
      payload: { iterations }
    });

    // Reflect on stagnation
    await this.reflect(
      ctx,
      memory,
      'error',
      iterations,
      new Error('Agent appears stuck, no progress in recent iterations')
    );

    // Escalate to human
    return this.escalateToHuman(
      ctx,
      memory,
      'Agent detected stagnation: same state across multiple iterations. Human intervention needed.',
      iterations
    );
  }

  /**
   * Escalate to human when the agent cannot proceed.
   */
  private async escalateToHuman(
    ctx: AgentContext,
    memory: WorkingMemory,
    reason: string,
    iterations: number
  ): Promise<PhaseOutput> {
    ctx.bus.emit({
      type: `${this.type}.escalated`,
      source: this.id,
      traceId: ctx.traceId,
      payload: { reason, iterations }
    });

    // Reflect on escalation
    await this.reflect(ctx, memory, 'escalation', iterations);

    // Surface as an error carrying the escalation context
    throw new HumanEscalationError(reason, {
      phase: this.type,
      iterations,
      lastState: this.summarizeConversation(memory.messages)
    });
  }

  /**
   * Finalize the output when the agent signals done.
   */
  private async finalize(
    decision: Decision,
    ctx: AgentContext,
    memory: WorkingMemory
  ): Promise<PhaseOutput> {
    // Validate the result matches the expected schema for this phase
    return this.validateOutput(decision.result);
  }

  /**
   * Validate that the output matches the phase's expected schema.
   * Subclasses override this.
   */
  protected abstract validateOutput(result: unknown): Promise<PhaseOutput>;

  // ─────────────────────────────────────────────────────────
  // UTILITIES
  // ─────────────────────────────────────────────────────────

  /**
   * Get default max iterations for this agent type.
   */
  private getDefaultMaxIterations(): number {
    const defaults: Record<AgentType, number> = {
      planning: 20,
      implementation: 50,
      review: 10,
      testing: 5,
      deployment: 3
    };
    return defaults[this.type] ?? 10;
  }

  /**
   * Get token budget based on remaining cost budget.
   */
  private getTokenBudget(ctx: AgentContext): number {
    const remainingBudget = ctx.cost.budget - ctx.cost.current;
    const avgCostPerToken = 0.00003; // Rough estimate for Claude Sonnet
    const maxTokens = Math.floor(remainingBudget / avgCostPerToken);

    // Cap at model's context window
    const modelContextWindow = 200_000; // Claude Sonnet context window
    const safeLimit = Math.floor(modelContextWindow * 0.7); // Leave room for output

    return Math.min(maxTokens, safeLimit);
  }

  /**
   * Estimate memory limit based on token budget.
   */
  private estimateMemoryLimit(tokenBudget: number): number {
    const avgTokensPerMemory = 100;
    return Math.floor(tokenBudget / avgTokensPerMemory);
  }

  /**
   * Estimate token count for a string or list of messages.
   */
  private estimateTokens(content: string | Message[]): number {
    if (typeof content === 'string') {
      // Rough heuristic: 1 token ≈ 4 characters
      return Math.ceil(content.length / 4);
    }
    return content.reduce((sum, msg) => sum + this.estimateTokens(msg.content), 0);
  }

  /**
   * Hash working memory to detect stagnation.
   */
  private hashWorkingMemory(memory: WorkingMemory): string {
    // Hash the last few messages to detect if we're repeating
    const recent = memory.messages.slice(-3);
    const content = recent.map(m => m.content).join('|');

    // Simple hash (good enough for stagnation detection)
    let hash = 0;
    for (let i = 0; i < content.length; i++) {
      hash = ((hash << 5) - hash) + content.charCodeAt(i);
      hash = hash & hash; // Convert to 32-bit integer
    }
    return hash.toString(36);
  }

  /**
   * Summarize conversation for reflection.
   */
  private summarizeConversation(messages: Message[]): string {
    return messages
      .map((m, i) => `[${i + 1}] ${m.role}: ${m.content.slice(0, 200)}...`)
      .join('\n');
  }

  /**
   * Summarize previous phase output for context.
   */
  protected summarizePreviousOutput(output: PhaseOutput): string {
    // Subclasses can override to provide phase-specific summaries
    return JSON.stringify(output, null, 2);
  }

  /**
   * Sleep utility.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

// ─────────────────────────────────────────────────────────
// CONSTANTS
// ─────────────────────────────────────────────────────────

const REFLECTION_PROMPT = `You are a metacognitive assistant analyzing an AI agent's execution.

Your job is to extract actionable learnings from the execution trace.

Focus on:
1. **What worked well** — strategies and approaches that led to success
2. **What didn't work** — errors, inefficiencies, dead ends
3. **Root causes** — why things succeeded or failed
4. **Generalizable patterns** — insights that apply beyond this specific case

Output format (JSON array):
[
  {
    "type": "procedural" | "semantic" | "episodic",
    "content": "Clear, actionable statement of the learning",
    "context": "When is this learning relevant?",
    "confidence": 0.0-1.0,
    "tags": ["optional", "tags"]
  }
]

Guidelines:
- **Procedural**: How to do something (strategies, approaches, techniques)
- **Semantic**: Facts or patterns (this codebase uses X, error Y means Z)
- **Episodic**: Specific event (PR #42 failed because of null check)

Be concise. Extract 1-3 high-value learnings, not a list of everything that happened.`;

// ─────────────────────────────────────────────────────────
// TYPE DEFINITIONS
// ─────────────────────────────────────────────────────────

interface WorkingMemory {
  messages: Message[];
  tokenCount: number;
  tokenBudget: number;
  iteration: number;
  context: {
    input: PhaseInput;
    memories: Memory[];
    relevantFiles: string[];
  };
}

interface Message {
  role: 'user' | 'assistant' | 'tool';
  content: string;
  toolCalls?: ToolCall[];
  toolCallId?: string;
}

interface Decision {
  done: boolean;
  needsHuman: boolean;
  toolCall?: ToolCall;
  result?: unknown;
  reason?: string;
  reasoning?: string;
}

interface ToolResult {
  error: boolean;
  data?: unknown;
  message?: string;
  toolName?: string;
  retryable?: boolean;
  validationErrors?: unknown;
  source?: string;
}

interface BreakerResult {
  shouldBreak: boolean;
  reason?: string;
  details?: Record<string, unknown>;
}

interface ErrorClassification {
  retryable: boolean;
  source: 'network' | 'validation' | 'permission' | 'tool' | 'unknown';
  severity: 'low' | 'medium' | 'high';
}

interface Learning {
  type: 'episodic' | 'semantic' | 'procedural';
  content: string;
  context: string;
  confidence: number;
  tags?: string[];
}

interface AgentConfig {
  maxIterations?: number;
}

type AgentType = 'planning' | 'implementation' | 'review' | 'testing' | 'deployment';

// ─────────────────────────────────────────────────────────
// ERRORS
// ─────────────────────────────────────────────────────────

class AgentError extends Error {
  constructor(message: string, public details?: Record<string, unknown>) {
    super(message);
    this.name = 'AgentError';
  }
}

class CircuitBreakerError extends Error {
  constructor(message: string, public details?: Record<string, unknown>) {
    super(message);
    this.name = 'CircuitBreakerError';
  }
}

class HumanEscalationError extends Error {
  constructor(message: string, public details?: Record<string, unknown>) {
    super(message);
    this.name = 'HumanEscalationError';
  }
}

2. AgentContext — Everything Passed Into an Agent

The AgentContext is the execution environment provided to every agent. It contains:

  • Event bus for logging
  • Memory system for recall/store
  • LLM client for reasoning
  • Safety controls (breakers, gates)
  • Tool registry
  • Configuration
  • Trace ID for correlation
  • Cost accumulator
  • Elapsed timer
typescript
// src/core/types.ts (AgentContext definition)
interface AgentContext {
  // Tracing
  traceId: string;           // Groups all events in this pipeline run
  runId: string;             // Same as traceId for now

  // Core services
  bus: EventBus;             // Event emission and subscription
  memory: MemoryStore;       // Recall/store memories
  llm: LLMProvider;          // LLM client for reasoning
  tools: ToolRegistry;       // Available tools

  // Safety
  safety: SafetyController;  // Circuit breakers, gates, sandbox checks

  // Resource tracking
  cost: CostAccumulator;     // Track spending
  elapsed: ElapsedTimer;     // Track time

  // Configuration
  config: ForgeConfig;       // Runtime config

  // Phase-specific context (optional)
  phaseContext?: Record<string, unknown>;
}

interface CostAccumulator {
  current: number;           // Current spend (USD)
  budget: number;            // Budget limit (USD)
  add(cost: number): void;   // Add to current
  reset(): void;             // Reset to zero
}

interface ElapsedTimer {
  start: Date;               // Start time
  elapsed(): number;         // Milliseconds elapsed
}

How AgentContext is Constructed

typescript
// src/orchestrator/context.ts
import { ulid } from 'ulid';
import { EventBus } from '../core/bus.ts';
import { MemoryStore } from '../memory/store.ts';
import { ToolRegistry } from '../tools/registry.ts';
import { SafetyController } from '../safety/controller.ts';
import type { AgentContext } from '../core/types.ts';
// LLMConfig is assumed to be exported alongside ForgeConfig
import type { ForgeConfig, LLMConfig } from '../core/config.ts';
// Provider implementations assumed to live alongside the LLMProvider interface
import {
  AnthropicProvider,
  OpenAIProvider,
  OllamaProvider,
  type LLMProvider
} from '../tools/llm.ts';

/**
 * Create an AgentContext for a pipeline run.
 */
export function createAgentContext(config: ForgeConfig): AgentContext {
  const traceId = ulid();

  return {
    traceId,
    runId: traceId,
    bus: new EventBus(config.dbPath),
    memory: new MemoryStore(config.dbPath),
    llm: createLLMProvider(config.llm),
    tools: new ToolRegistry(),
    safety: new SafetyController(config.safety),
    cost: {
      current: 0,
      budget: config.safety.costPerRun,
      add(cost: number) { this.current += cost; },
      reset() { this.current = 0; }
    },
    elapsed: {
      start: new Date(),
      elapsed() { return Date.now() - this.start.getTime(); }
    },
    config
  };
}

/**
 * Create an LLM provider based on config.
 */
function createLLMProvider(config: LLMConfig): LLMProvider {
  switch (config.provider) {
    case 'anthropic':
      return new AnthropicProvider(config);
    case 'openai':
      return new OpenAIProvider(config);
    case 'ollama':
      return new OllamaProvider(config);
    default:
      throw new Error(`Unknown LLM provider: ${config.provider}`);
  }
}
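
Putting the two together, a pipeline runner constructs a context once per run and hands it to each agent. A minimal sketch, where loadConfig and ImplementerAgent are illustrative stand-ins for the orchestrator's actual entry points:

typescript
// Hypothetical pipeline entry point — names are illustrative
import { createAgentContext } from './orchestrator/context.ts';
import { loadConfig } from './core/config.ts';              // assumed config loader
import { ImplementerAgent } from './agents/implementer.ts'; // assumed BaseAgent subclass

async function runImplementationPhase(task: string) {
  const config = loadConfig();
  const ctx = createAgentContext(config); // fresh traceId, zeroed cost accumulator
  const agent = new ImplementerAgent();

  try {
    const output = await agent.execute({ task }, ctx);
    console.log(`Phase succeeded, spent $${ctx.cost.current.toFixed(2)}`);
    return output;
  } catch (err) {
    // CircuitBreakerError / HumanEscalationError propagate to the orchestrator
    console.error(`Phase terminated: ${err}`);
    throw err;
  }
}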

3. Working Memory — Conversation History

Working memory is the agent's short-term context: the conversation with the LLM so far.

Structure

typescript
interface WorkingMemory {
  messages: Message[];        // Conversation history
  tokenCount: number;         // Current token usage
  tokenBudget: number;        // Max tokens allowed
  iteration: number;          // Current iteration
  context: {
    input: PhaseInput;        // Original task
    memories: Memory[];       // Recalled long-term memories
    relevantFiles: string[];  // Files accessed during execution
  };
}

interface Message {
  role: 'user' | 'assistant' | 'tool';
  content: string;
  toolCalls?: ToolCall[];     // For assistant messages with tool calls
  toolCallId?: string;        // For tool messages
}

Context Window Management

When working memory approaches the token budget (90%), the agent compresses it:

  1. Keep first message (task + context)
  2. Summarize middle messages into a brief summary
  3. Keep recent N messages in full (N=3)

This ensures:

  • Task context is never lost
  • Recent iterations are preserved (for reflection)
  • Total token count stays under budget
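
For illustration, a hypothetical history of seven messages compresses down to five (message contents here are invented for the example):

typescript
// Before: [task, iter1-assistant, iter1-tool, iter2-assistant, iter2-tool,
//          iter3-assistant, iter3-tool] — near the token budget

// After compressWorkingMemory (keepRecent = 3):
const compressed = [
  { role: 'user', content: '# Task\n\nAdd retry logic to the HTTP client...' },  // kept verbatim
  { role: 'assistant', content: '[Previous iterations summarized: read client.ts, found no existing retry helper]' },
  { role: 'assistant', content: 'Adding a retry wrapper to client.ts.' },        // last 3 kept in full
  { role: 'tool', content: '{ "written": "client.ts" }' },
  { role: 'assistant', content: 'Running the affected tests to validate.' }
];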

Token Counting

Token counting uses a heuristic:

  • 1 token ≈ 4 characters for estimation (e.g., a 2,000-character prompt ≈ 500 tokens)
  • Exact counting via LLM provider's tokenizer (optional, slower)

4. Perceive Phase — Gathering Context

The perceive phase runs once at the start of execution. It gathers:

  1. Relevant memories from long-term storage
  2. Task description and previous phase output
  3. Current codebase state (file contents, etc.)

Token Budget Allocation

From the research (topics/01-agentic-loops.md):

  • 60% memories — Past learnings relevant to this task
  • 30% conversation — Task description, previous outputs
  • 10% environment — Codebase state (files, structure)
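
For a concrete budget the split is straightforward; with 100,000 tokens, the same arithmetic as in perceive() yields:

typescript
const tokenBudget = 100_000;
const memoryTokens = Math.floor(tokenBudget * 0.6);       // 60,000 for recalled memories
const conversationTokens = Math.floor(tokenBudget * 0.3); // 30,000 for task + prior output
const environmentTokens = Math.floor(tokenBudget * 0.1);  // 10,000 for codebase context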

Memory Recall Strategy

typescript
// Recall memories relevant to this task
const memories = await ctx.memory.recall({
  context: `${this.type} phase: ${input.task}`,
  type: ['semantic', 'procedural'], // Skip episodic during perceive
  limit: estimateMemoryLimit(memoryTokenBudget),
  agentType: this.type
});

Memory recall uses:

  • Embedding similarity (if available) — Find semantically similar learnings
  • Tag matching — Find memories tagged with relevant keywords
  • Recency — Recent memories ranked higher
  • Confidence — High-confidence memories ranked higher
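
The document does not fix an exact ranking formula; one plausible sketch combines the four signals as a weighted score (the weights and decay scale below are assumptions, not part of the spec):

typescript
// Hypothetical scoring inside MemoryStore.recall() — weights are illustrative
function scoreMemory(m: Memory, querySimilarity: number, tagOverlap: number): number {
  const ageDays = (Date.now() - m.lastAccessed.getTime()) / 86_400_000;
  const recency = Math.exp(-ageDays / 30); // decays over roughly a month

  return (
    0.5 * querySimilarity + // embedding similarity (0..1), if available
    0.2 * tagOverlap +      // fraction of query keywords matched by tags
    0.15 * recency +
    0.15 * m.confidence
  );
}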

Environment Context

Each agent subclass can override gatherEnvironmentContext() to load phase-specific data:

  • Planner: Reads package.json, directory structure, existing architecture docs
  • Implementer: Reads target files, imports, related modules
  • Reviewer: Reads the diff, affected files, test files
  • Tester: Reads test files, coverage reports
  • Deployer: Reads build artifacts, deployment config
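
As a sketch of what such an override could look like, a Reviewer might load the diff and truncate it to the budget; readDiff is an assumed helper, and the truncation reuses the 1-token-per-4-chars heuristic:

typescript
// Hypothetical sketch — src/agents/reviewer.ts
class ReviewerAgent extends BaseAgent {
  // ...id, type, tools, systemPrompt, validateOutput elided...

  protected override async gatherEnvironmentContext(
    input: PhaseInput,
    tokenBudget: number
  ): Promise<string | null> {
    const diff = await readDiff(input); // assumed helper wrapping `git diff`
    if (!diff) return null;

    // Stay within budget: 1 token ≈ 4 characters
    const maxChars = tokenBudget * 4;
    return diff.length > maxChars
      ? diff.slice(0, maxChars) + '\n[diff truncated to fit token budget]'
      : diff;
  }
}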

5. Reason Phase — LLM Call with Tool Use

The reason phase calls the LLM to decide what to do next.

System Prompt Construction

Each agent has a systemPrompt that defines its role and capabilities.

Example (Implementer):

typescript
const IMPLEMENTER_SYSTEM_PROMPT = `You are an expert software engineer implementing code changes.

Your goal: Complete the implementation tasks from the plan.

You have access to tools for:
- Reading files
- Writing files
- Running shell commands (linters, formatters, tests)
- Searching the codebase

Guidelines:
1. Read relevant files before modifying them
2. Follow existing code style and patterns
3. Write tests for new functionality
4. Run linters/formatters after changes
5. Self-validate by running affected tests

When you've completed all tasks, signal done with the result.
If you encounter an issue you can't resolve, explain the problem and request human input.`;

Tool Presentation

Tools are presented to the LLM as JSON schemas:

typescript
const toolSchemas = this.tools.map(tool => ({
  name: tool.name,
  description: tool.description,
  parameters: tool.schema.input // Zod schema converted to JSON schema
}));
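
The comment glosses over the Zod → JSON Schema conversion; one common way to do it (an assumption here, not mandated by this spec) is the zod-to-json-schema package:

typescript
import { zodToJsonSchema } from 'zod-to-json-schema';

const toolSchemas = this.tools.map(tool => ({
  name: tool.name,
  description: tool.description,
  // LLM providers expect plain JSON Schema, not Zod objects
  parameters: zodToJsonSchema(tool.schema.input)
}));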

LLM Response Handling

The LLM can respond in three ways:

  1. Use a tool: Returns toolCalls array

    • Extract first tool call
    • Execute it in the Act phase
    • Loop continues
  2. Signal done: Returns done: true and result

    • Validate result against expected schema
    • Return output
    • Loop terminates
  3. Request human input: Returns text like "I need human input for X"

    • Detect via pattern matching
    • Escalate to human
    • Loop terminates

Streaming Responses

For long responses, the LLM can stream tokens. The agent accumulates them:

typescript
let accumulatedContent = '';

// llmStream comes from the provider's streaming chat API
for await (const chunk of llmStream) {
  accumulatedContent += chunk.content;

  // Emit progress event
  ctx.bus.emit({
    type: `${this.type}.reasoning_progress`,
    payload: { tokens: chunk.tokens }
  });
}

6. Act Phase — Tool Execution

The act phase executes a tool call.

Input Validation (Zod)

Before execution, the tool input is validated:

typescript
let validatedInput: unknown;
try {
  validatedInput = tool.schema.input.parse(toolCall.input);
} catch (err) {
  return {
    error: true,
    message: `Input validation failed: ${err instanceof z.ZodError ? err.message : String(err)}`,
    retryable: true // LLM can fix the input and issue a new call
  };
}

Sandbox Enforcement

The safety controller checks if the tool execution is allowed:

typescript
const sandboxCheck = await ctx.safety.checkToolExecution(tool, validatedInput);
if (!sandboxCheck.allowed) {
  return {
    error: true,
    message: `Sandbox violation: ${sandboxCheck.reason}`,
    retryable: false
  };
}

Sandbox checks include:

  • File path restrictions (no access outside project directory)
  • Command whitelist (only safe shell commands)
  • Network access limits (no arbitrary HTTP requests)
  • Cost limits (expensive operations require approval)
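
The SafetyController's internals are specified elsewhere; as a rough sketch, the first two checks might look like this (the whitelist contents and input shape are illustrative assumptions):

typescript
// Hypothetical, simplified sketch of checkToolExecution
import path from 'node:path';

const COMMAND_WHITELIST = new Set(['eslint', 'prettier', 'tsc', 'vitest']); // illustrative

async function checkToolExecution(
  tool: { name: string },
  input: unknown
): Promise<{ allowed: boolean; reason?: string }> {
  const args = input as { path?: string; command?: string };

  // 1. File path restriction: resolved path must stay inside the project
  if (args.path) {
    const resolved = path.resolve(process.cwd(), args.path);
    if (!resolved.startsWith(process.cwd() + path.sep)) {
      return { allowed: false, reason: `path escapes project: ${args.path}` };
    }
  }

  // 2. Command whitelist: only known-safe binaries may run
  if (args.command) {
    const binary = args.command.trim().split(/\s+/)[0];
    if (!COMMAND_WHITELIST.has(binary)) {
      return { allowed: false, reason: `command not whitelisted: ${binary}` };
    }
  }

  return { allowed: true };
}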

Result Capture

Tool execution returns:

typescript
interface ToolResult {
  error: boolean;
  data?: unknown;             // Success result
  message?: string;           // Error message
  toolName?: string;
  retryable?: boolean;        // Can the agent retry?
  validationErrors?: unknown;
  source?: string;            // Error source
}

Error Handling

Errors are classified:

| Error Type             | Retryable | Strategy                    |
|------------------------|-----------|-----------------------------|
| Network timeout        | Yes       | Retry with backoff          |
| Validation error       | No        | LLM must fix input          |
| Permission denied      | No        | Sandbox violation, escalate |
| Tool execution failure | Maybe     | Retry once, then escalate   |
| Unknown error          | Yes       | Retry once                  |

Retry uses exponential backoff, doubling the delay each attempt and capping it at 10s:

  • Attempt 1: immediate
  • Attempt 2: after a 1s delay
  • Attempt 3: after a 2s delay
  • Attempts 4+ (only if maxRetries is raised above the default of 3): 4s, 8s, then capped at 10s
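
The schedule follows directly from the delay formula in executeWithRetry:

typescript
// Delay before retry attempt N+1, as computed in executeWithRetry
const delayMs = (attempt: number) => Math.min(1000 * Math.pow(2, attempt - 1), 10_000);

delayMs(1); // 1000  — before attempt 2
delayMs(2); // 2000  — before attempt 3
delayMs(3); // 4000  — before attempt 4 (only if maxRetries > 3)
delayMs(5); // 10000 — capped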

Metrics Collection

Each tool execution emits an event:

typescript
ctx.bus.emit({
  type: `${this.type}.tool_executed`,
  source: this.id,
  traceId: ctx.traceId,
  payload: {
    tool: tool.name,
    success: !result.error,
    durationMs: Date.now() - startTime
  },
  durationMs: Date.now() - startTime
});

7. Learn Phase — Post-Iteration Learning

The learn phase updates working memory after each iteration.

Working Memory Update

Two messages are added:

  1. Assistant message: The decision

    typescript
    {
      role: 'assistant',
      content: decision.reasoning,
      toolCalls: [decision.toolCall]
    }
  2. Tool result message: What happened

    typescript
    {
      role: 'tool',
      content: result.error ? `Error: ${result.message}` : JSON.stringify(result.data),
      toolCallId: decision.toolCall.id
    }

This creates a conversation history:

User: [task]
Assistant: I'll read the file X [toolCall: read_file]
Tool: [file contents]
Assistant: I'll modify line 10 [toolCall: write_file]
Tool: Success
Assistant: Done [done: true, result: {...}]

Event Emission

An iteration event is emitted at the top of each loop pass, carrying the counters the learn phase advances:

typescript
ctx.bus.emit({
  type: `${this.type}.iteration`,
  source: this.id,
  traceId: ctx.traceId,
  payload: { iteration, stagnationCount }
});

Error Reflection

If the tool execution resulted in an error, trigger immediate reflection:

typescript
if (toolResult.error) {
  await this.reflect(
    ctx, workingMemory, 'error', iteration,
    new Error(toolResult.message ?? 'Tool execution failed')
  );
}

8. Reflection — Post-Execution Learning

Reflection extracts durable learnings from the execution.

When Reflection Runs

| Trigger      | When                               |
|--------------|------------------------------------|
| Success      | After agent completes successfully |
| Error        | After tool error (immediate)       |
| Escalation   | After human escalation             |
| Breaker trip | After circuit breaker fires        |

When Reflection is Skipped

  • Operations costing < $0.10 (trivial)
  • Reflection budget exhausted (> $0.50 spent on reflection)
  • Low-complexity phases (deployment with no issues)
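
Condensed, the skip rules amount to a small predicate; this sketch mirrors the checks at the top of reflect():

typescript
// Sketch of the skip rules in reflect()
function shouldReflect(
  outcome: 'success' | 'error' | 'escalation',
  operationCost: number,   // cost of the phase so far (USD)
  reflectionSpend: number  // USD already spent on reflection this phase
): boolean {
  if (reflectionSpend >= 0.50) return false;                       // budget exhausted
  if (outcome === 'success' && operationCost < 0.10) return false; // trivial success
  return true; // errors and escalations reflect while budget remains
}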

The Reflection Prompt

You are a metacognitive assistant analyzing an AI agent's execution.

Extract actionable learnings from the execution trace.

Focus on:
1. What worked well
2. What didn't work
3. Root causes
4. Generalizable patterns

Output format (JSON array):
[
  {
    "type": "procedural" | "semantic" | "episodic",
    "content": "Clear, actionable statement",
    "context": "When is this relevant?",
    "confidence": 0.0-1.0,
    "tags": ["optional", "tags"]
  }
]

Learning Storage

Each extracted learning is stored in the memory system:

typescript
await ctx.memory.store({
  type: learning.type,
  content: learning.content,
  context: learning.context,
  confidence: learning.confidence,
  source: `${this.type}.reflection`,
  tags: [this.type, outcome, ...(learning.tags ?? [])] // tags are optional
});

Reflection Cost Budget

Reflection can cost money (LLM call). The agent has a budget:

  • Skip threshold: trivial successes costing under $0.10 don't trigger reflection
  • Total reflection budget: $0.50 per phase (each reflection call is also capped at 1,000 output tokens)

If the budget is exceeded, reflection is skipped.


9. Loop Termination Conditions

The loop can end in 5 ways:

1. Agent Signals Done

The LLM returns done: true with a result.

typescript
if (decision.done) {
  const output = await this.finalize(decision, ctx, workingMemory);
  await this.reflect(ctx, workingMemory, 'success', iteration);
  return output;
}

2. Circuit Breaker Trips

A safety limit is exceeded (iterations, cost, time, error rate).

typescript
const breakerResult = await this.checkCircuitBreakers(ctx, iteration, startTime);
if (breakerResult.shouldBreak) {
  return this.handleBreakerTrip(breakerResult, ctx, workingMemory);
}

Throws CircuitBreakerError with details.

3. Max Iterations Reached

The iteration count exceeds the configured maximum.

typescript
if (iteration >= this.maxIterations) {
  return {
    shouldBreak: true,
    reason: 'MAX_ITERATIONS_EXCEEDED',
    details: { iteration, max: this.maxIterations }
  };
}

4. Human Escalation Requested

The agent (LLM) requests human input.

typescript
if (decision.needsHuman) {
  return this.escalateToHuman(ctx, workingMemory, decision.reason, iteration);
}

Throws HumanEscalationError with context.

5. Stagnation Detected

The agent is stuck (same state across N iterations).

typescript
const progressHash = this.hashWorkingMemory(workingMemory);
if (progressHash === lastProgressHash) {
  stagnationCount++;
  if (stagnationCount >= this.stagnationThreshold) {
    return this.handleStagnation(ctx, workingMemory, iteration);
  }
}

Escalates to human with "stagnation detected" message.


10. Stagnation Detection

Stagnation = the agent is doing the same thing repeatedly with no progress.

Detection Strategy

Hash the last N messages (N=3) in working memory:

typescript
private hashWorkingMemory(memory: WorkingMemory): string {
  const recent = memory.messages.slice(-3);
  const content = recent.map(m => m.content).join('|');

  let hash = 0;
  for (let i = 0; i < content.length; i++) {
    hash = ((hash << 5) - hash) + content.charCodeAt(i);
    hash = hash & hash;
  }
  return hash.toString(36);
}

If the hash is identical across 3+ iterations → stagnation.

What Happens on Stagnation

  1. Emit event:

    typescript
    ctx.bus.emit({
      type: `${this.type}.stagnation_detected`,
      source: this.id,
      traceId: ctx.traceId,
      payload: { iterations }
    });
  2. Reflect on stagnation:

    typescript
    await this.reflect(
      ctx, workingMemory, 'error', iterations,
      new Error('Agent appears stuck')
    );
  3. Escalate to human:

    typescript
    return this.escalateToHuman(
      ctx, workingMemory,
      'Agent detected stagnation: same state across multiple iterations.',
      iterations
    );

Preventing Stagnation

Agents can avoid stagnation by:

  • Trying different approaches (not repeating the same tool call)
  • Requesting human input when uncertain
  • Simplifying the approach when stuck
  • Using reflection to identify dead ends

11. Error Handling in the Loop

Errors can occur at multiple points:

Tool Errors

A tool execution fails (network, validation, permission, etc.).

Handling:

  1. Classify error (retryable vs non-retryable)
  2. If retryable: retry with backoff (max 3 attempts)
  3. If non-retryable: return error to LLM in tool result message
  4. LLM can:
    • Try a different approach
    • Request human help
    • Signal failure

LLM Errors

The LLM provider fails (timeout, rate limit, API error).

Handling:

  1. Retry once with exponential backoff
  2. If retry fails: trip circuit breaker
  3. Emit error event
  4. Escalate to human
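
The BaseAgent code above does not itself wrap its ctx.llm.chat calls; a thin wrapper implementing this single-retry policy might look like the following sketch (the one retry and fixed 5s delay come from the retry summary table later in this section):

typescript
// Sketch: one-retry wrapper around the LLM provider call.
// Passing `chat` as a closure keeps the provider's `this` binding intact.
async function chatWithRetry<Req, Res>(
  chat: (req: Req) => Promise<Res>,
  request: Req
): Promise<Res> {
  try {
    return await chat(request);
  } catch (firstError) {
    await new Promise(resolve => setTimeout(resolve, 5000)); // fixed 5s backoff
    try {
      return await chat(request);
    } catch {
      // Second failure: surface an error the circuit-breaker path can catch
      throw new Error(`LLM provider failed twice: ${String(firstError)}`);
    }
  }
}

// Usage inside reason():
// const response = await chatWithRetry(req => ctx.llm.chat(req), { system, messages, tools, ... });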

Validation Errors

Tool input or output doesn't match schema.

Handling:

  1. Input validation failure:

    • Return the error to the LLM with validation details
    • Not auto-retried: resending the same input cannot succeed
    • The LLM can correct the input and issue a new tool call
  2. Output validation failure:

    • Tool implementation bug
    • Log error
    • Trip circuit breaker
    • Escalate to human

Retry Strategy Summary

| Error Type           | Max Retries | Backoff                  | Escalate On Failure |
|----------------------|-------------|--------------------------|---------------------|
| Network timeout      | 3           | Exponential (1s, 2s, 4s) | Yes                 |
| LLM API error        | 1           | 5s                       | Yes                 |
| Tool execution error | 3           | Exponential              | Yes                 |
| Input validation     | 0           | N/A                      | No (LLM fixes)      |
| Output validation    | 0           | N/A                      | Yes (bug)           |
| Permission denied    | 0           | N/A                      | Yes (security)      |

12. Testing the Agent Loop

Unit Testing Strategy

Mock dependencies:

  • LLMProvider: Return scripted responses
  • Tool: Return controlled results
  • EventBus: Capture emitted events
  • Memory: Return pre-populated memories
  • SafetyController: Return controlled breaker states
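
The tests below use createMockLLM and createMockTool without defining them; here is a minimal sketch of the LLM mock, with its shape inferred from how the tests consume it (the real helper may differ):

typescript
// Hypothetical test helper: replays scripted responses in order.
// The scripted `toolCall` (singular, as the tests write it) is wrapped
// into the `toolCalls` array that reason() reads.
function createMockLLM(
  script: Array<Record<string, unknown>>,
  opts: { costPerCall?: number } = {}
) {
  const calls: Array<Record<string, unknown>> = [];
  return {
    calls,
    get callCount() { return calls.length; },
    async chat(request: Record<string, unknown>) {
      calls.push(request);
      const { toolCall, ...step } = script[Math.min(calls.length - 1, script.length - 1)];
      return {
        content: '',
        done: false,
        toolCalls: toolCall ? [{ id: `call_${calls.length}`, ...(toolCall as object) }] : undefined,
        usage: { promptTokens: 100, completionTokens: 50 },
        cost: opts.costPerCall ?? 0.01,
        ...step // remaining scripted fields (done, result, content) override the defaults
      };
    }
  };
}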

Test Scenarios

1. Happy Path

typescript
test('agent completes successfully', async () => {
  const mockLLM = createMockLLM([
    { toolCall: { name: 'read_file', input: { path: 'test.ts' } } },
    { toolCall: { name: 'write_file', input: { path: 'test.ts', content: '...' } } },
    { done: true, result: { success: true } }
  ]);

  const agent = new TestAgent({ llm: mockLLM });
  const result = await agent.execute(testInput, testContext);

  expect(result.success).toBe(true);
  expect(mockLLM.callCount).toBe(3);
});

2. Tool Error with Retry

typescript
test('retries on transient tool error', async () => {
  const mockTool = createMockTool([
    { error: true, message: 'Network timeout', retryable: true },
    { error: true, message: 'Network timeout', retryable: true },
    { error: false, data: { success: true } }
  ]);

  const agent = new TestAgent({ tools: [mockTool] });
  const result = await agent.execute(testInput, testContext);

  expect(result.success).toBe(true);
  expect(mockTool.executeCount).toBe(3);
});

3. Circuit Breaker Trip

typescript
test('trips breaker on max iterations', async () => {
  const mockLLM = createMockLLM(
    Array(100).fill({ toolCall: { name: 'noop' } }) // Infinite loop
  );

  const agent = new TestAgent({ llm: mockLLM, maxIterations: 10 });

  await expect(agent.execute(testInput, testContext))
    .rejects.toThrow(CircuitBreakerError);
  // Breaker is checked before reasoning, so it trips at the start of
  // iteration 10 — after 9 LLM calls
  expect(mockLLM.callCount).toBe(9);
});

4. Stagnation Detection

typescript
test('detects stagnation and escalates', async () => {
  const mockLLM = createMockLLM([
    { toolCall: { name: 'read_file', input: { path: 'test.ts' } } },
    { toolCall: { name: 'read_file', input: { path: 'test.ts' } } }, // Repeat
    { toolCall: { name: 'read_file', input: { path: 'test.ts' } } }, // Repeat
    { toolCall: { name: 'read_file', input: { path: 'test.ts' } } }  // Repeat
  ]);

  const agent = new TestAgent({ llm: mockLLM });

  await expect(agent.execute(testInput, testContext))
    .rejects.toThrow(HumanEscalationError);

  const events = testContext.bus.getEvents();
  expect(events).toContainEqual(
    expect.objectContaining({ type: 'test.stagnation_detected' })
  );
});

5. Human Escalation

typescript
test('escalates when LLM requests human input', async () => {
  const mockLLM = createMockLLM([
    { toolCall: { name: 'analyze_complexity' } },
    {
      content: 'This task requires human decision on architecture approach.',
      needsHuman: true
    }
  ]);

  const agent = new TestAgent({ llm: mockLLM });

  await expect(agent.execute(testInput, testContext))
    .rejects.toThrow(HumanEscalationError);

  const events = testContext.bus.getEvents();
  expect(events).toContainEqual(
    expect.objectContaining({ type: 'test.escalated' })
  );
});

6. Reflection on Success

typescript
test('reflects on successful completion', async () => {
  const mockLLM = createMockLLM([
    { toolCall: { name: 'implement_feature' } },
    { done: true, result: { success: true } }
  ]);

  const agent = new TestAgent({ llm: mockLLM });
  const result = await agent.execute(testInput, testContext);

  expect(result.success).toBe(true);

  // Check that reflection happened
  const reflectionCalls = mockLLM.calls.filter(
    call => call.system.includes('metacognitive')
  );
  expect(reflectionCalls.length).toBe(1);

  // Check that learnings were stored
  const memories = await testContext.memory.getRecent();
  expect(memories.length).toBeGreaterThan(0);
  expect(memories[0].source).toBe('test.reflection');
});

7. Reflection on Error

typescript
test('reflects on error', async () => {
  const mockLLM = createMockLLM([
    { toolCall: { name: 'dangerous_operation' } }
  ]);
  const mockTool = createMockTool([
    { error: true, message: 'Permission denied', retryable: false }
  ]);

  const agent = new TestAgent({ llm: mockLLM, tools: [mockTool] });
  await agent.execute(testInput, testContext);

  // Check that error reflection happened
  const reflectionCalls = mockLLM.calls.filter(
    call => call.messages[0].content.includes('Error:')
  );
  expect(reflectionCalls.length).toBe(1);

  // Check that error learning was stored
  const memories = await testContext.memory.getRecent();
  const errorLearnings = memories.filter(m => m.tags.includes('error'));
  expect(errorLearnings.length).toBeGreaterThan(0);
});

8. Cost Budget Enforcement

typescript
test('stops when cost budget exceeded', async () => {
  const expensiveLLM = createMockLLM(
    Array(50).fill({ toolCall: { name: 'noop' } }),
    { costPerCall: 2.00 } // Expensive model
  );

  const agent = new TestAgent({ llm: expensiveLLM });
  const context = createTestContext({ costBudget: 5.00 });

  await expect(agent.execute(testInput, context))
    .rejects.toThrow(CircuitBreakerError);
  expect(context.cost.current).toBeGreaterThanOrEqual(5.00);
});

9. Context Window Management

typescript
test('compresses working memory when approaching token limit', async () => {
  const mockLLM = createMockLLM(
    Array(20).fill({ toolCall: { name: 'generate_large_output' } })
  );

  const agent = new TestAgent({ llm: mockLLM });
  const context = createTestContext({ tokenBudget: 10000 });

  await agent.execute(testInput, context);

  // Check that compression happened
  const compressionEvents = context.bus.getEvents().filter(
    e => e.type === 'test.memory_compressed'
  );
  expect(compressionEvents.length).toBeGreaterThan(0);
});

Integration Testing

Test the full loop with real LLM and tools (in a sandbox):

typescript
test('integration: implement simple feature', async () => {
  const agent = new ImplementerAgent();
  const context = createRealContext();

  const input: PhaseInput = {
    task: 'Add a function `add(a, b)` to math.ts that returns a + b'
  };

  const result = await agent.execute(input, context);
  expect(result.success).toBe(true);

  // Verify file was written
  const fileContent = await fs.readFile('math.ts', 'utf-8');
  expect(fileContent).toContain('function add(a: number, b: number)');
  expect(fileContent).toContain('return a + b');
});

Implementation Checklist

Week 3: Base Agent Foundation

  • Define core types (AgentContext, WorkingMemory, Decision, etc.)
  • Implement BaseAgent class skeleton
  • Implement perceive() phase
  • Implement reason() phase (LLM call)
  • Implement act() phase (tool execution)
  • Implement learn() phase (memory update)
  • Add error classification logic
  • Add retry with backoff

Week 4: Safety and Termination

  • Implement circuit breaker checks (iteration, cost, time, error rate)
  • Implement stagnation detection
  • Implement human escalation
  • Add handleBreakerTrip()
  • Add handleStagnation()
  • Add escalateToHuman()

Week 5: Reflection and Memory

  • Implement post-execution reflection
  • Define REFLECTION_PROMPT
  • Add learning extraction logic
  • Add learning storage integration
  • Implement reflection cost budget enforcement
  • Skip reflection for trivial operations

Week 6: Context Management

  • Implement working memory compression
  • Add token estimation
  • Implement token budget allocation (60/30/10)
  • Add environment context gathering (per-agent)
  • Implement memory recall with prioritization

Week 7: Testing

  • Write unit tests for all phases
  • Test happy path
  • Test error handling and retry
  • Test circuit breakers
  • Test stagnation detection
  • Test human escalation
  • Test reflection
  • Write integration tests with real LLM

Week 8: Polish

  • Add detailed logging
  • Optimize token usage
  • Add metrics dashboard
  • Tune default parameters (max iterations, budgets)
  • Write developer documentation
  • Conduct load testing

Configuration Reference

typescript
// src/core/config.ts (agent loop configuration)
interface AgentLoopConfig {
  // Iteration limits per agent type
  maxIterations: {
    planning: 20;
    implementation: 50;
    review: 10;
    testing: 5;
    deployment: 3;
  };

  // Stagnation detection
  stagnation: {
    threshold: 3; // Iterations with no progress
    hashDepth: 3; // Messages to hash
  };

  // Reflection
  reflection: {
    costBudget: 0.50;    // USD per phase
    skipThreshold: 0.10; // Skip if operation cost < $0.10
    enabled: true;
  };

  // Context management
  context: {
    tokenBudgetRatio: {
      memories: 0.6;
      conversation: 0.3;
      environment: 0.1;
    };
    compressionThreshold: 0.9; // Compress at 90% of budget
    keepRecentMessages: 3;
  };

  // Retry
  retry: {
    maxAttempts: 3;
    baseDelay: 1000;  // ms
    maxDelay: 10000;  // ms
    backoffMultiplier: 2;
  };
}

Appendix: Complete Type Definitions

typescript
// src/core/types.ts (complete agent types)
import { z } from 'zod';
import type { EventBus } from './bus.ts';
// AgentContext is defined in this same file; see Section 2 above.

export interface Agent {
  id: string;
  type: AgentType;
  execute(input: PhaseInput, ctx: AgentContext): Promise<PhaseOutput>;
}

export interface PhaseInput {
  task: string;
  previousPhaseOutput?: PhaseOutput;
  context?: Record<string, unknown>;
}

export interface PhaseOutput {
  success: boolean;
  data?: unknown;
  error?: string;
  metadata?: Record<string, unknown>;
}

export interface ToolCall {
  id: string;
  name: string;
  input: unknown;
}

export interface Tool<TInput = unknown, TOutput = unknown> {
  name: string;
  description: string;
  schema: {
    input: z.ZodSchema<TInput>;
    output: z.ZodSchema<TOutput>;
  };
  execute(input: TInput, ctx: ToolContext): Promise<TOutput>;
}

export interface ToolContext {
  bus: EventBus;
  traceId: string;
  agentId: string;
}

export interface Memory {
  id: string;
  type: 'episodic' | 'semantic' | 'procedural';
  content: string;
  context: string;
  confidence: number;
  source?: string;
  tags?: string[];
  createdAt: Date;
  lastAccessed: Date;
  accessCount: number;
}

export type AgentType = 'planning' | 'implementation' | 'review' | 'testing' | 'deployment';

This implementation plan provides the complete specification for the agent loop, the core execution engine of Forge. All specialized agents (Planner, Implementer, Reviewer, Tester, Deployer) extend BaseAgent and inherit this perceive → reason → act → learn cycle.