security

February 8, 2026

Safety Controls and Circuit Breakers Specification

Executive Summary

This document specifies safety controls, circuit breakers, and governance mechanisms for agentic SDLC orchestration systems. Based on research across agentic loops, feedback mechanisms, code review, testing, CI/CD, and end-to-end orchestration, these controls prevent runaway processes, ensure human oversight at critical junctures, and maintain cost/quality guardrails.

1. Circuit Breaker Patterns

1.1 Iteration Limiters

Prevent infinite loops and runaway agent execution.

typescript
/**
 * Circuit breaker that stops agent loops which iterate too often or stop
 * making progress.
 *
 * `checkAndBreak` escalates to a human when:
 *  - the loop is not tracked in `loopState` (fail closed),
 *  - the loop's iteration count reaches the limit for its phase, or
 *  - three consecutive checks each find more than 5 minutes elapsed since
 *    `lastProgress`.
 */
interface IterationCircuitBreaker {
  // Configuration: iteration ceilings keyed by phase
  maxIterations: {
    default: 10;
    planning: 20;        // Architecture can be complex
    implementation: 50;  // Code generation with retries
    testing: 5;          // Fix/retry cycles
    deployment: 3;       // Deployment attempts
  };
  
  // Per-loop tracking
  loopState: Map<LoopId, {
    iterationCount: number;
    startTime: Date;
    lastProgress: Date;
    stagnationCount: number;
  }>;
  
  /**
   * Decide whether the given loop must be interrupted.
   *
   * @param loopId Identifier of the loop to evaluate.
   * @param phase  Key of `maxIterations` whose limit applies. Defaults to
   *               'default', which matches the previous single-limit behavior.
   * @returns A decision with `shouldBreak: true` plus `reason`/`action` when
   *          the loop must stop, otherwise `{ shouldBreak: false }`.
   */
  async checkAndBreak(
    loopId: LoopId,
    phase: keyof IterationCircuitBreaker['maxIterations'] = 'default'
  ): Promise<BreakDecision> {
    const state = this.loopState.get(loopId);
    
    // Map.get returns undefined for unknown keys. A loop we cannot evaluate
    // is treated as unsafe rather than crashing on the property access below.
    if (state === undefined) {
      return {
        shouldBreak: true,
        reason: 'UNKNOWN_LOOP',
        action: 'ESCALATE_TO_HUMAN'
      };
    }
    
    // Hard limit check, using the phase-specific ceiling
    const limit = this.maxIterations[phase] ?? this.maxIterations.default;
    if (state.iterationCount >= limit) {
      return {
        shouldBreak: true,
        reason: 'MAX_ITERATIONS_EXCEEDED',
        action: 'ESCALATE_TO_HUMAN'
      };
    }
    
    // Stagnation detection
    const timeSinceProgress = Date.now() - state.lastProgress.getTime();
    if (timeSinceProgress > 300000) { // 5 minutes
      state.stagnationCount++;
      if (state.stagnationCount >= 3) {
        return {
          shouldBreak: true,
          reason: 'STAGNATION_DETECTED',
          action: 'ESCALATE_TO_HUMAN'
        };
      }
    } else {
      // Progress was made recently: old stagnation strikes no longer count,
      // so only consecutive stalled checks can trip the breaker.
      state.stagnationCount = 0;
    }
    
    return { shouldBreak: false };
  }
}

1.2 Error Rate Circuit Breaker

Halt operations when error frequency exceeds thresholds.

typescript
/**
 * Sliding-window error-rate breaker.
 *
 * `evaluate()` prunes events older than `windowSize`, computes the share of
 * the remaining events whose severity is 'error', and maps that share onto a
 * breaker state: CLOSED, WARNING, HALF_OPEN or OPEN.
 */
interface ErrorRateBreaker {
  // Width of the sliding window in milliseconds
  windowSize: 300000; // 5 minutes

  // Error-rate cut-offs, as fractions of all events in the window
  thresholds: {
    warning: 0.10;     // 10% error rate
    critical: 0.25;    // 25% error rate
    catastrophic: 0.50; // 50% error rate
  };
  
  // Events observed inside the window
  errorWindow: CircularBuffer<ErrorEvent>;
  
  /**
   * Evaluate the current error rate against the thresholds, most severe first.
   *
   * @returns The breaker state, with an action and reason for every state
   *          other than CLOSED.
   */
  async evaluate(): Promise<BreakerState> {
    const now = Date.now();
    
    // Drop everything that has aged out of the window
    this.errorWindow.removeOlderThan(now - this.windowSize);
    
    const errorCount = this.errorWindow.filter(e => e.severity === 'error').length;
    const totalEvents = this.errorWindow.length;

    // An empty window counts as a 0% error rate
    const errorRate = totalEvents > 0 ? errorCount / totalEvents : 0;

    const { warning, critical, catastrophic } = this.thresholds;
    const describe = (tier: string) =>
      `Error rate ${errorRate.toFixed(2)} exceeds ${tier} threshold`;
    
    if (errorRate >= catastrophic) {
      return {
        state: 'OPEN',
        action: 'HALT_ALL_OPERATIONS',
        reason: describe('catastrophic'),
        recoveryTime: 600000 // 10 min cooldown
      };
    }
    
    if (errorRate >= critical) {
      return {
        state: 'HALF_OPEN',
        action: 'REDUCE_CONCURRENCY',
        reason: describe('critical'),
        nextEvaluation: now + 60000
      };
    }
    
    if (errorRate >= warning) {
      return {
        state: 'WARNING',
        action: 'INCREASE_MONITORING',
        reason: describe('warning')
      };
    }
    
    return { state: 'CLOSED' };
  }
}

1.3 Cost Accumulation Breaker

Prevent runaway spending on API calls and compute.

typescript
/**
 * Budget breaker that rejects operations whose estimated cost would push
 * spending past a task, session, daily or monthly budget.
 */
interface CostCircuitBreaker {
  // Budget tiers
  budgets: {
    perTask: {
      planning: 5.00;      // USD
      implementation: 10.00;
      testing: 3.00;
      deployment: 2.00;
    };
    perSession: 50.00;
    perDay: 200.00;
    perMonth: 2000.00;
  };
  
  // Tracking: spend accumulated so far in each scope
  currentSpend: {
    task: number;
    session: number;
    day: number;
    month: number;
  };
  
  /**
   * Check whether an operation may proceed given its estimated cost.
   *
   * Limits are checked from the narrowest scope (task) to the widest (month);
   * the first exceeded limit determines the rejection reason.
   *
   * @param operation     Operation about to run; `operation.type` selects the
   *                      per-task budget.
   * @param estimatedCost Projected cost of the operation, added to every
   *                      scope's current spend.
   * @returns `allowed: false` with a reason when a budget would be exceeded,
   *          `allowed: true` with a warning above 80% of the task budget,
   *          otherwise `{ allowed: true }`.
   */
  async checkBudget(
    operation: Operation,
    estimatedCost: number
  ): Promise<CostDecision> {
    const taskBudget = this.budgets.perTask[operation.type];

    // An operation type with no configured budget would compare against
    // `undefined`, which is always false and would silently skip the task
    // limit. Reject it explicitly instead.
    if (taskBudget === undefined) {
      return {
        allowed: false,
        reason: 'UNKNOWN_OPERATION_TYPE',
        suggestion: 'SIMPLIFY_APPROACH_OR_ESCALATE'
      };
    }

    const projected = {
      task: this.currentSpend.task + estimatedCost,
      session: this.currentSpend.session + estimatedCost,
      day: this.currentSpend.day + estimatedCost,
      month: this.currentSpend.month + estimatedCost
    };
    
    // Check limits
    if (projected.task > taskBudget) {
      return {
        allowed: false,
        reason: 'TASK_BUDGET_EXCEEDED',
        suggestion: 'SIMPLIFY_APPROACH_OR_ESCALATE'
      };
    }

    // The session budget is configured and projected, so it must be enforced
    // like the other scopes.
    if (projected.session > this.budgets.perSession) {
      return {
        allowed: false,
        reason: 'SESSION_BUDGET_EXCEEDED',
        suggestion: 'START_NEW_SESSION_OR_ESCALATE'
      };
    }
    
    if (projected.day > this.budgets.perDay) {
      return {
        allowed: false,
        reason: 'DAILY_BUDGET_EXCEEDED',
        suggestion: 'PAUSE_OPERATIONS_UNTIL_TOMORROW'
      };
    }
    
    if (projected.month > this.budgets.perMonth) {
      return {
        allowed: false,
        reason: 'MONTHLY_BUDGET_EXCEEDED',
        action: 'REQUIRE_ADMIN_APPROVAL'
      };
    }
    
    // Warning at 80%
    if (projected.task > taskBudget * 0.8) {
      return {
        allowed: true,
        warning: 'APPROACHING_TASK_BUDGET_LIMIT',
        remaining: taskBudget - projected.task
      };
    }
    
    return { allowed: true };
  }
}

1.4 Time-Based Circuit Breaker

Enforce maximum execution times per phase.

typescript
/**
 * Breaker that enforces a maximum wall-clock duration per operation type.
 */
interface TimeCircuitBreaker {
  // Time limits in milliseconds, keyed by operation type
  timeouts: {
    planning: 1800000;      // 30 minutes
    implementation: 3600000; // 1 hour
    testing: 1200000;        // 20 minutes
    deployment: 900000;      // 15 minutes
    review: 1800000;         // 30 minutes
    totalPipeline: 7200000;  // 2 hours
  };
  
  // Start time of each tracked operation
  startTimes: Map<OperationId, Date>;
  
  /**
   * Compare an operation's elapsed time against the limit for its type.
   *
   * @param operationId Operation to evaluate.
   * @returns `shouldBreak: true` when the limit is exceeded or the operation
   *          cannot be evaluated; a warning once more than 90% of the limit
   *          is used; otherwise the remaining time.
   */
  async checkTimeout(operationId: OperationId): Promise<TimeoutDecision> {
    const startTime = this.startTimes.get(operationId);
    const operation = this.getOperationType(operationId);
    const limit = this.timeouts[operation];

    // Without a recorded start time or a configured limit the timeout cannot
    // be enforced. Previously this threw a TypeError (no start time) or never
    // fired (no limit, because comparisons with undefined are always false).
    // Fail closed instead.
    if (startTime === undefined || limit === undefined) {
      return {
        shouldBreak: true,
        reason: 'UNTRACKED_OPERATION',
        action: 'CANCEL_AND_ESCALATE'
      };
    }

    const elapsed = Date.now() - startTime.getTime();
    const remaining = limit - elapsed;
    const percentUsed = elapsed / limit;
    
    if (elapsed > limit) {
      return {
        shouldBreak: true,
        reason: 'TIMEOUT_EXCEEDED',
        action: 'CANCEL_AND_ESCALATE',
        elapsed,
        limit
      };
    }
    
    if (percentUsed > 0.9) {
      return {
        shouldBreak: false,
        warning: 'APPROACHING_TIMEOUT',
        remaining,
        suggestion: 'WRAP_UP_OR_EXTEND'
      };
    }
    
    return { shouldBreak: false, remaining };
  }
}

1.5 Resource Exhaustion Breaker

Prevent system overload from agent operations.

typescript
/**
 * Breaker that throttles agent operations when system resources run short.
 *
 * Every resource listed in `thresholds` is evaluated, including
 * `connections`, which was configured but previously never checked.
 */
interface ResourceCircuitBreaker {
  // Per-resource limits. cpu/memory/disk values are presumably percentages
  // and connections an absolute count; confirm against collectMetrics().
  thresholds: {
    cpu: { warning: 70, critical: 85, max: 95 };
    memory: { warning: 75, critical: 85, max: 95 };
    disk: { warning: 80, critical: 90, max: 95 };
    connections: { warning: 100, critical: 150, max: 200 };
  };
  
  /**
   * Collect current metrics and decide whether to throttle.
   *
   * @returns CRITICAL throttling when any resource exceeds its `max`,
   *          ELEVATED throttling when two or more resources exceed their
   *          `critical` level, otherwise `{ shouldThrottle: false }`.
   */
  async checkResources(): Promise<ResourceDecision> {
    const metrics = await this.collectMetrics();
    const resources = ['cpu', 'memory', 'disk', 'connections'] as const;
    
    // Any resource above its hard maximum pauses new work immediately.
    // A metric missing from `metrics` compares as false and is skipped.
    const violations = resources
      .filter(resource => metrics[resource] > this.thresholds[resource].max)
      .map(resource => ({ resource, level: 'critical', value: metrics[resource] }));
    
    if (violations.length > 0) {
      return {
        shouldThrottle: true,
        severity: 'CRITICAL',
        violations,
        action: 'PAUSE_NEW_OPERATIONS',
        retryAfter: 30000
      };
    }
    
    // Check for multiple resources above their `critical` level
    const warnings = resources.filter(
      resource => metrics[resource] > this.thresholds[resource].critical
    );
    
    if (warnings.length >= 2) {
      return {
        shouldThrottle: true,
        severity: 'ELEVATED',
        warnings,
        action: 'REDUCE_CONCURRENCY'
      };
    }
    
    return { shouldThrottle: false };
  }
}

2. Human-in-the-Loop Integration Points

2.1 Mandatory Human Gateways

Human approval is required before proceeding at these points:

typescript
/**
 * Gates at which a human must approve before the pipeline continues.
 *
 * Each gate names its trigger event, the condition under which it applies,
 * how long to wait for a response (milliseconds) and who is escalated to.
 */
interface HumanGateway {
  // Decision points that always require human sign-off
  mandatoryGates: [
    {
      id: 'ARCHITECTURE_APPROVAL';
      trigger: 'planning_complete';
      condition: (design) => design.complexity > COMPLEXITY_THRESHOLD 
                         || design.securityImpact === 'high';
      timeout: 86400000; // 24 hours
      escalation: 'ARCHITECT_REVIEW';
    },
    {
      id: 'PRODUCTION_DEPLOY';
      trigger: 'pre_deploy';
      condition: (deployment) => deployment.environment === 'production';
      timeout: 3600000; // 1 hour
      escalation: 'DEPLOYMENT_LEAD';
    },
    {
      id: 'SECURITY_EXCEPTION';
      trigger: 'security_scan_failure';
      condition: (scan) => scan.severity === 'critical';
      timeout: 43200000; // 12 hours
      escalation: 'SECURITY_TEAM';
    },
    {
      id: 'COST_THRESHOLD';
      trigger: 'budget_warning';
      condition: (cost) => cost.projected > cost.allocated * 1.5;
      timeout: 7200000; // 2 hours
      escalation: 'BUDGET_OWNER';
    },
    {
      id: 'BREAKING_CHANGE';
      trigger: 'api_analysis';
      condition: (api) => api.breakingChanges.length > 0;
      timeout: 86400000; // 24 hours
      escalation: 'PRODUCT_OWNER';
    },
    {
      id: 'DATA_MIGRATION';
      trigger: 'schema_change';
      condition: (schema) => schema.requiresMigration;
      timeout: 172800000; // 48 hours
      escalation: 'DBA_TEAM';
    }
  ];
  
  /**
   * Ask a human to approve passage through `gate`.
   *
   * Sends a notification, then waits up to `gate.timeout` for a response.
   * If the wait times out, the request is escalated and reported as not
   * approved.
   *
   * @param gate    Gate being requested.
   * @param context Context that is summarized and detailed in the request.
   * @returns The approval outcome; `approved` is true only when the response
   *          decision is 'approve'.
   */
  async requestApproval(gate: Gate, context: Context): Promise<ApprovalResult> {
    const { id: gateId, timeout, escalation } = gate;

    const request = {
      id: generateId(),
      gateId,
      timestamp: new Date(),
      context: this.summarizeContext(context),
      details: this.generateDetails(context),
      timeout,
      escalation
    };
    
    // Tell the approvers, then block until they answer or the gate times out
    await this.notify(request);
    const response = await this.waitForResponse(request.id, timeout);
    
    if (!response.timedOut) {
      const { decision, userId, timestamp, conditions, reason } = response;
      return {
        approved: decision === 'approve',
        approver: userId,
        timestamp,
        conditions,
        reason
      };
    }

    // No answer in time: hand the request to the escalation contact
    await this.escalate(request);
    return { approved: false, reason: 'TIMEOUT' };
  }
}

2.2 Risk-Based Human Triggers

Human review triggered by risk assessment:

typescript
/**
 * Scores a change across four risk dimensions and decides whether a human
 * must review it.
 */
interface RiskBasedHumanTrigger {
  // Thresholds for each risk dimension
  riskFactors: {
    codeComplexity: {
      cyclomatic: { high: 15, critical: 25 };
      cognitive: { high: 20, critical: 30 };
      linesChanged: { high: 500, critical: 1000 };
    };
    businessImpact: {
      revenue: { high: 10000, critical: 50000 }; // Daily $ impact
      users: { high: 1000, critical: 10000 };
      compliance: ['gdpr', 'hipaa', 'sox', 'pci'];
    };
    security: {
      secretsDetected: 1; // Any = trigger
      vulnerabilities: { high: 1, critical: 1 };
      newDependencies: { high: 5, critical: 10 };
    };
    operational: {
      newInfrastructure: true;
      databaseChanges: true;
      externalAPIs: { high: 3, critical: 5 };
    };
  };
  
  /**
   * Compute the weighted risk score for a change.
   *
   * Weights: complexity 0.25, impact 0.35, security 0.25, operational 0.15.
   *
   * @param change Change to score.
   * @returns Total score, per-dimension breakdown, risk level, and whether
   *          the total exceeds RISK_THRESHOLD.
   */
  calculateRiskScore(change: Change): RiskScore {
    const complexity = this.scoreComplexity(change);
    const impact = this.scoreBusinessImpact(change);
    const security = this.scoreSecurity(change);
    const operational = this.scoreOperational(change);
    const breakdown = { complexity, impact, security, operational };

    const weights = {
      complexity: 0.25,
      impact: 0.35,
      security: 0.25,
      operational: 0.15
    };
    const total = weightedSum(breakdown, weights);
    const level = this.classifyRisk(total);
    
    return {
      total,
      breakdown,
      level,
      requiresHuman: total > RISK_THRESHOLD
    };
  }
  
  /**
   * Map a numeric score onto a risk level using inclusive lower bounds:
   * 80 and up is CRITICAL, 60 and up HIGH, 40 and up MEDIUM, anything else LOW.
   */
  classifyRisk(score: number): RiskLevel {
    const bands: Array<[number, RiskLevel]> = [
      [80, 'CRITICAL'],
      [60, 'HIGH'],
      [40, 'MEDIUM']
    ];

    for (const [floor, level] of bands) {
      if (score >= floor) return level;
    }
    return 'LOW';
  }
}

2.3 Human Override Mechanisms

Allow humans to intervene during agent execution:

typescript
/**
 * Commands a human can issue to intervene while an agent operation runs,
 * and the channels those interventions travel over.
 */
interface HumanOverride {
  // Supported intervention types and their effects
  overrideTypes: {
    PAUSE: {
      description: 'Pause current operation';
      effect: 'Operations queued, agent awaits input';
      resumeOptions: ['continue', 'modify', 'cancel'];
    };
    MODIFY: {
      description: 'Change parameters mid-execution';
      effect: 'Apply new parameters, continue';
      allowedMods: ['budget', 'timeout', 'scope', 'approach'];
    };
    CANCEL: {
      description: 'Stop current operation';
      effect: 'Graceful shutdown with cleanup';
      requiresReason: true;
    };
    ESCALATE: {
      description: 'Escalate to senior human';
      effect: 'Transfer to designated expert';
      autoEscalateAfter: 1800000; // 30 min
    };
    APPROVE_CONDITIONAL: {
      description: 'Approve with conditions';
      effect: 'Continue with constraints applied';
      conditions: ['max_cost', 'max_time', 'additional_tests'];
    };
  };
  
  // Channels over which overrides are delivered
  channels: [
    'in_app_notification',
    'slack_alert',
    'email_urgent',
    'pager_duty_critical'
  ];
  
  /**
   * Subscribe to override commands for one operation and dispatch each
   * command to the matching handler.
   *
   * NOTE(review): only PAUSE, MODIFY, CANCEL and ESCALATE are dispatched.
   * APPROVE_CONDITIONAL and any unrecognized command type are ignored.
   * Confirm whether that is intended.
   *
   * @param operationId Operation whose `override:<id>` topic is subscribed to.
   */
  async registerOverrideHandler(operationId: string): Promise<void> {
    const topic = `override:${operationId}`;

    this.commandBus.subscribe(topic, async (command) => {
      const kind = command.type;

      if (kind === 'PAUSE') {
        await this.pauseOperation(operationId);
      } else if (kind === 'MODIFY') {
        await this.modifyOperation(operationId, command.params);
      } else if (kind === 'CANCEL') {
        await this.cancelOperation(operationId, command.reason);
      } else if (kind === 'ESCALATE') {
        await this.escalateOperation(operationId, command.to);
      }
    });
  }
}

2.4 Feedback Collection Integration

Capture human feedback for learning:

typescript
/**
 * Collects explicit and implicit human feedback at defined touchpoints.
 */
interface HumanFeedbackCollector {
  // Events that prompt a feedback request, and the fields gathered at each
  touchpoints: [
    {
      id: 'REVIEW_FEEDBACK';
      trigger: 'human_completes_review';
      collect: ['accuracy', 'helpfulness', 'false_positives', 'missed_issues'];
    },
    {
      id: 'DEPLOY_FEEDBACK';
      trigger: 'post_deployment';
      collect: ['stability', 'performance', 'rollbacks_required'];
    },
    {
      id: 'INTERRUPTION_FEEDBACK';
      trigger: 'human_intervenes';
      collect: ['reason', 'agent_behavior', 'suggestion'];
    }
  ];
  
  /**
   * Issue a feedback request without waiting for the answer and return the
   * implicit feedback available right now.
   *
   * @param touchpoint Touchpoint the feedback form is generated for.
   * @param context    Context used for the form and for implicit signals.
   * @returns Feedback whose `explicit` part is null at return time.
   */
  async collectFeedback(touchpoint: Touchpoint, context: Context): Promise<Feedback> {
    const form = this.generateFeedbackForm(touchpoint, context);
    
    // Fire and forget: a failed request is logged, never propagated
    this.sendFeedbackRequest(form).catch(err => {
      this.logger.warn('Feedback collection failed', err);
    });
    
    const implicit = this.inferImplicitFeedback(context);
    const timestamp = new Date();

    return {
      explicit: null, // Filled later if user responds
      implicit,
      timestamp
    };
  }
  
  /**
   * Derive feedback signals from the context rather than from a user-supplied
   * answer.
   */
  inferImplicitFeedback(context: Context): ImplicitFeedback {
    const { approvalDuration, humanEdits, agent, user } = context;

    return {
      timeToApprove: approvalDuration,
      modificationsMade: humanEdits.length,
      approvalRate: this.calculateApprovalRate(agent),
      repeatInteractions: this.countRepeatInteractions(user)
    };
  }
}

3. Auto-Rollback Triggers

3.1 Deployment Health Triggers

typescript
/**
 * Post-deployment health evaluation that decides between rolling back,
 * escalating to a human, or continuing to monitor.
 */
interface DeploymentRollbackTriggers {
  // Thresholds for the metrics watched after a deployment
  healthChecks: {
    errorRate: {
      baseline: number;      // Historical baseline
      threshold: 1.5;        // 1.5x baseline = warning
      critical: 2.0;         // 2x baseline = rollback
      window: 300000;        // 5 minute evaluation window
    };
    latency: {
      p50: { threshold: 1.5, critical: 2.0 };
      p95: { threshold: 1.5, critical: 2.0 };
      p99: { threshold: 2.0, critical: 3.0 };
    };
    throughput: {
      threshold: 0.8;        // 80% of baseline = warning
      critical: 0.6;         // 60% of baseline = rollback
    };
    saturation: {
      cpu: { warning: 80, critical: 90 };
      memory: { warning: 85, critical: 95 };
    };
  };
  
  /**
   * Run all four health checks and combine them into one decision.
   *
   * @param deployment Deployment whose metrics and baseline are collected.
   * @returns ROLLBACK_IMMEDIATE if any check is CRITICAL,
   *          ESCALATE_FOR_DECISION if two or more checks are WARNING,
   *          otherwise CONTINUE_MONITORING. The raw checks are always included.
   */
  async evaluateHealth(deployment: Deployment): Promise<HealthDecision> {
    const metrics = await this.collectMetrics(deployment);
    const baseline = await this.getBaseline(deployment);
    
    const checks = {
      errorRate: this.checkErrorRate(metrics, baseline),
      latency: this.checkLatency(metrics, baseline),
      throughput: this.checkThroughput(metrics, baseline),
      saturation: this.checkSaturation(metrics)
    };

    // Names of the checks that reported the given level
    const namesAtLevel = (level: string) =>
      Object.entries(checks)
        .filter(([_, result]) => result.level === level)
        .map(([name]) => name);
    
    // One critical check is enough for an immediate rollback
    const criticalNames = namesAtLevel('CRITICAL');
    if (criticalNames.length > 0) {
      return {
        action: 'ROLLBACK_IMMEDIATE',
        reason: `Critical failures in: ${criticalNames.join(', ')}`,
        checks
      };
    }
    
    // Two or more warnings go to a human
    const warningNames = namesAtLevel('WARNING');
    if (warningNames.length >= 2) {
      return {
        action: 'ESCALATE_FOR_DECISION',
        reason: `Multiple warnings: ${warningNames.join(', ')}`,
        checks,
        timeout: 300000 // 5 minutes for human decision
      };
    }
    
    return { action: 'CONTINUE_MONITORING', checks };
  }
}

3.2 Business Metric Triggers

typescript
/**
 * Business-level rollback triggers evaluated after a deployment.
 */
interface BusinessMetricTriggers {
  // Business-level rollback conditions
  businessMetrics: {
    conversionRate: {
      baseline: number;
      threshold: 0.9;  // 10% drop
      critical: 0.8;   // 20% drop = rollback
      minSample: 1000; // Minimum events before evaluating
    };
    revenuePerMinute: {
      threshold: 0.85;
      critical: 0.7;
      window: 600000;  // 10 minutes
    };
    userErrors: {
      supportTickets: { threshold: 1.5, critical: 2.0 };
      errorReports: { threshold: 2.0, critical: 3.0 };
    };
    engagement: {
      sessionDuration: { threshold: 0.8, critical: 0.7 };
      bounceRate: { threshold: 1.2, critical: 1.5 };
    };
  };
  
  /**
   * Decide whether business metrics warrant a rollback.
   *
   * @param deployment Deployment whose events and business metrics are read.
   * @returns INSUFFICIENT_DATA until the minimum sample size is reached,
   *          ROLLBACK_IMMEDIATE on a revenue drop above 30%,
   *          ROLLBACK_WITH_APPROVAL when two or more impacts are critical,
   *          otherwise CONTINUE.
   */
  async evaluateBusinessImpact(deployment: Deployment): Promise<BusinessDecision> {
    // Only evaluate if we have sufficient data. The minimum sample size is
    // declared under businessMetrics.conversionRate. Reading it from `this`
    // directly yields undefined, and `events < undefined` is always false,
    // which disabled this guard entirely.
    const minSample = this.businessMetrics.conversionRate.minSample;
    const events = await this.getEventCount(deployment);
    if (events < minSample) {
      return { action: 'INSUFFICIENT_DATA', waitFor: minSample - events };
    }
    
    const metrics = await this.collectBusinessMetrics(deployment);
    const impacts = this.calculateImpacts(metrics);
    
    // Revenue protection - critical threshold
    if (impacts.revenue.drop > 0.3) { // 30% revenue drop
      return {
        action: 'ROLLBACK_IMMEDIATE',
        priority: 'P0',
        reason: `Revenue drop of ${(impacts.revenue.drop * 100).toFixed(1)}% detected`,
        estimatedLoss: impacts.revenue.estimatedLoss
      };
    }
    
    // Compound impact check
    const impactedMetrics = Object.entries(impacts)
      .filter(([_, v]) => v.severity === 'critical')
      .length;
    
    if (impactedMetrics >= 2) {
      return {
        action: 'ROLLBACK_WITH_APPROVAL',
        reason: `Multiple critical business metrics affected`,
        impacts,
        approvalTimeout: 600000 // 10 minutes
      };
    }
    
    return { action: 'CONTINUE', impacts };
  }
}

3.3 Synthetic Test Triggers

typescript
/**
 * Continuous synthetic tests whose repeated failure triggers a rollback.
 *
 * Each test declares a `failureThreshold` of consecutive failures. A test
 * contributes to a rollback decision only once its current failure streak
 * reaches that threshold.
 */
interface SyntheticTestTriggers {
  // Continuous synthetic testing
  syntheticTests: [
    {
      name: 'critical_user_flow';
      frequency: 60000; // Every minute
      assertions: ['login', 'checkout', 'search'];
      failureThreshold: 2; // Consecutive failures
    },
    {
      name: 'api_health';
      frequency: 30000; // Every 30 seconds
      endpoints: ['/health', '/api/v1/status'];
      failureThreshold: 3;
    },
    {
      name: 'database_connectivity';
      frequency: 60000;
      failureThreshold: 1; // Immediate rollback
    }
  ];

  // Current consecutive-failure streak per test name. Reset to 0 on a pass.
  // Must be initialized (e.g. to an empty Map) before the first run.
  consecutiveFailures: Map<string, number>;
  
  /**
   * Execute every synthetic test once, update failure streaks, and decide.
   *
   * Previously a single failure of a critical test rolled back immediately
   * regardless of `failureThreshold`, and the API branch required two
   * 'api_health' results in one run, which cannot happen because each test
   * yields exactly one result.
   *
   * @param deployment Deployment the tests are executed against.
   * @returns ROLLBACK_IMMEDIATE when 'critical_user_flow' or
   *          'database_connectivity' has reached its threshold,
   *          ROLLBACK_WITH_GRACE_PERIOD when 'api_health' has,
   *          otherwise HEALTHY. Raw results are always included.
   */
  async runSyntheticTests(deployment: Deployment): Promise<TestDecision> {
    const results = await Promise.all(
      this.syntheticTests.map(async test => {
        const result = await this.executeTest(test, deployment);
        return { test: test.name, ...result };
      })
    );
    
    // Promise.all preserves input order, so results[i] belongs to
    // syntheticTests[i].
    const breached = new Set<string>();
    this.syntheticTests.forEach((test, i) => {
      if (results[i].passed) {
        this.consecutiveFailures.set(test.name, 0);
        return;
      }
      const streak = (this.consecutiveFailures.get(test.name) ?? 0) + 1;
      this.consecutiveFailures.set(test.name, streak);
      if (streak >= test.failureThreshold) {
        breached.add(test.name);
      }
    });
    
    // Critical path failure = immediate rollback
    const criticalFailures = ['critical_user_flow', 'database_connectivity']
      .filter(name => breached.has(name));
    
    if (criticalFailures.length > 0) {
      return {
        action: 'ROLLBACK_IMMEDIATE',
        reason: `Critical synthetic tests failing: ${criticalFailures.join(', ')}`,
        results
      };
    }
    
    // Repeated API health failures
    if (breached.has('api_health')) {
      return {
        action: 'ROLLBACK_WITH_GRACE_PERIOD',
        reason: 'Multiple API health checks failing',
        gracePeriod: 180000, // 3 minutes to recover
        results
      };
    }
    
    return { action: 'HEALTHY', results };
  }
}

3.4 Rollback Execution Procedures

typescript
/**
 * Rollback strategies and the procedure that executes them.
 */
interface RollbackProcedure {
  // Available strategies with their trade-offs
  strategies: {
    IMMEDIATE: {
      description: 'Instant rollback to previous version';
      duration: '< 1 minute';
      risk: 'May interrupt active requests';
      useWhen: 'Critical failure, data corruption';
    };
    GRADUAL: {
      description: 'Gradual traffic shift back';
      duration: '5-10 minutes';
      risk: 'Extended degraded state';
      useWhen: 'Non-critical degradation';
    };
    FEATURE_FLAG: {
      description: 'Disable specific features';
      duration: '< 30 seconds';
      risk: 'Partial functionality loss';
      useWhen: 'Feature-specific issue';
    };
    DATA_ROLLBACK: {
      description: 'Revert data changes';
      duration: 'Variable';
      risk: 'Data loss if new data written';
      useWhen: 'Data corruption detected';
    };
  };
  
  /**
   * Roll a deployment back using the given strategy.
   *
   * Steps run in order: pre-rollback checks, stakeholder notification, the
   * strategy-specific rollback, verification, then post-rollback actions.
   * A strategy other than the four handled below performs no rollback step,
   * but verification and post-rollback actions still run.
   *
   * @param deployment Deployment to roll back.
   * @param trigger    What caused the rollback; `affectedFeatures` is read
   *                   only for the FEATURE_FLAG strategy.
   * @param strategy   Strategy to apply.
   * @returns Success flag from verification, elapsed milliseconds, the
   *          strategy used and the verification result.
   */
  async executeRollback(
    deployment: Deployment,
    trigger: Trigger,
    strategy: RollbackStrategy
  ): Promise<RollbackResult> {
    const startedAt = Date.now();
    
    await this.preRollbackChecks(deployment);
    await this.notifyRollback(deployment, trigger, strategy);
    
    // Strategy-specific rollback
    if (strategy === 'IMMEDIATE') {
      await this.immediateRollback(deployment);
    } else if (strategy === 'GRADUAL') {
      await this.gradualRollback(deployment);
    } else if (strategy === 'FEATURE_FLAG') {
      await this.featureFlagRollback(deployment, trigger.affectedFeatures);
    } else if (strategy === 'DATA_ROLLBACK') {
      await this.dataRollback(deployment);
    }
    
    // Confirm the rollback took effect, then run follow-up actions
    const verification = await this.verifyRollback(deployment);
    await this.postRollback(deployment, verification);
    
    const duration = Date.now() - startedAt;
    return {
      success: verification.healthy,
      duration,
      strategy,
      verification
    };
  }
  
  /**
   * Follow-up work after a rollback: optional scale-up, diagnostics,
   * post-mortem scheduling and incident update, in that order.
   *
   * @param deployment   Deployment that was rolled back.
   * @param verification Result of verifying the rollback.
   */
  async postRollback(deployment: Deployment, verification: Verification): Promise<void> {
    // Add capacity to the previous version when it reports less than expected
    const needsCapacity = verification.loadCapacity < EXPECTED_CAPACITY;
    if (needsCapacity) {
      await this.scaleUp(deployment.previousVersion);
    }
    
    await this.captureDiagnostics(deployment);
    await this.schedulePostMortem(deployment);
    await this.updateIncident(deployment, verification);
  }
}

4. Cost Controls

4.1 API Cost Budgeting

typescript
/**
 * Model pricing, per-phase API budgets, and budget-aware model selection.
 */
interface APICostController {
  // Model cost tiers (per 1K tokens)
  modelCosts: {
    'gpt-4': { input: 0.03, output: 0.06 };
    'gpt-4-turbo': { input: 0.01, output: 0.03 };
    'claude-3-opus': { input: 0.015, output: 0.075 };
    'claude-3-sonnet': { input: 0.003, output: 0.015 };
    'claude-3-haiku': { input: 0.00025, output: 0.00125 };
  };
  
  // Budget, default model and token cap for each phase
  phaseBudgets: {
    planning: {
      total: 5.00;
      model: 'claude-3-sonnet'; // Cost-effective for planning
      maxTokens: 100000;
    };
    implementation: {
      total: 10.00;
      model: 'claude-3-sonnet';
      maxTokens: 200000;
      iterations: {
        initial: { model: 'claude-3-sonnet', budget: 0.50 };
        refinement: { model: 'claude-3-haiku', budget: 0.25 };
        final: { model: 'claude-3-sonnet', budget: 0.50 };
      };
    };
    testing: {
      total: 3.00;
      model: 'claude-3-haiku'; // Fast, cheap for test generation
      maxTokens: 50000;
    };
    review: {
      total: 2.00;
      model: 'claude-3-haiku';
      maxTokens: 30000;
    };
  };
  
  /**
   * Pick the first candidate model whose estimated cost fits within 80% of
   * the remaining budget.
   *
   * Candidates are tried in the order returned by `getCandidateModels`.
   * NOTE(review): presumably that order is cheapest-first; confirm.
   *
   * @param task            Task to run; its complexity selects the candidates.
   * @param remainingBudget Budget still available for the task.
   * @returns The selected model with its estimated cost and adequacy
   *          confidence, or `model: null` with reason INSUFFICIENT_BUDGET.
   */
  async optimizeModelSelection(
    task: Task,
    remainingBudget: number
  ): Promise<ModelSelection> {
    const candidates = this.getCandidateModels(task.complexity);
    const spendCeiling = remainingBudget * 0.8; // Leave 20% buffer
    
    for (const model of candidates) {
      const estimatedCost = this.estimateCost(task, model);
      if (estimatedCost > spendCeiling) {
        continue;
      }

      return {
        model,
        estimatedCost,
        confidence: this.assessModelAdequacy(model, task)
      };
    }
    
    // Nothing fits: report what the first candidate would cost
    const [firstCandidate] = candidates;
    return {
      model: null,
      reason: 'INSUFFICIENT_BUDGET',
      required: this.estimateCost(task, firstCandidate),
      available: remainingBudget
    };
  }
}

4.2 Compute Cost Management

typescript
/**
 * Compute pricing, per-task resource defaults, and urgency-aware resource
 * planning.
 */
interface ComputeCostController {
  // Resource pricing (example AWS costs)
  resourcePricing: {
    compute: {
      'c6i.large': 0.085;   // per hour
      'c6i.xlarge': 0.17;
      'c6i.2xlarge': 0.34;
    };
    storage: {
      ssd: 0.10;  // per GB-month
      archive: 0.012;
    };
    network: {
      ingress: 0;
      egress: 0.09; // per GB
    };
  };
  
  // Task resource allocation
  taskResources: {
    build: {
      instance: 'c6i.large';
      maxDuration: 600; // seconds
      parallelization: 4;
    };
    test: {
      instance: 'c6i.xlarge';
      maxDuration: 1800;
      parallelization: 8;
    };
    deploy: {
      instance: 'c6i.large';
      maxDuration: 300;
      parallelization: 2;
    };
  };
  
  /**
   * Build a resource plan for a task, adjusted for urgency.
   *
   * @param task    Task to plan; `task.type` selects the base configuration.
   * @param urgency 'HIGH' upgrades the instance and doubles parallelization,
   *                'LOW' uses spot instances, anything else uses the base
   *                configuration unchanged.
   * @returns The plan including its estimated cost.
   * @throws Error when no base configuration exists for `task.type`.
   */
  async optimizeResourceAllocation(
    task: Task,
    urgency: Urgency
  ): Promise<ResourcePlan> {
    const baseConfig = this.taskResources[task.type];

    // Fail with a clear message instead of a TypeError on property access
    if (baseConfig === undefined) {
      throw new Error(`No resource configuration for task type: ${task.type}`);
    }
    
    // For urgent tasks, use more resources to complete faster
    if (urgency === 'HIGH') {
      const expedited = {
        ...baseConfig,
        instance: this.upgradeInstance(baseConfig.instance),
        parallelization: baseConfig.parallelization * 2
      };

      // Price the plan actually returned. Costing the base instance and base
      // parallelization understated the estimate for the upgraded resources.
      return {
        ...expedited,
        estimatedCost: this.calculateCost({
          ...expedited,
          duration: baseConfig.maxDuration / 2
        })
      };
    }
    
    // For non-urgent, use spot instances
    if (urgency === 'LOW') {
      return {
        ...baseConfig,
        instance: baseConfig.instance,
        spot: true,
        estimatedCost: this.calculateCost({
          ...baseConfig,
          spotDiscount: 0.7
        })
      };
    }
    
    return {
      ...baseConfig,
      estimatedCost: this.calculateCost(baseConfig)
    };
  }
}

4.3 Token Usage Optimization

typescript
/**
 * Strategies and helpers for shrinking prompts so they fit a model's
 * context window.
 */
interface TokenOptimizer {
  // Context compression strategies
  compressionStrategies: {
    SUMMARIZE_HISTORY: {
      when: 'context > 50% of limit';
      action: 'Summarize older context, keep recent full';
      savings: '~30-40%';
    };
    SELECTIVE_CONTEXT: {
      when: 'large codebase';
      action: 'Include only relevant files';
      savings: '~50-70%';
    };
    EMBEDDING_RETRIEVAL: {
      when: 'knowledge base queries';
      action: 'Retrieve relevant chunks vs full docs';
      savings: '~80-90%';
    };
    PROGRESSIVE_DISCLOSURE: {
      when: 'complex tasks';
      action: 'Start with outline, detail on request';
      savings: '~40-60%';
    };
  };
  
  /**
   * Shrink a prompt when it uses more than 70% of the model's context window.
   *
   * Above 90% the history is summarized and context is made selective.
   * Between 70% and 90% redundancy is removed and code is compressed.
   *
   * @param prompt Prompt to optimize.
   * @param model  Target model; `contextWindow` is its token limit.
   * @returns The (possibly unchanged) prompt with token counts and the
   *          optimization level applied.
   */
  async optimizePrompt(prompt: Prompt, model: Model): Promise<OptimizedPrompt> {
    const currentTokens = this.estimateTokens(prompt);
    const maxTokens = model.contextWindow;
    
    if (currentTokens <= maxTokens * 0.7) {
      return { prompt, tokens: currentTokens, optimization: 'NONE' };
    }
    
    // Apply optimizations
    let optimized = prompt;
    
    if (currentTokens > maxTokens * 0.9) {
      // Aggressive compression needed
      optimized = await this.summarizeHistory(optimized);
      optimized = await this.selectiveContext(optimized);
    } else {
      // Moderate optimization
      optimized = await this.removeRedundancy(optimized);
      optimized = await this.compressCode(optimized);
    }
    
    const newTokens = this.estimateTokens(optimized);
    
    return {
      prompt: optimized,
      tokens: newTokens,
      originalTokens: currentTokens,
      savings: currentTokens - newTokens,
      optimization: currentTokens > maxTokens * 0.9 ? 'AGGRESSIVE' : 'MODERATE'
    };
  }
  
  /**
   * Strip `//` line comments and blank lines from source text.
   *
   * String/template literals and block comments (including JSDoc) are copied
   * verbatim, so a `//` inside them, such as in a URL, is preserved. The
   * previous regex removed everything after any `//`, truncating such
   * literals.
   *
   * Limitations: the scanner is lexical only. A `//` inside a regex literal
   * and backticks nested inside a template `${...}` expression are not
   * recognized.
   *
   * @param code Source text to compress.
   * @returns The compressed source text.
   */
  async compressCode(code: string): Promise<string> {
    let compressed = '';
    let i = 0;

    while (i < code.length) {
      const ch = code[i];
      const next = code[i + 1];

      // String or template literal: copy through the closing quote
      if (ch === '"' || ch === "'" || ch === '`') {
        let j = i + 1;
        while (j < code.length) {
          const c = code[j];
          if (c === '\\') { j += 2; continue; }   // skip escaped character
          if (c === ch) { j++; break; }           // closing quote
          // An unterminated single/double-quoted string ends at the newline
          if (c === '\n' && ch !== '`') break;
          j++;
        }
        compressed += code.slice(i, j);
        i = j;
        continue;
      }

      // Block comment (preserves JSDoc): copy through the closing */
      if (ch === '/' && next === '*') {
        const close = code.indexOf('*/', i + 2);
        const end = close === -1 ? code.length : close + 2;
        compressed += code.slice(i, end);
        i = end;
        continue;
      }

      // Line comment: drop everything up to, but not including, the line end
      if (ch === '/' && next === '/') {
        while (i < code.length && code[i] !== '\n' && code[i] !== '\r') {
          i++;
        }
        continue;
      }

      compressed += ch;
      i++;
    }
    
    // Remove extra whitespace
    compressed = compressed.replace(/\n\s*\n/g, '\n');
    
    // Minimize imports (keep structure, remove unused)
    // This is done by AST analysis
    
    return compressed;
  }
}

4.4 Cost Monitoring and Alerting

typescript
/**
 * Records operation costs and raises alerts for task budgets, the daily
 * budget and anomalous spend.
 */
interface CostMonitor {
  // Alert thresholds
  alertThresholds: {
    task: {
      warning: 0.75;  // 75% of budget
      critical: 0.90; // 90% of budget
      halt: 1.00;     // 100% of budget
    };
    daily: {
      warning: 150;   // $150
      critical: 180;  // $180
      halt: 200;      // $200
    };
    anomalous: {
      // Alert if spend is Nx normal for this task type
      multiplier: 2.0;
      minSampleSize: 5;
    };
  };
  
  /**
   * Record a cost, evaluate every alert threshold, send the resulting alerts
   * and return them.
   *
   * Every configured tier (warning, critical, halt) is now evaluated for both
   * the task and the daily budget. Previously the task `warning` tier and the
   * daily `warning`/`critical` tiers were declared but never checked.
   *
   * @param operation Operation that incurred the cost.
   * @param cost      Cost to record; `cost.amount` feeds the anomaly check.
   * @returns The alerts raised, in the order task, daily, anomaly.
   */
  async trackAndAlert(
    operation: Operation,
    cost: Cost
  ): Promise<Alert[]> {
    await this.recordCost(operation, cost);
    
    const alerts = [];
    
    // Check task budget (thresholds are fractions of the task budget)
    const taskSpend = await this.getTaskSpend(operation.taskId);
    const taskBudget = this.getTaskBudget(operation.type);
    const taskPercent = taskSpend / taskBudget;
    const taskDetails = { taskSpend, taskBudget, percent: taskPercent };
    
    if (taskPercent >= this.alertThresholds.task.halt) {
      alerts.push({
        severity: 'CRITICAL',
        type: 'TASK_BUDGET_EXHAUSTED',
        action: 'HALT_OPERATION',
        details: taskDetails
      });
    } else if (taskPercent >= this.alertThresholds.task.critical) {
      alerts.push({
        severity: 'HIGH',
        type: 'TASK_BUDGET_CRITICAL',
        action: 'NOTIFY_AND_MONITOR',
        details: taskDetails
      });
    } else if (taskPercent >= this.alertThresholds.task.warning) {
      alerts.push({
        severity: 'MEDIUM',
        type: 'TASK_BUDGET_WARNING',
        action: 'REVIEW_OPERATION',
        details: taskDetails
      });
    }
    
    // Check daily budget (thresholds are absolute dollar amounts)
    const dailySpend = await this.getDailySpend();
    if (dailySpend >= this.alertThresholds.daily.halt) {
      alerts.push({
        severity: 'CRITICAL',
        type: 'DAILY_BUDGET_EXHAUSTED',
        action: 'HALT_ALL_OPERATIONS',
        details: { dailySpend, limit: this.alertThresholds.daily.halt }
      });
    } else if (dailySpend >= this.alertThresholds.daily.critical) {
      alerts.push({
        severity: 'HIGH',
        type: 'DAILY_BUDGET_CRITICAL',
        action: 'NOTIFY_AND_MONITOR',
        details: { dailySpend, limit: this.alertThresholds.daily.critical }
      });
    } else if (dailySpend >= this.alertThresholds.daily.warning) {
      alerts.push({
        severity: 'MEDIUM',
        type: 'DAILY_BUDGET_WARNING',
        action: 'NOTIFY_AND_MONITOR',
        details: { dailySpend, limit: this.alertThresholds.daily.warning }
      });
    }
    
    // Check for anomalies, but only with enough history to form an average
    const historical = await this.getHistoricalCost(operation.type);
    if (historical.count >= this.alertThresholds.anomalous.minSampleSize) {
      const avgCost = historical.total / historical.count;
      if (cost.amount > avgCost * this.alertThresholds.anomalous.multiplier) {
        alerts.push({
          severity: 'MEDIUM',
          type: 'ANOMALOUS_COST',
          action: 'REVIEW_OPERATION',
          details: { current: cost.amount, average: avgCost }
        });
      }
    }
    
    // Send alerts one at a time, in the order they were raised
    for (const alert of alerts) {
      await this.sendAlert(alert);
    }
    
    return alerts;
  }
}

5. Decision Matrices

5.1 Circuit Breaker Decision Matrix

Condition	Iteration	Error Rate	Cost	Time	Resource	Action
Normal	< 50% max	< 10%	< 75% budget	< 75% limit	< 70%	Continue
Warning	50-75%	10-25%	75-90%	75-90%	70-85%	Increase monitoring
Critical	75-100%	25-50%	90-100%	90-100%	85-95%	Reduce concurrency, notify
Break	≥ 100%	≥ 50%	≥ 100%	≥ 100%	≥ 95%	Halt, escalate to human

5.2 Human-in-the-Loop Decision Matrix

Risk Factor	Low (< 40)	Medium (40–59)	High (60–79)	Critical (80+)
Auto-approve	Yes	No	No	No
Notify human	Optional	Yes	Yes	Yes
Require approval	No	Optional	Yes	Yes + escalation
Review depth	Automated	Spot check	Comprehensive	Expert review
Response time	N/A	24 hours	4 hours	30 minutes

5.3 Rollback Decision Matrix

Trigger Type	Single Warning	Multiple Warnings	Single Critical	Multiple Critical
Error Rate	Monitor	Escalate	Rollback (5 min)	Rollback (immediate)
Latency	Monitor	Escalate	Rollback (10 min)	Rollback (immediate)
Business metric	Alert	Escalate	Rollback (approval)	Rollback (immediate)
Synthetic test	Retry	Escalate	Rollback (immediate)	Rollback (immediate)

5.4 Cost Control Decision Matrix

Budget Status	< 50%	50-75%	75-90%	90-100%	> 100%
Model selection	Optimal	Optimal	Cheapest	Cheapest	Block
Parallelization	Full	Full	Limited	Minimal	None
Notifications	None	Daily	Immediate	Immediate	Critical
Action required	None	None	Review	Approve extension	Halt

6. Recovery Procedures

6.1 Post-Breaker Recovery

typescript
/**
 * Recovery procedures that run after a circuit breaker has tripped.
 *
 * `recoverFromBreaker` is the entry point and routes to one handler per
 * breaker type. Only the iteration and error-rate handlers are spelled out
 * in this interface; the cost, timeout and resource handlers are referenced
 * but not defined here.
 */
interface RecoveryProcedure {
  /**
   * Routes a tripped breaker to its recovery handler.
   *
   * @param breakerType - Which breaker tripped.
   * @param context - Context of the interrupted work; forwarded unchanged to the handler.
   * @returns The selected handler's result. A breaker type without a handler
   *   produces an `ESCALATE` result instead of resolving to `undefined`.
   */
  async recoverFromBreaker(
    breakerType: BreakerType,
    context: Context
  ): Promise<RecoveryResult> {
    switch (breakerType) {
      case 'ITERATION':
        return this.recoverFromIterationBreak(context);
      case 'ERROR_RATE':
        return this.recoverFromErrorBreak(context);
      case 'COST':
        return this.recoverFromCostBreak(context);
      case 'TIMEOUT':
        return this.recoverFromTimeout(context);
      case 'RESOURCE':
        return this.recoverFromResourceExhaustion(context);
      default:
        // Fail safe: a breaker type added later without a matching handler
        // must not fall out of the switch and resolve to `undefined`.
        // Hand the situation to a human instead.
        return {
          action: 'ESCALATE',
          reason: `No recovery procedure defined for breaker type: ${String(breakerType)}`
        };
    }
  }
  
  /**
   * Recovery after the iteration breaker trips: always hands the decision to
   * a human, offering a fixed set of four options.
   *
   * @param context - Context of the loop that was interrupted.
   * @returns Whatever `executeRecoveryDecision` produces for the option the
   *   human selected.
   */
  async recoverFromIterationBreak(context: Context): Promise<RecoveryResult> {
    // 1. Capture diagnostic information
    // NOTE(review): `diagnostics` is not passed to any later step in this
    // method; confirm that captureDiagnostics persists its output itself.
    const diagnostics = await this.captureDiagnostics(context);
    
    // 2. Summarize progress for human review
    const summary = await this.summarizeProgress(context);
    
    // 3. Offer recovery options
    // Only CONTINUE_WITH_MODIFICATIONS asks the human for additional input.
    const options = [
      {
        id: 'CONTINUE_WITH_MODIFICATIONS',
        description: 'Continue with human-specified changes',
        requiresInput: true
      },
      {
        id: 'SIMPLIFY_APPROACH',
        description: 'Restart with simplified approach',
        requiresInput: false
      },
      {
        id: 'ESCALATE_TO_EXPERT',
        description: 'Hand off to senior engineer',
        requiresInput: false
      },
      {
        id: 'ABANDON',
        description: 'Abandon this task',
        requiresInput: false
      }
    ];
    
    // 4. Wait for human decision
    const decision = await this.requestHumanDecision(context, summary, options);
    
    // 5. Execute decision
    return this.executeRecoveryDecision(decision, context);
  }
  
  /**
   * Recovery after the error-rate breaker trips.
   *
   * Checks are applied in order and the first match wins: transient errors
   * are retried after a fixed wait, upstream-dependency errors are reported
   * to the owning team, and everything else is escalated.
   *
   * @param context - Context of the work that produced the errors.
   * @returns `RETRY`, `WAIT_FOR_UPSTREAM` or `ESCALATE`, depending on the
   *   error analysis.
   */
  async recoverFromErrorBreak(context: Context): Promise<RecoveryResult> {
    // 1. Analyze error patterns
    const errorAnalysis = await this.analyzeErrors(context);
    
    // 2. Check if transient
    if (errorAnalysis.transient) {
      // Wait and retry
      await this.delay(300000); // 5 minutes
      return { action: 'RETRY', confidence: 0.7 };
    }
    
    // 3. Check if upstream issue
    if (errorAnalysis.upstreamDependency) {
      await this.notifyUpstreamTeam(errorAnalysis.dependency);
      return { action: 'WAIT_FOR_UPSTREAM', eta: errorAnalysis.estimatedResolution };
    }
    
    // 4. Requires human investigation
    // The full analysis is attached so the investigator starts with it.
    return {
      action: 'ESCALATE',
      reason: 'Non-transient errors requiring investigation',
      diagnostics: errorAnalysis
    };
  }
}

6.2 State Preservation and Resume

typescript
/**
 * Checkpointing of orchestration state so that interrupted work can be
 * resumed from a saved point.
 */
interface CheckpointSystem {
  // Automatic checkpointing at key milestones
  // All four phases auto-save; only IMPLEMENTATION_MILESTONE also declares
  // a recurring interval.
  checkpoints: [
    { phase: 'PLANNING_COMPLETE', autoSave: true },
    { phase: 'IMPLEMENTATION_MILESTONE', autoSave: true, interval: 'every_30_min' },
    { phase: 'TEST_RESULTS', autoSave: true },
    { phase: 'PRE_DEPLOY', autoSave: true }
  ];
  
  /**
   * Builds a checkpoint record for the given phase, persists it and returns it.
   *
   * @param phase - Phase the checkpoint is taken at.
   * @param state - Live system state; stored in serialized form, with its
   *   accumulated cost, elapsed time and iteration count copied into
   *   `metadata`.
   * @returns The checkpoint after it has been persisted.
   */
  async createCheckpoint(
    phase: Phase,
    state: SystemState
  ): Promise<Checkpoint> {
    const checkpoint = {
      id: generateId(),
      timestamp: new Date(),
      phase,
      state: await this.serializeState(state),
      metadata: {
        cost: state.accumulatedCost,
        duration: state.elapsedTime,
        iteration: state.iterationCount
      }
    };
    
    await this.persistCheckpoint(checkpoint);
    return checkpoint;
  }
  
  /**
   * Restores state from a checkpoint, walking back to earlier checkpoints
   * while the current one fails validation.
   *
   * @param checkpointId - Id of the checkpoint to resume from.
   * @returns The deserialized state, the resume time, the checkpoint that
   *   was actually used and the recovery actions generated for it.
   * @throws Error when the checkpoint fails validation and no earlier
   *   checkpoint exists to fall back to.
   */
  async resumeFromCheckpoint(checkpointId: string): Promise<ResumedState> {
    const checkpoint = await this.loadCheckpoint(checkpointId);
    
    // Validate checkpoint integrity
    const valid = await this.validateCheckpoint(checkpoint);
    if (!valid) {
      // Try previous checkpoint
      const previous = await this.findPreviousCheckpoint(checkpoint);
      if (!previous) {
        // Reaching the start of the chain must end the walk with a clear
        // error rather than a property access on a missing value.
        throw new Error(
          `Checkpoint ${checkpointId} failed validation and no earlier checkpoint is available to resume from`
        );
      }
      return this.resumeFromCheckpoint(previous.id);
    }
    
    // Restore state
    const state = await this.deserializeState(checkpoint.state);
    
    return {
      state,
      resumedAt: new Date(),
      originalCheckpoint: checkpoint,
      recoveryActions: this.generateRecoveryActions(checkpoint)
    };
  }
}

7. Monitoring Requirements

7.1 Metrics Dashboard

Required metrics for operational visibility:

yaml
# Metric definitions for the operational dashboard, grouped by concern.
# Every histogram declares explicit bucket boundaries.
metrics:
  # Circuit breaker activity and time to recover.
  circuit_breakers:
    - name: breaker_activations
      type: counter
      labels: [breaker_type, reason]

    - name: breaker_recovery_time
      type: histogram
      labels: [breaker_type]
      buckets: [1m, 5m, 15m, 1h, 4h, 24h]

  # Human approval gates: volume, latency and approval ratio.
  human_interactions:
    - name: human_gates_triggered
      type: counter
      labels: [gate_type, outcome]

    - name: human_response_time
      type: histogram
      labels: [gate_type]
      buckets: [5m, 15m, 1h, 4h, 24h]

    - name: approval_rate
      type: gauge
      labels: [gate_type]

  # Rollback volume, duration and success.
  rollbacks:
    - name: rollback_executions
      type: counter
      labels: [trigger_type, strategy]

    - name: rollback_duration
      type: histogram
      labels: [strategy]
      buckets: [30s, 1m, 5m, 15m]

    - name: rollback_success_rate
      type: gauge

  # Spend tracking.
  costs:
    - name: cost_per_task
      type: histogram
      labels: [task_type]
      buckets: [1, 5, 10, 25, 50, 100]

    - name: daily_spend
      type: gauge

    - name: budget_utilization
      type: gauge
      labels: [scope]  # task, daily, monthly

  # Task-level performance.
  performance:
    - name: task_duration
      type: histogram
      labels: [task_type, outcome]
      # Boundaries follow the phase and pipeline timeouts in the
      # Configuration Reference (15m, 20m, 30m, 1h, 2h), plus a 5m bucket
      # for short tasks.
      buckets: [5m, 15m, 20m, 30m, 1h, 2h]

    - name: iteration_count
      type: histogram
      labels: [task_type]
      # Boundaries follow the iteration limits of the iteration circuit
      # breaker (3, 5, 10, 20, 50), plus a bucket for single-pass tasks.
      buckets: [1, 3, 5, 10, 20, 50]

    - name: success_rate
      type: gauge
      labels: [task_type]

7.2 Alerting Rules

yaml
# Alerting rules, grouped into three severity tiers: critical, warning, info.
alerts:
  # Critical tier: every rule here pages, halts or escalates.
  critical:
    # Any breaker activation within the trailing 5m window.
    - name: CircuitBreakerActivated
      condition: rate(breaker_activations[5m]) > 0
      action: page_oncall
      
    # Any rollback within the trailing 5m window.
    - name: RollbackExecuted
      condition: rate(rollback_executions[5m]) > 0
      action: page_oncall + create_incident
      
    # 200 equals the costs.per_day default in the Configuration Reference.
    - name: DailyBudgetExceeded
      condition: daily_spend > 200
      action: halt_operations + notify_manager
      
    # NOTE(review): gate_timeout is not defined in this rule set; presumably
    # the per-gate timeout configured elsewhere. Confirm its source.
    - name: HumanGateTimeout
      condition: human_response_time > gate_timeout
      action: escalate + notify_manager
      
  # Warning tier: Slack notifications only.
  warning:
    # 0.1 equals the error_rates.warning default in the Configuration Reference.
    # NOTE(review): error_rate is not among the metrics listed in section 7.1;
    # confirm where it is emitted.
    - name: HighErrorRate
      condition: error_rate > 0.1
      duration: 5m
      action: slack_alert
      
    - name: ApproachingBudgetLimit
      condition: budget_utilization > 0.8
      action: slack_alert
      
    - name: SlowHumanResponse
      condition: human_response_time > 1h
      action: slack_reminder
      
  # Info tier: logging only.
  info:
    - name: TaskCompleted
      condition: task_status == 'completed'
      action: log_metrics
      
    # Flags a task costing more than twice the trailing 7-day average.
    - name: CostAnomaly
      condition: cost_per_task > 2 * avg(cost_per_task[7d])
      action: log_review

7.3 Audit Logging

typescript
/**
 * Audit-log schema: the four categories of safety event that are recorded,
 * the fields each record carries, and how long each category is retained.
 */
interface AuditLog {
  // All safety events must be logged
  // NOTE(review): ROLLBACK and COST do not list a 'timestamp' field, while
  // CIRCUIT_BREAKER and HUMAN_DECISION do. Confirm whether that is intended.
  events: [
    {
      // A circuit breaker tripped.
      type: 'CIRCUIT_BREAKER';
      fields: ['breaker_type', 'trigger_condition', 'action_taken', 'timestamp'];
      retention: '7years';
    },
    {
      // A human approval gate was decided.
      type: 'HUMAN_DECISION';
      fields: ['gate_type', 'decision', 'approver', 'timestamp', 'context_hash'];
      retention: '7years';
    },
    {
      // A deployment rollback was executed.
      type: 'ROLLBACK';
      fields: ['deployment_id', 'trigger', 'strategy', 'duration', 'outcome'];
      retention: '7years';
    },
    {
      // A cost record; the only category with a shorter (3-year) retention.
      type: 'COST';
      fields: ['operation_id', 'amount', 'budget_scope', 'alert_level'];
      retention: '3years';
    }
  ];
}

8. Implementation Checklist

Phase 1: Foundation (Week 1-2)

Implement iteration circuit breakers
Set up basic cost tracking
Configure mandatory human gates
Create monitoring dashboard skeleton

Phase 2: Safety (Week 3-4)

Implement error rate breakers
Set up time-based breakers
Configure resource monitoring
Implement checkpoint system

Phase 3: Intelligence (Week 5-6)

Implement risk scoring
Set up auto-rollback triggers
Configure cost optimization
Implement feedback collection

Phase 4: Refinement (Week 7-8)

Tune thresholds based on data
Optimize alert rules
Create runbooks for common scenarios
Conduct chaos engineering exercises

9. Appendix: Configuration Reference

Default Thresholds

yaml
# Circuit Breaker Defaults

# Maximum loop iterations before the iteration breaker trips.
iterations:
  # Limit compared against in the iteration breaker's hard-limit check
  # (section 1.1).
  default: 10
  planning: 20
  implementation: 50
  testing: 5
  deployment: 3

# Error-rate thresholds, expressed as fractions (0.10 = 10%).
error_rates:
  warning: 0.10
  critical: 0.25
  catastrophic: 0.50

# Budget caps per task phase, per day and per month.
# NOTE(review): currency unit is not stated in this section.
costs:
  per_task:
    planning: 5.00
    implementation: 10.00
    testing: 3.00
    deployment: 2.00
  per_day: 200.00
  # Equals ten days of spend at the daily cap.
  per_month: 2000.00

# Time limits per phase and for the whole pipeline.
# NOTE(review): the four phase timeouts sum to 2h05m, which is more than
# total_pipeline (2h). Confirm that the pipeline limit is meant to be tighter.
timeouts:
  planning: 30m
  implementation: 1h
  testing: 20m
  deployment: 15m
  total_pipeline: 2h

# Rollback Defaults
# NOTE(review): the multipliers and throughput_minimum are presumably
# relative to a pre-deployment baseline; confirm against the rollback section.
health:
  error_rate_multiplier: 2.0
  latency_multiplier: 2.0
  throughput_minimum: 0.6
  evaluation_window: 5m

Document version: 1.0 | Last updated: 2026-02-06 | Based on research corpus: agentic-dev-research/topics/