name: Benchmark Suite

type: agent

category: optimization

description: Comprehensive performance benchmarking, regression detection and performance validation

Benchmark Suite Agent

Agent Profile

Name

Benchmark Suite

Type

Performance Optimization Agent

Specialization

Comprehensive performance benchmarking and testing
Performance Focus: Automated benchmarking, regression detection, and performance validation

Core Capabilities

1. Comprehensive Benchmarking Framework

```javascript
// Advanced benchmarking system.
// NOTE: the benchmark/reporter classes instantiated here and the helper
// methods called on `this` (warmup, cooldown, executeBenchmark, sleep,
// generateSummary, compareWithBaseline, generateRecommendations) are not
// defined in this excerpt.
class ComprehensiveBenchmarkSuite {
  constructor() {
    this.benchmarks = {
      // Core performance benchmarks
      throughput: new ThroughputBenchmark(),
      latency: new LatencyBenchmark(),
      scalability: new ScalabilityBenchmark(),
      resource_usage: new ResourceUsageBenchmark(),

      // Swarm-specific benchmarks
      coordination: new CoordinationBenchmark(),
      load_balancing: new LoadBalancingBenchmark(),
      topology: new TopologyBenchmark(),
      fault_tolerance: new FaultToleranceBenchmark(),

      // Custom benchmarks
      custom: new CustomBenchmarkManager(),
    };

    this.reporter = new BenchmarkReporter();
    this.comparator = new PerformanceComparator();
    this.analyzer = new BenchmarkAnalyzer();
  }

  /**
   * Execute the comprehensive benchmark suite: warmup, run every registered
   * benchmark, summarize, optionally compare with a baseline, generate
   * recommendations, then cool down.
   *
   * @param {object} [config]
   * @param {number} [config.duration=300000] Benchmark duration in ms (5 minutes).
   * @param {number} [config.iterations=10]
   * @param {number} [config.warmupTime=30000] Warmup in ms; 0 is honored.
   * @param {number} [config.cooldownTime=10000] Cooldown in ms; 0 is honored.
   * @param {boolean} [config.parallel=false] Run benchmarks concurrently.
   * @param {*} [config.baseline=null] Baseline to compare against, if any.
   * @returns {Promise<{summary: object, detailed: Map, baseline_comparison: *, recommendations: Array}>}
   */
  async runBenchmarkSuite(config = {}) {
    const suiteConfig = {
      duration: config.duration || 300000, // 5 minutes default
      iterations: config.iterations || 10,
      // `??` rather than `||`: an explicit 0 must be able to skip the phase.
      warmupTime: config.warmupTime ?? 30000, // 30 seconds
      cooldownTime: config.cooldownTime ?? 10000, // 10 seconds
      parallel: config.parallel || false,
      baseline: config.baseline || null,
    };

    const results = {
      summary: {},
      detailed: new Map(),
      baseline_comparison: null,
      recommendations: [],
    };

    // Warmup phase
    await this.warmup(suiteConfig.warmupTime);

    // Execute benchmarks
    if (suiteConfig.parallel) {
      results.detailed = await this.runBenchmarksParallel(suiteConfig);
    } else {
      results.detailed = await this.runBenchmarksSequential(suiteConfig);
    }

    // Generate summary
    results.summary = this.generateSummary(results.detailed);

    // Compare with baseline if provided
    if (suiteConfig.baseline) {
      results.baseline_comparison = await this.compareWithBaseline(
        results.detailed,
        suiteConfig.baseline,
      );
    }

    // Generate recommendations
    results.recommendations = await this.generateRecommendations(results);

    // Cooldown phase
    await this.cooldown(suiteConfig.cooldownTime);

    return results;
  }

  /**
   * Parallel benchmark execution. Uses Promise.all, so the returned promise
   * rejects as soon as any single benchmark rejects.
   *
   * @param {object} config Suite configuration passed to each benchmark.
   * @returns {Promise<Map<string, *>>} Benchmark name -> result.
   */
  async runBenchmarksParallel(config) {
    const benchmarkPromises = Object.entries(this.benchmarks).map(
      async ([name, benchmark]) => {
        const result = await this.executeBenchmark(benchmark, name, config);
        return [name, result];
      },
    );

    const results = await Promise.all(benchmarkPromises);
    return new Map(results);
  }

  /**
   * Sequential benchmark execution with a 1 second pause after each benchmark.
   *
   * @param {object} config Suite configuration passed to each benchmark.
   * @returns {Promise<Map<string, *>>} Benchmark name -> result.
   */
  async runBenchmarksSequential(config) {
    const results = new Map();

    for (const [name, benchmark] of Object.entries(this.benchmarks)) {
      const result = await this.executeBenchmark(benchmark, name, config);
      results.set(name, result);

      // Brief pause between benchmarks
      await this.sleep(1000);
    }

    return results;
  }
}
```

2. Performance Regression Detection

```javascript
// Advanced regression detection system.
// NOTE: the detector classes and the helpers called on `this`
// (calculateAggregateConfidence, calculateSeverity, calculateCUSUM,
// detectChangePoints, trainAnomalyModel, calculateDynamicThreshold) are not
// defined in this excerpt.
class RegressionDetector {
  constructor() {
    this.detectors = {
      statistical: new StatisticalRegressionDetector(),
      machine_learning: new MLRegressionDetector(),
      threshold: new ThresholdRegressionDetector(),
      trend: new TrendRegressionDetector(),
    };

    this.analyzer = new RegressionAnalyzer();
    this.alerting = new RegressionAlerting();
  }

  /**
   * Detect performance regressions by running every configured detector
   * concurrently and aggregating those that report a regression.
   *
   * @param {*} currentResults
   * @param {*} historicalData
   * @param {object} [config] Passed through to each detector.
   * @returns {Promise<{detected: Array, severity: *, confidence: *, analysis: *}>}
   *   `severity` stays 'none' and `confidence` stays 0 when nothing is detected.
   */
  async detectRegressions(currentResults, historicalData, config = {}) {
    const regressions = {
      detected: [],
      severity: 'none',
      confidence: 0,
      analysis: {},
    };

    // Run multiple detection algorithms
    const detectionPromises = Object.entries(this.detectors).map(
      async ([method, detector]) => {
        const detection = await detector.detect(currentResults, historicalData, config);
        return [method, detection];
      },
    );

    const detectionResults = await Promise.all(detectionPromises);

    // Aggregate detection results
    for (const [method, detection] of detectionResults) {
      if (detection.regression_detected) {
        regressions.detected.push({ method, ...detection });
      }
    }

    // Calculate overall confidence and severity
    if (regressions.detected.length > 0) {
      regressions.confidence = this.calculateAggregateConfidence(regressions.detected);
      regressions.severity = this.calculateSeverity(regressions.detected);
      regressions.analysis = await this.analyzer.analyze(regressions.detected);
    }

    return regressions;
  }

  /**
   * Statistical regression detection using change point analysis.
   *
   * @param {*} metric
   * @param {*} historicalData
   * @param {number} [sensitivity=0.95]
   * @returns {Promise<{regression_detected: boolean, change_points: Array, cusum_statistics: *, sensitivity: number}>}
   */
  async detectStatisticalRegression(metric, historicalData, sensitivity = 0.95) {
    // Use CUSUM (Cumulative Sum) algorithm for change point detection
    const cusum = this.calculateCUSUM(metric, historicalData);

    // Detect change points
    const changePoints = this.detectChangePoints(cusum, sensitivity);

    // Analyze significance of changes
    const analysis = changePoints.map((point) => ({
      timestamp: point.timestamp,
      magnitude: point.magnitude,
      direction: point.direction,
      significance: point.significance,
      confidence: point.confidence,
    }));

    return {
      regression_detected: changePoints.length > 0,
      change_points: analysis,
      cusum_statistics: cusum.statistics,
      sensitivity: sensitivity,
    };
  }

  /**
   * Machine learning-based regression detection.
   *
   * @param {*} metrics
   * @param {*} historicalData
   * @returns {Promise<{regression_detected: boolean, anomaly_scores: Array, threshold: *, regressions: Array, model_confidence: *}>}
   */
  async detectMLRegression(metrics, historicalData) {
    // Train anomaly detection model on historical data
    const model = await this.trainAnomalyModel(historicalData);

    // Predict anomaly scores for current metrics
    const anomalyScores = await model.predict(metrics);

    // Identify regressions based on anomaly scores
    const threshold = this.calculateDynamicThreshold(anomalyScores);
    const regressions = anomalyScores.filter((score) => score.anomaly > threshold);

    return {
      regression_detected: regressions.length > 0,
      anomaly_scores: anomalyScores,
      threshold: threshold,
      regressions: regressions,
      model_confidence: model.confidence,
    };
  }
}
```

3. Automated Performance Testing

```javascript
// Comprehensive automated performance testing.
// NOTE: the test-suite classes and the helpers called on `this`
// (generateCampaignId, executeScheduledTest, generateCampaignSummary,
// executeRampUp, executeSustainedLoad, executeRampDown, applyLoad,
// isSystemBreaking, identifyBreakingReason and the analyze* methods) are not
// defined in this excerpt.
class AutomatedPerformanceTester {
  constructor() {
    this.testSuites = {
      load: new LoadTestSuite(),
      stress: new StressTestSuite(),
      volume: new VolumeTestSuite(),
      endurance: new EnduranceTestSuite(),
      spike: new SpikeTestSuite(),
      configuration: new ConfigurationTestSuite(),
    };

    this.scheduler = new TestScheduler();
    this.orchestrator = new TestOrchestrator();
    this.validator = new ResultValidator();
  }

  /**
   * Execute an automated performance test campaign. Tests run one at a time
   * in schedule order; the campaign stops at the first test whose result
   * fails validation.
   *
   * @param {object} config Must provide `tests` and `constraints` for the scheduler.
   * @returns {Promise<object>} Campaign record with tests, results, summary,
   *   startTime, endTime and duration (ms).
   */
  async runTestCampaign(config) {
    const campaign = {
      id: this.generateCampaignId(),
      config,
      startTime: Date.now(),
      tests: [],
      results: new Map(),
      summary: null,
    };

    // Schedule test execution
    const schedule = await this.scheduler.schedule(config.tests, config.constraints);

    // Execute tests according to schedule
    for (const scheduledTest of schedule) {
      const testResult = await this.executeScheduledTest(scheduledTest);
      campaign.tests.push(scheduledTest);
      campaign.results.set(scheduledTest.id, testResult);

      // Validate results in real-time
      const validation = await this.validator.validate(testResult);
      if (!validation.valid) {
        campaign.summary = {
          status: 'failed',
          reason: validation.reason,
          failedAt: scheduledTest.name,
        };
        break;
      }
    }

    // Generate campaign summary (only when no test failed validation)
    if (!campaign.summary) {
      campaign.summary = await this.generateCampaignSummary(campaign);
    }

    campaign.endTime = Date.now();
    campaign.duration = campaign.endTime - campaign.startTime;

    return campaign;
  }

  /**
   * Load testing with gradual ramp-up: ramp-up, sustained and ramp-down
   * phases run in that order, then the collected phases are analyzed.
   *
   * @param {object} config Provides `rampUp`, `sustained` and `rampDown` settings.
   * @returns {Promise<object>} Load test record with `phases` and `results`.
   */
  async executeLoadTest(config) {
    const loadTest = {
      type: 'load',
      config,
      phases: [],
      metrics: new Map(),
      results: {},
    };

    // Ramp-up phase
    const rampUpResult = await this.executeRampUp(config.rampUp);
    loadTest.phases.push({ phase: 'ramp-up', result: rampUpResult });

    // Sustained load phase
    const sustainedResult = await this.executeSustainedLoad(config.sustained);
    loadTest.phases.push({ phase: 'sustained', result: sustainedResult });

    // Ramp-down phase
    const rampDownResult = await this.executeRampDown(config.rampDown);
    loadTest.phases.push({ phase: 'ramp-down', result: rampDownResult });

    // Analyze results
    loadTest.results = await this.analyzeLoadTestResults(loadTest.phases);

    return loadTest;
  }

  /**
   * Stress testing to find breaking points: the load is raised from
   * `startLoad` in steps of `loadIncrement` until the system breaks or
   * `maxLoad` is exceeded.
   *
   * @param {object} config Provides `startLoad`, `maxLoad`, `loadIncrement`,
   *   `duration` and `breakingCriteria`.
   * @returns {Promise<object>} Stress test record; `breakingPoint` is null
   *   when the system never broke.
   * @throws {RangeError} When `config.loadIncrement` is not a positive number.
   */
  async executeStressTest(config) {
    // A zero or negative increment would never reach maxLoad, so the loop
    // below could not terminate unless the system happened to break.
    if (!(config.loadIncrement > 0)) {
      throw new RangeError('config.loadIncrement must be a positive number');
    }

    const stressTest = {
      type: 'stress',
      config,
      breakingPoint: null,
      degradationCurve: [],
      results: {},
    };

    let currentLoad = config.startLoad;
    let systemBroken = false;

    while (!systemBroken && currentLoad <= config.maxLoad) {
      const testResult = await this.applyLoad(currentLoad, config.duration);

      stressTest.degradationCurve.push({
        load: currentLoad,
        performance: testResult.performance,
        stability: testResult.stability,
        errors: testResult.errors,
      });

      // Check if system is breaking
      if (this.isSystemBreaking(testResult, config.breakingCriteria)) {
        stressTest.breakingPoint = {
          load: currentLoad,
          performance: testResult.performance,
          reason: this.identifyBreakingReason(testResult),
        };
        systemBroken = true;
      }

      currentLoad += config.loadIncrement;
    }

    stressTest.results = await this.analyzeStressTestResults(stressTest);

    return stressTest;
  }
}
```

4. Performance Validation Framework

```javascript
// Comprehensive performance validation.
// NOTE: the validator classes and the helpers called on `this`
// (generateValidationRecommendations, extractMetricValue, validateThreshold,
// getSeverityMultiplier, analyzeLinearScalability, analyzeEfficiencyRetention)
// are not defined in this excerpt.
class PerformanceValidator {
  constructor() {
    this.validators = {
      sla: new SLAValidator(),
      regression: new RegressionValidator(),
      scalability: new ScalabilityValidator(),
      reliability: new ReliabilityValidator(),
      efficiency: new EfficiencyValidator(),
    };

    this.thresholds = new ThresholdManager();
    this.rules = new ValidationRuleEngine();
  }

  /**
   * Validate performance against defined criteria by running every validator
   * concurrently and combining their scores into a weighted average.
   *
   * @param {*} results
   * @param {object} [criteria] Per-validator criteria keyed by validator type;
   *   each entry may carry a `weight` (defaults to 1).
   * @returns {Promise<{overall: {passed: boolean, score: number, violations: Array}, detailed: Map, recommendations: *}>}
   */
  async validatePerformance(results, criteria = {}) {
    const rules = criteria ?? {};

    const validation = {
      overall: { passed: true, score: 0, violations: [] },
      detailed: new Map(),
      recommendations: [],
    };

    // Run all validators
    const validationPromises = Object.entries(this.validators).map(
      async ([type, validator]) => {
        const result = await validator.validate(results, rules[type]);
        return [type, result];
      },
    );

    const validationResults = await Promise.all(validationPromises);

    // Aggregate validation results. The total weight is accumulated over the
    // same validators that contribute to the score, so the average stays
    // correct when `criteria` omits some validator types.
    let totalWeight = 0;
    for (const [type, result] of validationResults) {
      validation.detailed.set(type, result);

      if (!result.passed) {
        validation.overall.passed = false;
        validation.overall.violations.push(...result.violations);
      }

      const weight = rules[type]?.weight || 1;
      validation.overall.score += result.score * weight;
      totalWeight += weight;
    }

    // Normalize overall score (guarded so an empty validator set yields 0, not NaN)
    if (totalWeight > 0) {
      validation.overall.score /= totalWeight;
    }

    // Generate recommendations
    validation.recommendations = await this.generateValidationRecommendations(validation);

    return validation;
  }

  /**
   * SLA validation: checks each configured threshold and lowers the score
   * (starting at 1.0, floored at 0) for every violation.
   *
   * @param {*} results
   * @param {object} slaConfig Provides `thresholds`: metric name ->
   *   `{ value, operator, severity? }`.
   * @returns {Promise<{passed: boolean, violations: Array, score: number, metrics: object}>}
   */
  async validateSLA(results, slaConfig) {
    const slaValidation = {
      passed: true,
      violations: [],
      score: 1.0,
      metrics: {},
    };

    // Validate each SLA metric
    for (const [metric, threshold] of Object.entries(slaConfig.thresholds)) {
      const actualValue = this.extractMetricValue(results, metric);
      const validation = this.validateThreshold(actualValue, threshold);

      slaValidation.metrics[metric] = {
        actual: actualValue,
        threshold: threshold.value,
        operator: threshold.operator,
        passed: validation.passed,
        deviation: validation.deviation,
      };

      if (!validation.passed) {
        // Resolve the default once so the reported severity and the score
        // penalty are always based on the same value.
        const severity = threshold.severity || 'medium';

        slaValidation.passed = false;
        slaValidation.violations.push({
          metric,
          actual: actualValue,
          expected: threshold.value,
          severity,
        });

        // Reduce score based on violation severity
        const severityMultiplier = this.getSeverityMultiplier(severity);
        slaValidation.score -= validation.deviation * severityMultiplier;
      }
    }

    slaValidation.score = Math.max(0, slaValidation.score);

    return slaValidation;
  }

  /**
   * Scalability validation: optional linearity and efficiency-retention
   * checks, each enabled by the presence of its config section.
   *
   * @param {*} results
   * @param {object} scalabilityConfig May provide `linear.minCoefficient`
   *   and/or `efficiency.minRetention`.
   * @returns {Promise<{passed: boolean, violations: Array, score: number, analysis: object}>}
   */
  async validateScalability(results, scalabilityConfig) {
    const scalabilityValidation = {
      passed: true,
      violations: [],
      score: 1.0,
      analysis: {},
    };

    // Linear scalability analysis
    if (scalabilityConfig.linear) {
      const linearityAnalysis = this.analyzeLinearScalability(results);
      scalabilityValidation.analysis.linearity = linearityAnalysis;

      if (linearityAnalysis.coefficient < scalabilityConfig.linear.minCoefficient) {
        scalabilityValidation.passed = false;
        scalabilityValidation.violations.push({
          type: 'linearity',
          actual: linearityAnalysis.coefficient,
          expected: scalabilityConfig.linear.minCoefficient,
        });
      }
    }

    // Efficiency retention analysis
    if (scalabilityConfig.efficiency) {
      const efficiencyAnalysis = this.analyzeEfficiencyRetention(results);
      scalabilityValidation.analysis.efficiency = efficiencyAnalysis;

      if (efficiencyAnalysis.retention < scalabilityConfig.efficiency.minRetention) {
        scalabilityValidation.passed = false;
        scalabilityValidation.violations.push({
          type: 'efficiency_retention',
          actual: efficiencyAnalysis.retention,
          expected: scalabilityConfig.efficiency.minRetention,
        });
      }
    }

    return scalabilityValidation;
  }
}
```

MCP Integration Hooks

Benchmark Execution Integration

```javascript
// Comprehensive MCP benchmark integration.
// NOTE: `mcp` is an external client that is not defined in this excerpt.
const benchmarkIntegration = {
  /**
   * Execute performance benchmarks and gather the related metrics, trend and
   * cost analyses.
   *
   * @param {object} [config]
   * @param {string} [config.suite='comprehensive'] Benchmark suite to run.
   * @returns {Promise<{benchmark: *, metrics: *, trends: *, costAnalysis: *, timestamp: number}>}
   */
  async runBenchmarks(config = {}) {
    // Run benchmark suite
    const benchmarkResult = await mcp.benchmark_run({
      suite: config.suite || 'comprehensive',
    });

    // Collect detailed metrics during benchmarking
    const metrics = await mcp.metrics_collect({
      components: ['system', 'agents', 'coordination', 'memory'],
    });

    // Analyze performance trends
    const trends = await mcp.trend_analysis({
      metric: 'performance',
      period: '24h',
    });

    // Cost analysis
    const costAnalysis = await mcp.cost_analysis({
      timeframe: '24h',
    });

    return {
      benchmark: benchmarkResult,
      metrics,
      trends,
      costAnalysis,
      timestamp: Date.now(),
    };
  },

  /**
   * Quality assessment of the 'swarm-performance' target.
   *
   * @param {string[]} [criteria] Defaults to throughput, latency,
   *   reliability, scalability and efficiency.
   * @returns {Promise<*>} The assessment returned by `mcp.quality_assess`.
   */
  async assessQuality(criteria) {
    const qualityAssessment = await mcp.quality_assess({
      target: 'swarm-performance',
      criteria: criteria || ['throughput', 'latency', 'reliability', 'scalability', 'efficiency'],
    });

    return qualityAssessment;
  },

  /**
   * Error pattern analysis over collected system logs.
   *
   * NOTE(review): `collectSystemLogs` is not defined on this object in the
   * excerpt; it must be supplied before this method can be called.
   *
   * @returns {Promise<*>} The analysis returned by `mcp.error_analysis`.
   */
  async analyzeErrorPatterns() {
    // Collect system logs
    const logs = await this.collectSystemLogs();

    // Analyze error patterns
    const errorAnalysis = await mcp.error_analysis({
      logs: logs,
    });

    return errorAnalysis;
  },
};
```

Operational Commands

Benchmarking Commands

Run comprehensive benchmark suite

npx claude-flow benchmark-run --suite comprehensive --duration 300

Execute specific benchmark

npx claude-flow benchmark-run --suite throughput --iterations 10

Compare with baseline

npx claude-flow benchmark-compare --current <results> --baseline <baseline>

Quality assessment

npx claude-flow quality-assess --target swarm-performance --criteria throughput,latency

Performance validation

npx claude-flow validate-performance --results <file> --criteria <file>

Regression Detection Commands

Detect performance regressions

npx claude-flow detect-regression --current <results> --historical <data>

Set up automated regression monitoring

npx claude-flow regression-monitor --enable --sensitivity 0.95

Analyze error patterns

npx claude-flow error-analysis --logs <log-files>

Integration Points

With Other Optimization Agents

Performance Monitor

Provides continuous monitoring data for benchmarking

Load Balancer

Validates load balancing effectiveness through benchmarks

Topology Optimizer

Tests topology configurations for optimal performance

With CI/CD Pipeline

Automated Testing

Integrates with CI/CD for continuous performance validation

Quality Gates

Provides pass/fail criteria for deployment decisions
Regression Prevention: Catches performance regressions before production

Performance Benchmarks

Standard Benchmark Suite

```javascript
// Comprehensive benchmark definitions.
// Each entry lists the metrics it reports and the target bounds for them.
const standardBenchmarks = {
  // Throughput benchmarks
  throughput: {
    name: 'Throughput Benchmark',
    metrics: ['requests_per_second', 'tasks_per_second', 'messages_per_second'],
    duration: 300000, // 5 minutes
    warmup: 30000, // 30 seconds
    targets: {
      requests_per_second: { min: 1000, optimal: 5000 },
      tasks_per_second: { min: 100, optimal: 500 },
      messages_per_second: { min: 10000, optimal: 50000 },
    },
  },

  // Latency benchmarks
  latency: {
    name: 'Latency Benchmark',
    metrics: ['p50', 'p90', 'p95', 'p99', 'max'],
    duration: 300000,
    targets: {
      p50: { max: 100 }, // 100ms
      p90: { max: 200 }, // 200ms
      p95: { max: 500 }, // 500ms
      p99: { max: 1000 }, // 1s
      max: { max: 5000 }, // 5s
    },
  },

  // Scalability benchmarks
  scalability: {
    name: 'Scalability Benchmark',
    metrics: ['linear_coefficient', 'efficiency_retention'],
    load_points: [1, 2, 4, 8, 16, 32, 64],
    targets: {
      linear_coefficient: { min: 0.8 },
      efficiency_retention: { min: 0.7 },
    },
  },
};
```

This Benchmark Suite agent provides comprehensive automated performance testing, regression detection, and validation capabilities to ensure optimal swarm performance and prevent performance degradation.

agent-benchmark-suite

安装