Backend Performance Monitoring & Optimization

Satyam Parmar
January 12, 2025
10 min read

Performance monitoring is crucial for maintaining scalable, responsive backend applications. This guide covers essential monitoring techniques, tools, and optimization strategies.

Performance Metrics to Track

Key Performance Indicators (KPIs)

Node.js/JavaScript:

// Core performance metrics
const performanceMetrics = {
  // Response time metrics
  responseTime: {
    p50: 0,    // 50th percentile
    p95: 0,    // 95th percentile
    p99: 0,    // 99th percentile
    max: 0     // Maximum response time
  },
  
  // Throughput metrics
  throughput: {
    requestsPerSecond: 0,
    requestsPerMinute: 0,
    concurrentUsers: 0
  },
  
  // Error metrics
  errors: {
    errorRate: 0,        // Percentage of failed requests
    errorCount: 0,       // Total error count
    errorTypes: {}       // Breakdown by error type
  },
  
  // Resource utilization
  resources: {
    cpuUsage: 0,         // CPU utilization percentage
    memoryUsage: 0,      // Memory usage in MB
    diskUsage: 0,        // Disk usage percentage
    networkIO: 0         // Network I/O in bytes
  }
};

Java/Spring Boot:

@Component
public class PerformanceMetrics {
    
    private final MeterRegistry meterRegistry;
    
    public PerformanceMetrics(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }
    
    // Core performance metrics using Micrometer
    public void recordResponseTime(String operation, long durationMs) {
        Timer.builder("operation.duration")
            .tag("operation", operation)
            .register(meterRegistry)
            .record(durationMs, TimeUnit.MILLISECONDS);
    }
    
    public void recordRequestCount(String endpoint, String method, int statusCode) {
        Counter.builder("http.requests")
            .tag("endpoint", endpoint)
            .tag("method", method)
            .tag("status", String.valueOf(statusCode))
            .register(meterRegistry)
            .increment();
    }
    
    public void recordErrorCount(String operation, String errorType) {
        Counter.builder("errors.total")
            .tag("operation", operation)
            .tag("type", errorType)
            .register(meterRegistry)
            .increment();
    }
    
    // Heap memory gauges, registered once at startup; Micrometer re-reads
    // the values from the MemoryMXBean each time the gauges are sampled
    @PostConstruct
    public void registerMemoryGauges() {
        MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();
        
        Gauge.builder("jvm.memory.used", memoryBean, bean -> bean.getHeapMemoryUsage().getUsed())
            .register(meterRegistry);
            
        Gauge.builder("jvm.memory.max", memoryBean, bean -> bean.getHeapMemoryUsage().getMax())
            .register(meterRegistry);
    }
    
    // Database query performance (tag with a short query name rather than raw SQL
    // to avoid high-cardinality metric tags)
    public void recordDatabaseQuery(String queryName, long durationMs) {
        Timer.builder("database.query.duration")
            .tag("query", queryName)
            .register(meterRegistry)
            .record(durationMs, TimeUnit.MILLISECONDS);
    }
}

// Performance monitoring aspect
@Aspect
@Component
public class PerformanceMonitoringAspect {
    
    private final PerformanceMetrics performanceMetrics;
    private final MeterRegistry meterRegistry;
    
    public PerformanceMonitoringAspect(PerformanceMetrics performanceMetrics, 
                                     MeterRegistry meterRegistry) {
        this.performanceMetrics = performanceMetrics;
        this.meterRegistry = meterRegistry;
    }
    
    // Binding the annotation as an advice parameter lets the pointcut resolve
    // the type without a fully qualified name in the expression
    @Around("@annotation(monitored)")
    public Object monitorMethod(ProceedingJoinPoint joinPoint, Monitored monitored) throws Throwable {
        String methodName = joinPoint.getSignature().getName();
        Timer.Sample sample = Timer.start(meterRegistry);
        
        try {
            Object result = joinPoint.proceed();
            return result;
        } catch (Exception e) {
            performanceMetrics.recordErrorCount(methodName, e.getClass().getSimpleName());
            throw e;
        } finally {
            sample.stop(Timer.builder("method.duration")
                .tag("method", methodName)
                .register(meterRegistry));
        }
    }
}

// Custom annotation for monitoring
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface Monitored {
    String value() default "";
}

Custom Performance Monitoring

class PerformanceMonitor {
  constructor() {
    this.metrics = new Map();
    this.startTimes = new Map();
  }
  
  // Start timing an operation
  startTimer(operationId) {
    this.startTimes.set(operationId, process.hrtime.bigint());
  }
  
  // End timing and record metric
  endTimer(operationId, tags = {}) {
    const startTime = this.startTimes.get(operationId);
    if (!startTime) return;
    
    const endTime = process.hrtime.bigint();
    const duration = Number(endTime - startTime) / 1000000; // Convert to milliseconds
    
    this.recordMetric('operation_duration', duration, {
      operation: operationId,
      ...tags
    });
    
    this.startTimes.delete(operationId);
  }
  
  // Record a metric value
  recordMetric(name, value, tags = {}) {
    const key = `${name}_${JSON.stringify(tags)}`;
    
    if (!this.metrics.has(key)) {
      this.metrics.set(key, {
        name,
        tags,
        values: [],
        count: 0,
        sum: 0,
        min: Infinity,
        max: -Infinity
      });
    }
    
    const metric = this.metrics.get(key);
    metric.values.push(value);
    metric.count++;
    metric.sum += value;
    metric.min = Math.min(metric.min, value);
    metric.max = Math.max(metric.max, value);
    
    // Keep only last 1000 values to prevent memory leaks
    if (metric.values.length > 1000) {
      metric.values = metric.values.slice(-1000);
    }
  }
  
  // Get percentile value
  getPercentile(values, percentile) {
    const sorted = [...values].sort((a, b) => a - b);
    const index = Math.ceil((percentile / 100) * sorted.length) - 1;
    return sorted[index] || 0;
  }
  
  // Get metrics summary
  getMetricsSummary() {
    const summary = {};
    
    for (const [key, metric] of this.metrics) {
      if (metric.count === 0) continue;
      
      summary[metric.name] = {
        count: metric.count,
        sum: metric.sum,
        avg: metric.sum / metric.count,
        min: metric.min,
        max: metric.max,
        p50: this.getPercentile(metric.values, 50),
        p95: this.getPercentile(metric.values, 95),
        p99: this.getPercentile(metric.values, 99),
        tags: metric.tags
      };
    }
    
    return summary;
  }
}

// Global performance monitor instance
const perfMonitor = new PerformanceMonitor();
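
As a quick usage sketch (the operation name, URL, and fetch-based call below are illustrative, and global fetch assumes Node 18+), an individual operation can be timed with startTimer/endTimer and the aggregates read back later via getMetricsSummary():

// Hypothetical usage: time an outbound call and tag it with a stable operation name
async function fetchExchangeRates() {
  const operationId = `fetch_exchange_rates_${Date.now()}`;
  perfMonitor.startTimer(operationId);
  
  try {
    const response = await fetch('https://api.example.com/rates'); // placeholder URL
    return await response.json();
  } finally {
    // A stable "operation" tag keeps all calls aggregated under one metric key
    perfMonitor.endTimer(operationId, { operation: 'fetch_exchange_rates' });
  }
}

// Later, e.g. in a periodic log or a metrics endpoint:
// console.log(perfMonitor.getMetricsSummary());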

Application Performance Monitoring (APM)

Express.js Middleware for Request Tracking

const express = require('express');
const app = express();

// Request timing middleware
app.use((req, res, next) => {
  // Include a random suffix so concurrent requests to the same path get distinct timers
  const requestId = `${req.method}_${req.path}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  req.requestId = requestId;
  
  perfMonitor.startTimer(requestId);
  
  // Override res.end to capture response time
  const originalEnd = res.end;
  res.end = function(...args) {
    perfMonitor.endTimer(requestId, {
      method: req.method,
      path: req.path,
      statusCode: res.statusCode
    });
    
    originalEnd.apply(this, args);
  };
  
  next();
});

// Database query monitoring: wrap a query promise and record its duration.
// The SQL text is passed separately for tagging, since a bare promise has no .sql property.
function monitorDatabaseQuery(queryPromise, sql = 'unknown') {
  const queryId = `db_query_${Date.now()}`;
  perfMonitor.startTimer(queryId);
  
  return queryPromise.then(result => {
    perfMonitor.endTimer(queryId, {
      type: 'database',
      query: sql
    });
    return result;
  }).catch(error => {
    perfMonitor.endTimer(queryId, {
      type: 'database',
      query: sql,
      error: true
    });
    throw error;
  });
}

// Usage example
app.get('/api/users', async (req, res) => {
  try {
    const users = await monitorDatabaseQuery(
      db.query('SELECT * FROM users'),
      'SELECT * FROM users'
    );
    
    res.json(users);
  } catch (error) {
    res.status(500).json({ error: 'Internal server error' });
  }
});

Memory and CPU Monitoring

const os = require('os');
const process = require('process');

class SystemMonitor {
  constructor() {
    this.startTime = Date.now();
    this.startCpuUsage = process.cpuUsage();
  }
  
  getSystemMetrics() {
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage(this.startCpuUsage);
    
    return {
      // Memory metrics
      memory: {
        rss: Math.round(memUsage.rss / 1024 / 1024), // MB
        heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024),
        heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
        external: Math.round(memUsage.external / 1024 / 1024),
        systemTotal: Math.round(os.totalmem() / 1024 / 1024),
        systemFree: Math.round(os.freemem() / 1024 / 1024)
      },
      
      // CPU metrics
      cpu: {
        user: cpuUsage.user / 1000000, // Convert to seconds
        system: cpuUsage.system / 1000000,
        cores: os.cpus().length,
        loadAverage: os.loadavg()
      },
      
      // Process metrics
      process: {
        uptime: Math.round((Date.now() - this.startTime) / 1000),
        pid: process.pid,
        version: process.version,
        platform: process.platform
      }
    };
  }
  
  // Check if system is under stress
  isSystemStressed() {
    const metrics = this.getSystemMetrics();
    const loadAvg = metrics.cpu.loadAverage[0];
    const memoryUsagePercent = (metrics.memory.heapUsed / metrics.memory.heapTotal) * 100;
    
    return {
      highLoad: loadAvg > os.cpus().length * 0.8,
      highMemory: memoryUsagePercent > 80,
      lowMemory: metrics.memory.systemFree < 100 // Less than 100MB free
    };
  }
}

const systemMonitor = new SystemMonitor();

// Periodic system monitoring
setInterval(() => {
  const metrics = systemMonitor.getSystemMetrics();
  const stress = systemMonitor.isSystemStressed();
  
  // Log metrics every 30 seconds
  console.log('System Metrics:', {
    memory: metrics.memory,
    cpu: metrics.cpu,
    stress
  });
  
  // Alert on high stress
  if (stress.highLoad || stress.highMemory || stress.lowMemory) {
    console.warn('System under stress:', stress);
  }
}, 30000);
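
The same PerformanceMonitor can ingest these system figures so heap and load numbers appear next to request timings in getMetricsSummary(); a minimal sketch of one possible wiring:

// Feed selected system figures into the shared performance monitor (sketch)
setInterval(() => {
  const { memory, cpu } = systemMonitor.getSystemMetrics();
  
  perfMonitor.recordMetric('heap_used_mb', memory.heapUsed, { type: 'system' });
  perfMonitor.recordMetric('load_average_1m', cpu.loadAverage[0], { type: 'system' });
}, 30000);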

Database Performance Monitoring

Query Performance Tracking

const { Pool } = require('pg');

class MonitoredPool extends Pool {
  constructor(config) {
    super(config);
    this.queryCount = 0;
    this.slowQueries = [];
    this.queryTimes = [];
  }
  
  async query(text, params) {
    const startTime = process.hrtime.bigint();
    this.queryCount++;
    
    try {
      const result = await super.query(text, params);
      const endTime = process.hrtime.bigint();
      const duration = Number(endTime - startTime) / 1000000; // Convert to ms
      
      // Record query performance (keep only the most recent samples to bound memory)
      this.queryTimes.push(duration);
      if (this.queryTimes.length > 1000) {
        this.queryTimes = this.queryTimes.slice(-1000);
      }
      
      // Track slow queries
      if (duration > 1000) { // Queries slower than 1 second
        this.slowQueries.push({
          query: text,
          params,
          duration,
          timestamp: new Date()
        });
        
        console.warn(`Slow query detected: ${duration}ms`, {
          query: text.substring(0, 100) + '...',
          params
        });
      }
      
      return result;
    } catch (error) {
      const endTime = process.hrtime.bigint();
      const duration = Number(endTime - startTime) / 1000000;
      
      console.error('Database query error:', {
        query: text,
        params,
        duration,
        error: error.message
      });
      
      throw error;
    }
  }
  
  getQueryStats() {
    const avgTime = this.queryTimes.length > 0 
      ? this.queryTimes.reduce((a, b) => a + b, 0) / this.queryTimes.length 
      : 0;
    
    return {
      totalQueries: this.queryCount,
      averageQueryTime: Math.round(avgTime),
      slowQueries: this.slowQueries.length,
      recentSlowQueries: this.slowQueries.slice(-10)
    };
  }
}

// Use monitored pool
const pool = new MonitoredPool({
  connectionString: process.env.DATABASE_URL
});

// Log query stats every minute
setInterval(() => {
  const stats = pool.getQueryStats();
  console.log('Database Performance:', stats);
}, 60000);

Real-time Monitoring Dashboard

Express.js Health Check Endpoint

app.get('/health', (req, res) => {
  const systemMetrics = systemMonitor.getSystemMetrics();
  const performanceMetrics = perfMonitor.getMetricsSummary();
  const dbStats = pool.getQueryStats();
  const stress = systemMonitor.isSystemStressed();
  
  const health = {
    status: stress.highLoad || stress.highMemory || stress.lowMemory ? 'degraded' : 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    system: systemMetrics,
    performance: performanceMetrics,
    database: dbStats,
    alerts: {
      highLoad: stress.highLoad,
      highMemory: stress.highMemory,
      lowMemory: stress.lowMemory
    }
  };
  
  const statusCode = health.status === 'healthy' ? 200 : 503;
  res.status(statusCode).json(health);
});

// Detailed metrics endpoint
app.get('/metrics', (req, res) => {
  const metrics = {
    performance: perfMonitor.getMetricsSummary(),
    system: systemMonitor.getSystemMetrics(),
    database: pool.getQueryStats()
  };
  
  res.json(metrics);
});

Performance Optimization Strategies

Caching Implementation

const NodeCache = require('node-cache');
const Redis = require('redis');

class CacheManager {
  constructor() {
    this.memoryCache = new NodeCache({ stdTTL: 600 }); // 10 minutes
    // node-redis v4+ promise-based client; connect once at startup
    this.redis = Redis.createClient({
      socket: {
        host: process.env.REDIS_HOST,
        port: Number(process.env.REDIS_PORT)
      }
    });
    this.redis.on('error', (error) => console.error('Redis client error:', error));
    this.redis.connect().catch((error) => console.error('Redis connect error:', error));
  }
  
  async get(key) {
    // Try memory cache first
    let value = this.memoryCache.get(key);
    if (value) {
      perfMonitor.recordMetric('cache_hit', 1, { type: 'memory' });
      return value;
    }
    
    // Try Redis cache
    try {
      const redisValue = await this.redis.get(key);
      if (redisValue) {
        const parsed = JSON.parse(redisValue);
        this.memoryCache.set(key, parsed); // Populate memory cache
        perfMonitor.recordMetric('cache_hit', 1, { type: 'redis' });
        return parsed;
      }
    } catch (error) {
      console.error('Redis cache error:', error);
    }
    
    perfMonitor.recordMetric('cache_miss', 1, { type: 'all' });
    return null;
  }
  
  async set(key, value, ttl = 3600) {
    // Set in memory cache
    this.memoryCache.set(key, value);
    
    // Set in Redis cache (node-redis v4 syntax: EX = TTL in seconds)
    try {
      await this.redis.set(key, JSON.stringify(value), { EX: ttl });
    } catch (error) {
      console.error('Redis cache set error:', error);
    }
  }
  
  async del(key) {
    this.memoryCache.del(key);
    try {
      await this.redis.del(key);
    } catch (error) {
      console.error('Redis cache delete error:', error);
    }
  }
}

const cache = new CacheManager();

// Cached database query
async function getCachedUser(userId) {
  const cacheKey = `user:${userId}`;
  
  let user = await cache.get(cacheKey);
  if (user) {
    return user;
  }
  
  // Fetch from database
  const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);
  user = result.rows[0];
  
  if (user) {
    await cache.set(cacheKey, user, 1800); // 30 minutes
  }
  
  return user;
}
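
Eviction is the other half of the pattern: when the underlying row changes, the cached entry should be dropped so readers do not see stale data. A sketch of a write path (the route, body parsing via express.json(), and the column being updated are illustrative):

// Invalidate the cached entry whenever the user record is updated (illustrative route)
app.put('/api/users/:id', async (req, res) => {
  const userId = req.params.id;
  
  try {
    await pool.query('UPDATE users SET name = $1 WHERE id = $2', [req.body.name, userId]);
    await cache.del(`user:${userId}`); // Drop the entry from both cache layers
    
    res.json({ updated: true });
  } catch (error) {
    res.status(500).json({ error: 'Internal server error' });
  }
});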

Connection Pooling Optimization

// Optimized database connection pool (options supported by node-postgres)
const optimizedPool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20,                       // Maximum number of clients in the pool
  min: 5,                        // Minimum pool size (recent node-postgres versions)
  idleTimeoutMillis: 30000,      // Close idle clients after 30 seconds
  connectionTimeoutMillis: 2000, // Fail if a connection cannot be acquired within 2 seconds
  // SSL configuration for production; note that rejectUnauthorized: false skips
  // certificate verification, so prefer a verified CA certificate where possible
  ssl: process.env.NODE_ENV === 'production' ? {
    rejectUnauthorized: false
  } : false
});

// Pool monitoring
setInterval(() => {
  const poolStats = {
    totalCount: optimizedPool.totalCount,
    idleCount: optimizedPool.idleCount,
    waitingCount: optimizedPool.waitingCount
  };
  
  console.log('Pool Stats:', poolStats);
  
  // Alert if pool is exhausted
  if (poolStats.waitingCount > 5) {
    console.warn('High connection pool wait time!');
  }
}, 30000);

Alerting and Notifications

Performance Alert System

class AlertManager {
  constructor() {
    this.alerts = new Map();
    this.thresholds = {
      responseTime: 2000, // 2 seconds
      errorRate: 0.05,    // 5%
      memoryUsage: 0.8,   // 80%
      cpuUsage: 0.8       // 80%
    };
  }
  
  checkAlerts(metrics) {
    const alerts = [];
    
    // Response time alert
    if (metrics.performance?.operation_duration?.p95 > this.thresholds.responseTime) {
      alerts.push({
        type: 'HIGH_RESPONSE_TIME',
        message: `95th percentile response time is ${metrics.performance.operation_duration.p95}ms`,
        severity: 'warning'
      });
    }
    
    // Error rate alert
    const errorRate = metrics.errors?.errorRate || 0;
    if (errorRate > this.thresholds.errorRate) {
      alerts.push({
        type: 'HIGH_ERROR_RATE',
        message: `Error rate is ${(errorRate * 100).toFixed(2)}%`,
        severity: 'critical'
      });
    }
    
    // Memory usage alert
    const memoryUsage = metrics.system?.memory?.heapUsed / metrics.system?.memory?.heapTotal || 0;
    if (memoryUsage > this.thresholds.memoryUsage) {
      alerts.push({
        type: 'HIGH_MEMORY_USAGE',
        message: `Memory usage is ${(memoryUsage * 100).toFixed(2)}%`,
        severity: 'warning'
      });
    }
    
    return alerts;
  }
  
  async sendAlert(alert) {
    // Send to monitoring service (e.g., DataDog, New Relic, etc.)
    console.warn('ALERT:', alert);
    
    // You can integrate with external services here
    // await sendToSlack(alert);
    // await sendToEmail(alert);
  }
}

const alertManager = new AlertManager();

// Check alerts every 30 seconds
setInterval(() => {
  const metrics = {
    performance: perfMonitor.getMetricsSummary(),
    system: systemMonitor.getSystemMetrics(),
    errors: { errorRate: 0 } // Calculate from your error tracking
  };
  
  const alerts = alertManager.checkAlerts(metrics);
  alerts.forEach(alert => alertManager.sendAlert(alert));
}, 30000);
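
The commented-out sendToSlack call above could, for instance, post to a Slack incoming webhook. A minimal sketch, assuming a SLACK_WEBHOOK_URL environment variable and Node 18+ global fetch:

// Push an alert to a Slack incoming webhook (assumes SLACK_WEBHOOK_URL is configured)
async function sendToSlack(alert) {
  if (!process.env.SLACK_WEBHOOK_URL) return;
  
  try {
    await fetch(process.env.SLACK_WEBHOOK_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text: `[${alert.severity.toUpperCase()}] ${alert.type}: ${alert.message}`
      })
    });
  } catch (error) {
    console.error('Failed to send Slack alert:', error);
  }
}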

Conclusion

Effective performance monitoring requires:

  1. Comprehensive metrics collection - Track all relevant KPIs
  2. Real-time monitoring - Detect issues as they happen
  3. Proactive alerting - Get notified before problems escalate
  4. Performance optimization - Continuously improve based on data
  5. Regular analysis - Review trends and patterns

Remember: Monitoring without action is just data collection. Use the insights to continuously improve your application's performance and reliability.
