Backend Performance Monitoring & Optimization
Performance monitoring is crucial for maintaining scalable, responsive backend applications. This guide covers essential monitoring techniques, tools, and optimization strategies.
Performance Metrics to Track
Key Performance Indicators (KPIs)
Node.js:

```javascript
// Core performance metrics
const performanceMetrics = {
  // Response time metrics
  responseTime: {
    p50: 0,  // 50th percentile
    p95: 0,  // 95th percentile
    p99: 0,  // 99th percentile
    max: 0   // Maximum response time
  },

  // Throughput metrics
  throughput: {
    requestsPerSecond: 0,
    requestsPerMinute: 0,
    concurrentUsers: 0
  },

  // Error metrics
  errors: {
    errorRate: 0,   // Percentage of failed requests
    errorCount: 0,  // Total error count
    errorTypes: {}  // Breakdown by error type
  },

  // Resource utilization
  resources: {
    cpuUsage: 0,     // CPU utilization percentage
    memoryUsage: 0,  // Memory usage in MB
    diskUsage: 0,    // Disk usage percentage
    networkIO: 0     // Network I/O in bytes
  }
};
```
Java/Spring Boot:
```java
@Component
public class PerformanceMetrics {

    private final MeterRegistry meterRegistry;

    public PerformanceMetrics(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    // Core performance metrics using Micrometer
    public void recordResponseTime(String operation, long durationMs) {
        Timer.builder("operation.duration")
            .tag("operation", operation)
            .register(meterRegistry)
            .record(durationMs, TimeUnit.MILLISECONDS);
    }

    public void recordRequestCount(String endpoint, String method, int statusCode) {
        Counter.builder("http.requests")
            .tag("endpoint", endpoint)
            .tag("method", method)
            .tag("status", String.valueOf(statusCode))
            .register(meterRegistry)
            .increment();
    }

    public void recordErrorCount(String operation, String errorType) {
        Counter.builder("errors.total")
            .tag("operation", operation)
            .tag("type", errorType)
            .register(meterRegistry)
            .increment();
    }

    // Register heap gauges once; Micrometer polls them on every scrape
    @PostConstruct
    public void registerMemoryGauges() {
        MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();

        Gauge.builder("jvm.memory.used", memoryBean,
                bean -> bean.getHeapMemoryUsage().getUsed())
            .register(meterRegistry);

        Gauge.builder("jvm.memory.max", memoryBean,
                bean -> bean.getHeapMemoryUsage().getMax())
            .register(meterRegistry);
    }

    // Database query performance
    public void recordDatabaseQuery(String query, long durationMs) {
        Timer.builder("database.query.duration")
            .tag("query", query)
            .register(meterRegistry)
            .record(durationMs, TimeUnit.MILLISECONDS);
    }
}

// Performance monitoring aspect
@Aspect
@Component
public class PerformanceMonitoringAspect {

    private final PerformanceMetrics performanceMetrics;
    private final MeterRegistry meterRegistry;

    public PerformanceMonitoringAspect(PerformanceMetrics performanceMetrics,
                                       MeterRegistry meterRegistry) {
        this.performanceMetrics = performanceMetrics;
        this.meterRegistry = meterRegistry;
    }

    @Around("@annotation(Monitored)")
    public Object monitorMethod(ProceedingJoinPoint joinPoint) throws Throwable {
        String methodName = joinPoint.getSignature().getName();
        Timer.Sample sample = Timer.start(meterRegistry);

        try {
            return joinPoint.proceed();
        } catch (Exception e) {
            performanceMetrics.recordErrorCount(methodName, e.getClass().getSimpleName());
            throw e;
        } finally {
            sample.stop(Timer.builder("method.duration")
                .tag("method", methodName)
                .register(meterRegistry));
        }
    }
}

// Custom annotation for monitoring
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface Monitored {
    String value() default "";
}
```
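With the aspect in place, annotating a Spring bean method is all it takes to get timing and error counters. A minimal usage sketch follows; `OrderService`, `OrderRepository`, and `Order` are hypothetical names used only for illustration.

```java
@Service
public class OrderService {

    private final OrderRepository orderRepository; // hypothetical repository

    public OrderService(OrderRepository orderRepository) {
        this.orderRepository = orderRepository;
    }

    // PerformanceMonitoringAspect wraps this call, recording "method.duration"
    // and incrementing "errors.total" if the call throws.
    @Monitored("findOrdersForCustomer")
    public List<Order> findOrdersForCustomer(Long customerId) {
        return orderRepository.findByCustomerId(customerId);
    }
}
```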
Custom Performance Monitoring
```javascript
class PerformanceMonitor {
  constructor() {
    this.metrics = new Map();
    this.startTimes = new Map();
  }

  // Start timing an operation
  startTimer(operationId) {
    this.startTimes.set(operationId, process.hrtime.bigint());
  }

  // End timing and record metric
  endTimer(operationId, tags = {}) {
    const startTime = this.startTimes.get(operationId);
    if (!startTime) return;

    const endTime = process.hrtime.bigint();
    const duration = Number(endTime - startTime) / 1000000; // Convert to milliseconds

    this.recordMetric('operation_duration', duration, {
      operation: operationId,
      ...tags
    });

    this.startTimes.delete(operationId);
  }

  // Record a metric value
  recordMetric(name, value, tags = {}) {
    const key = `${name}_${JSON.stringify(tags)}`;

    if (!this.metrics.has(key)) {
      this.metrics.set(key, {
        name,
        tags,
        values: [],
        count: 0,
        sum: 0,
        min: Infinity,
        max: -Infinity
      });
    }

    const metric = this.metrics.get(key);
    metric.values.push(value);
    metric.count++;
    metric.sum += value;
    metric.min = Math.min(metric.min, value);
    metric.max = Math.max(metric.max, value);

    // Keep only last 1000 values to prevent memory leaks
    if (metric.values.length > 1000) {
      metric.values = metric.values.slice(-1000);
    }
  }

  // Get percentile value
  getPercentile(values, percentile) {
    const sorted = [...values].sort((a, b) => a - b);
    const index = Math.ceil((percentile / 100) * sorted.length) - 1;
    return sorted[index] || 0;
  }

  // Get metrics summary
  getMetricsSummary() {
    const summary = {};

    for (const [key, metric] of this.metrics) {
      if (metric.count === 0) continue;

      summary[metric.name] = {
        count: metric.count,
        sum: metric.sum,
        avg: metric.sum / metric.count,
        min: metric.min,
        max: metric.max,
        p50: this.getPercentile(metric.values, 50),
        p95: this.getPercentile(metric.values, 95),
        p99: this.getPercentile(metric.values, 99),
        tags: metric.tags
      };
    }

    return summary;
  }
}

// Global performance monitor instance
const perfMonitor = new PerformanceMonitor();
```
Application Performance Monitoring (APM)
Express.js Middleware for Request Tracking
```javascript
const express = require('express');
const app = express();

// Request timing middleware
app.use((req, res, next) => {
  const requestId = `${req.method}_${req.path}_${Date.now()}`;
  req.requestId = requestId;

  perfMonitor.startTimer(requestId);

  // Override res.end to capture response time
  const originalEnd = res.end;
  res.end = function (...args) {
    perfMonitor.endTimer(requestId, {
      method: req.method,
      path: req.path,
      statusCode: res.statusCode
    });
    originalEnd.apply(this, args);
  };

  next();
});

// Database query monitoring: wraps a query promise and records its duration
function monitorDatabaseQuery(queryPromise, sql = 'unknown') {
  const queryId = `db_query_${Date.now()}`;
  perfMonitor.startTimer(queryId);

  return queryPromise
    .then(result => {
      perfMonitor.endTimer(queryId, { type: 'database', query: sql });
      return result;
    })
    .catch(error => {
      perfMonitor.endTimer(queryId, { type: 'database', query: sql, error: true });
      throw error;
    });
}

// Usage example
app.get('/api/users', async (req, res) => {
  try {
    const result = await monitorDatabaseQuery(
      db.query('SELECT * FROM users'),
      'SELECT * FROM users'
    );
    res.json(result.rows);
  } catch (error) {
    res.status(500).json({ error: 'Internal server error' });
  }
});
```
Memory and CPU Monitoring
```javascript
const os = require('os');
const process = require('process');

class SystemMonitor {
  constructor() {
    this.startTime = Date.now();
    this.startCpuUsage = process.cpuUsage();
  }

  getSystemMetrics() {
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage(this.startCpuUsage);

    return {
      // Memory metrics
      memory: {
        rss: Math.round(memUsage.rss / 1024 / 1024), // MB
        heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024),
        heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
        external: Math.round(memUsage.external / 1024 / 1024),
        systemTotal: Math.round(os.totalmem() / 1024 / 1024),
        systemFree: Math.round(os.freemem() / 1024 / 1024)
      },

      // CPU metrics
      cpu: {
        user: cpuUsage.user / 1000000, // Convert to seconds
        system: cpuUsage.system / 1000000,
        cores: os.cpus().length,
        loadAverage: os.loadavg()
      },

      // Process metrics
      process: {
        uptime: Math.round((Date.now() - this.startTime) / 1000),
        pid: process.pid,
        version: process.version,
        platform: process.platform
      }
    };
  }

  // Check if system is under stress
  isSystemStressed() {
    const metrics = this.getSystemMetrics();
    const loadAvg = metrics.cpu.loadAverage[0];
    const memoryUsagePercent = (metrics.memory.heapUsed / metrics.memory.heapTotal) * 100;

    return {
      highLoad: loadAvg > os.cpus().length * 0.8,
      highMemory: memoryUsagePercent > 80,
      lowMemory: metrics.memory.systemFree < 100 // Less than 100 MB free
    };
  }
}

const systemMonitor = new SystemMonitor();

// Periodic system monitoring: log metrics every 30 seconds
setInterval(() => {
  const metrics = systemMonitor.getSystemMetrics();
  const stress = systemMonitor.isSystemStressed();

  console.log('System Metrics:', {
    memory: metrics.memory,
    cpu: metrics.cpu,
    stress
  });

  // Alert on high stress
  if (stress.highLoad || stress.highMemory || stress.lowMemory) {
    console.warn('System under stress:', stress);
  }
}, 30000);
```
Database Performance Monitoring
Query Performance Tracking
```javascript
const { Pool } = require('pg');

class MonitoredPool extends Pool {
  constructor(config) {
    super(config);
    this.queryCount = 0;
    this.slowQueries = [];
    this.queryTimes = [];
  }

  async query(text, params) {
    const startTime = process.hrtime.bigint();
    this.queryCount++;

    try {
      const result = await super.query(text, params);
      const endTime = process.hrtime.bigint();
      const duration = Number(endTime - startTime) / 1000000; // Convert to ms

      // Record query performance (keep only the last 1000 samples)
      this.queryTimes.push(duration);
      if (this.queryTimes.length > 1000) {
        this.queryTimes = this.queryTimes.slice(-1000);
      }

      // Track slow queries (slower than 1 second)
      if (duration > 1000) {
        this.slowQueries.push({
          query: text,
          params,
          duration,
          timestamp: new Date()
        });

        console.warn(`Slow query detected: ${duration}ms`, {
          query: text.substring(0, 100) + '...',
          params
        });
      }

      return result;
    } catch (error) {
      const endTime = process.hrtime.bigint();
      const duration = Number(endTime - startTime) / 1000000;

      console.error('Database query error:', {
        query: text,
        params,
        duration,
        error: error.message
      });

      throw error;
    }
  }

  getQueryStats() {
    const avgTime = this.queryTimes.length > 0
      ? this.queryTimes.reduce((a, b) => a + b, 0) / this.queryTimes.length
      : 0;

    return {
      totalQueries: this.queryCount,
      averageQueryTime: Math.round(avgTime),
      slowQueries: this.slowQueries.length,
      recentSlowQueries: this.slowQueries.slice(-10)
    };
  }
}

// Use monitored pool
const pool = new MonitoredPool({
  connectionString: process.env.DATABASE_URL
});

// Log query stats every minute
setInterval(() => {
  const stats = pool.getQueryStats();
  console.log('Database Performance:', stats);
}, 60000);
```
Real-time Monitoring Dashboard
Express.js Health Check Endpoint
```javascript
app.get('/health', (req, res) => {
  const systemMetrics = systemMonitor.getSystemMetrics();
  const performanceMetrics = perfMonitor.getMetricsSummary();
  const dbStats = pool.getQueryStats();
  const stress = systemMonitor.isSystemStressed();

  const health = {
    status: stress.highLoad || stress.highMemory ? 'degraded' : 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    system: systemMetrics,
    performance: performanceMetrics,
    database: dbStats,
    alerts: {
      highLoad: stress.highLoad,
      highMemory: stress.highMemory,
      lowMemory: stress.lowMemory
    }
  };

  const statusCode = health.status === 'healthy' ? 200 : 503;
  res.status(statusCode).json(health);
});

// Detailed metrics endpoint
app.get('/metrics', (req, res) => {
  const metrics = {
    performance: perfMonitor.getMetricsSummary(),
    system: systemMonitor.getSystemMetrics(),
    database: pool.getQueryStats()
  };

  res.json(metrics);
});
```
Performance Optimization Strategies
Caching Implementation
```javascript
const NodeCache = require('node-cache');
const { createClient } = require('redis');

class CacheManager {
  constructor() {
    this.memoryCache = new NodeCache({ stdTTL: 600 }); // 10 minutes

    // node-redis v4 client: connect once, log connection errors
    this.redis = createClient({
      socket: {
        host: process.env.REDIS_HOST,
        port: Number(process.env.REDIS_PORT)
      }
    });
    this.redis.on('error', (err) => console.error('Redis client error:', err));
    this.redis.connect().catch((err) => console.error('Redis connect error:', err));
  }

  async get(key) {
    // Try memory cache first
    const value = this.memoryCache.get(key);
    if (value !== undefined) {
      perfMonitor.recordMetric('cache_hit', 1, { type: 'memory' });
      return value;
    }

    // Try Redis cache
    try {
      const redisValue = await this.redis.get(key);
      if (redisValue) {
        const parsed = JSON.parse(redisValue);
        this.memoryCache.set(key, parsed); // Populate memory cache
        perfMonitor.recordMetric('cache_hit', 1, { type: 'redis' });
        return parsed;
      }
    } catch (error) {
      console.error('Redis cache error:', error);
    }

    perfMonitor.recordMetric('cache_miss', 1, { type: 'all' });
    return null;
  }

  async set(key, value, ttl = 3600) {
    // Set in memory cache
    this.memoryCache.set(key, value);

    // Set in Redis cache with an expiry
    try {
      await this.redis.set(key, JSON.stringify(value), { EX: ttl });
    } catch (error) {
      console.error('Redis cache set error:', error);
    }
  }

  async del(key) {
    this.memoryCache.del(key);
    try {
      await this.redis.del(key);
    } catch (error) {
      console.error('Redis cache delete error:', error);
    }
  }
}

const cache = new CacheManager();

// Cached database query
async function getCachedUser(userId) {
  const cacheKey = `user:${userId}`;

  let user = await cache.get(cacheKey);
  if (user) {
    return user;
  }

  // Fetch from database
  const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);
  user = result.rows[0];

  if (user) {
    await cache.set(cacheKey, user, 1800); // 30 minutes
  }

  return user;
}
```
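The other half of caching is invalidation: when a user row changes, the cached copy should be dropped so the next read repopulates both layers. Here is a minimal sketch using the `CacheManager` above; the `PUT /api/users/:id` route and its UPDATE statement are illustrative, and it assumes `app.use(express.json())` is registered for `req.body`.

```javascript
// Invalidate the cached user whenever the row is updated
app.put('/api/users/:id', async (req, res) => {
  const userId = req.params.id;

  try {
    const result = await pool.query(
      'UPDATE users SET name = $1 WHERE id = $2 RETURNING *',
      [req.body.name, userId]
    );

    // Drop the stale entry from both the memory and Redis layers
    await cache.del(`user:${userId}`);

    res.json(result.rows[0]);
  } catch (error) {
    res.status(500).json({ error: 'Internal server error' });
  }
});
```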
Connection Pooling Optimization
```javascript
// Optimized database connection pool
const optimizedPool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20,                        // Maximum number of clients in the pool
  min: 5,                         // Minimum number of clients kept in the pool
  idleTimeoutMillis: 30000,       // Close idle clients after 30 seconds
  connectionTimeoutMillis: 2000,  // Fail if a connection cannot be acquired within 2 seconds

  // SSL configuration for production
  ssl: process.env.NODE_ENV === 'production'
    ? { rejectUnauthorized: false }
    : false
});

// Pool monitoring
setInterval(() => {
  const poolStats = {
    totalCount: optimizedPool.totalCount,     // All clients currently in the pool
    idleCount: optimizedPool.idleCount,       // Clients not checked out
    waitingCount: optimizedPool.waitingCount  // Requests queued for a client
  };

  console.log('Pool Stats:', poolStats);

  // Alert if requests are queuing up for connections
  if (poolStats.waitingCount > 5) {
    console.warn('High connection pool wait time!');
  }
}, 30000);
```
Alerting and Notifications
Performance Alert System
```javascript
class AlertManager {
  constructor() {
    this.alerts = new Map();
    this.thresholds = {
      responseTime: 2000, // 2 seconds
      errorRate: 0.05,    // 5%
      memoryUsage: 0.8,   // 80%
      cpuUsage: 0.8       // 80%
    };
  }

  checkAlerts(metrics) {
    const alerts = [];

    // Response time alert
    if (metrics.performance?.operation_duration?.p95 > this.thresholds.responseTime) {
      alerts.push({
        type: 'HIGH_RESPONSE_TIME',
        message: `95th percentile response time is ${metrics.performance.operation_duration.p95}ms`,
        severity: 'warning'
      });
    }

    // Error rate alert
    const errorRate = metrics.errors?.errorRate || 0;
    if (errorRate > this.thresholds.errorRate) {
      alerts.push({
        type: 'HIGH_ERROR_RATE',
        message: `Error rate is ${(errorRate * 100).toFixed(2)}%`,
        severity: 'critical'
      });
    }

    // Memory usage alert
    const memoryUsage = metrics.system?.memory?.heapUsed / metrics.system?.memory?.heapTotal || 0;
    if (memoryUsage > this.thresholds.memoryUsage) {
      alerts.push({
        type: 'HIGH_MEMORY_USAGE',
        message: `Memory usage is ${(memoryUsage * 100).toFixed(2)}%`,
        severity: 'warning'
      });
    }

    return alerts;
  }

  async sendAlert(alert) {
    // Send to a monitoring service (e.g., Datadog, New Relic, PagerDuty)
    console.warn('ALERT:', alert);

    // You can integrate with external services here
    // await sendToSlack(alert);
    // await sendToEmail(alert);
  }
}

const alertManager = new AlertManager();

// Check alerts every 30 seconds
setInterval(() => {
  const metrics = {
    performance: perfMonitor.getMetricsSummary(),
    system: systemMonitor.getSystemMetrics(),
    errors: { errorRate: 0 } // Calculate from your error tracking
  };

  const alerts = alertManager.checkAlerts(metrics);
  alerts.forEach(alert => alertManager.sendAlert(alert));
}, 30000);
```
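The `errorRate` field above is left as a placeholder ("calculate from your error tracking"). One way to derive it with the `PerformanceMonitor` from earlier is to record one counter per request and one per failed request, then divide the two counts. This helper is an illustration, not part of the monitor's API; the `http_requests` and `http_errors` metric names are assumptions, and it relies on those counters being recorded without tags so each name maps to a single series in `getMetricsSummary()`.

```javascript
// Derive an error rate from counters recorded via perfMonitor.recordMetric().
// Assumes the request middleware calls recordMetric('http_requests', 1)
// for every request and recordMetric('http_errors', 1) for every 5xx response.
function calculateErrorRate() {
  const summary = perfMonitor.getMetricsSummary();
  const totalRequests = summary.http_requests?.count || 0;
  const totalErrors = summary.http_errors?.count || 0;

  return totalRequests > 0 ? totalErrors / totalRequests : 0;
}
```

The alerting interval in the previous block could then pass `errors: { errorRate: calculateErrorRate() }` instead of the hard-coded zero.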
Conclusion
Effective performance monitoring requires:
- Comprehensive metrics collection - Track all relevant KPIs
- Real-time monitoring - Detect issues as they happen
- Proactive alerting - Get notified before problems escalate
- Performance optimization - Continuously improve based on data
- Regular analysis - Review trends and patterns
Remember: Monitoring without action is just data collection. Use the insights to continuously improve your application's performance and reliability.