Performance Profiling Fundamentals

Performance profiling identifies bottlenecks in CPU usage, memory allocation, network I/O, and database queries. For QA engineers, profiling transforms vague “slow application” reports into actionable optimization targets.

Key Profiling Areas

  • CPU Profiling - Identify hot code paths consuming CPU cycles
  • Memory Profiling - Find memory allocations and leaks
  • I/O Profiling - Detect slow disk and network operations
  • Database Profiling - Optimize queries and connection pooling
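
Before reaching for a full profiler, a coarse timer can confirm which of these areas deserves attention. A minimal Python sketch (fetch_report is a hypothetical stand-in for your own workload):

import time
from functools import wraps

def timed(fn):
    """Log wall-clock time per call - a quick first pass before real profiling."""
    @wraps(fn)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return fn(*args, **kwargs)
        finally:
            print(f"{fn.__name__}: {time.perf_counter() - start:.3f}s")
    return wrapper

@timed
def fetch_report():  # hypothetical workload
    time.sleep(0.2)

fetch_report()  # prints roughly: fetch_report: 0.200s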

CPU Profiling

Node.js CPU Profiling

# Built-in profiler
node --prof app.js

# Generate human-readable report
node --prof-process isolate-0x*.log > profile.txt

# Using clinic flame
npm install -g clinic
clinic flame -- node app.js

# Generate load
ab -n 10000 -c 100 http://localhost:3000/

# Stop with Ctrl+C; clinic then generates and opens the flame graph

// Programmatic profiling
const { Session } = require('inspector');
const fs = require('fs');

const session = new Session();
session.connect();

// Start profiling
session.post('Profiler.enable');
session.post('Profiler.start');

// Run workload
performHeavyOperation();

// Stop profiling
session.post('Profiler.stop', (err, data) => {
    if (!err) {
        fs.writeFileSync('./profile.cpuprofile', JSON.stringify(data.profile));
    }
    session.disconnect();
});

// Load profile.cpuprofile in Chrome DevTools

Python CPU Profiling

# cProfile - Built-in profiler
import cProfile
import pstats

def slow_function():
    total = 0
    for i in range(1000000):
        total += i
    return total

# Profile execution
cProfile.run('slow_function()', 'output.prof')

# Analyze results
stats = pstats.Stats('output.prof')
stats.sort_stats('cumulative')
stats.print_stats(10)  # Top 10 functions

# Output:
#    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
#         1    0.052    0.052    0.052    0.052 profile.py:4(slow_function)

# py-spy - Sampling profiler (no code changes)
# Install: pip install py-spy

# Record flame graph
py-spy record -o profile.svg -- python app.py

# Live top view
py-spy top --pid 12345

# Export to speedscope format
py-spy record -f speedscope -o profile.json -- python app.py

Go CPU Profiling

// pprof profiling
package main

import (
    "log"
    "net/http"
    _ "net/http/pprof" // registers /debug/pprof handlers as a side effect
)

func main() {
    // HTTP endpoint for profiling
    go func() {
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()

    // Run application
    runApp()
}

// Capture profile
// go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30

// Visualize
// go tool pprof -http=:8080 profile.pb.gz

Flame Graphs

Generating Flame Graphs

# Install FlameGraph tools
git clone https://github.com/brendangregg/FlameGraph.git

# Capture stack traces (Linux perf)
perf record -F 99 -p <PID> -g -- sleep 30
perf script > out.perf

# Generate flame graph
./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
./FlameGraph/flamegraph.pl out.folded > flamegraph.svg

# Open flamegraph.svg in browser

Reading Flame Graphs:

  • Width - Percentage of samples (CPU time)
  • Height - Stack depth (call chain)
  • Color - Random (for differentiation)
  • Plateaus - Functions consuming CPU
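
The folded stack format produced by stackcollapse-perf.pl makes the width rule concrete: each line is a semicolon-joined call chain plus a sample count, and counts become frame widths. An illustrative Python sketch with toy stacks (not real perf output):

from collections import Counter

# Toy sampled call chains, root first, leaf last - one entry per sample
samples = [
    "main;handle_request;parse_json",
    "main;handle_request;parse_json",
    "main;handle_request;query_db",
    "main;idle",
]

# Collapsing identical stacks yields the folded format flamegraph.pl consumes
for stack, count in Counter(samples).items():
    print(stack, count)

# main;handle_request;parse_json 2  <- widest frame: the hottest code path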

Interactive Flame Graphs with Speedscope

# Install speedscope
npm install -g speedscope

# Convert profile to speedscope format
# For Node.js
node --cpu-prof app.js
speedscope CPU.*.cpuprofile

# For Python (py-spy)
py-spy record -f speedscope -o profile.json -- python app.py
speedscope profile.json

Memory Profiling

Node.js Memory Profiling

// Track memory usage
const v8 = require('v8');
const fs = require('fs');

function takeHeapSnapshot(filename) {
    const snapshot = v8.writeHeapSnapshot(filename);
    console.log(`Heap snapshot written to ${snapshot}`);
}

// Before operation
takeHeapSnapshot('./heap-before.heapsnapshot');

// Perform memory-intensive operation
let leakyArray = [];
for (let i = 0; i < 1000000; i++) {
    leakyArray.push({ data: new Array(100).fill(i) });
}

// After operation
takeHeapSnapshot('./heap-after.heapsnapshot');

// Compare snapshots in Chrome DevTools
// DevTools → Memory → Load snapshot

// Real-time memory monitoring
setInterval(() => {
    const usage = process.memoryUsage();

    console.log({
        rss: `${Math.round(usage.rss / 1024 / 1024)} MB`,        // Resident set size (total process memory)
        heapTotal: `${Math.round(usage.heapTotal / 1024 / 1024)} MB`,
        heapUsed: `${Math.round(usage.heapUsed / 1024 / 1024)} MB`,
        external: `${Math.round(usage.external / 1024 / 1024)} MB`
    });
}, 5000);

Python Memory Profiling

# memory_profiler
from memory_profiler import profile

@profile
def memory_intensive_function():
    large_list = [i for i in range(1000000)]
    large_dict = {i: str(i) for i in range(100000)}
    return len(large_list) + len(large_dict)

# Run: python -m memory_profiler script.py

# Output:
# Line    Mem usage    Increment   Line Contents
#    3     50.0 MiB     50.0 MiB   @profile
#    4                             def memory_intensive_function():
#    5     88.2 MiB     38.2 MiB       large_list = [...]
#    6    103.5 MiB     15.3 MiB       large_dict = {...}
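
For a snapshot-diff workflow using only the standard library, tracemalloc can compare allocation sites before and after an operation, mirroring the heap-snapshot comparison shown for Node.js above. A minimal sketch:

import tracemalloc

tracemalloc.start()
before = tracemalloc.take_snapshot()

# Memory-intensive work
data = [str(i) * 10 for i in range(100_000)]

after = tracemalloc.take_snapshot()

# Top allocation sites by net growth between the two snapshots
for stat in after.compare_to(before, 'lineno')[:5]:
    print(stat)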

Java Memory Profiling

# Java Flight Recorder
java -XX:StartFlightRecording=duration=60s,filename=recording.jfr -jar app.jar

# Analyze with JDK Mission Control
jmc recording.jfr

# Or use jcmd
jcmd <PID> JFR.start name=qa duration=60s filename=recording.jfr
jcmd <PID> JFR.stop name=qa

Database Query Profiling

PostgreSQL Query Analysis

-- Enable query timing
\timing on

-- Explain query plan
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';

-- Output:
-- Seq Scan on users  (cost=0.00..1000.00 rows=1 width=100) (actual time=0.050..10.234 rows=1 loops=1)
--   Filter: (email = 'test@example.com')
--   Rows Removed by Filter: 99999
-- Planning Time: 0.150 ms
-- Execution Time: 10.300 ms

-- Add index to optimize
CREATE INDEX idx_users_email ON users(email);

-- Verify improvement
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';

-- Output:
-- Index Scan using idx_users_email on users (cost=0.42..8.44 rows=1) (actual time=0.020..0.025 rows=1)
--   Index Cond: (email = 'test@example.com')
-- Execution Time: 0.050 ms

MongoDB Query Profiling

// Enable profiling
db.setProfilingLevel(2)  // Profile all queries

// Query with explain
db.users.find({ email: 'test@example.com' }).explain('executionStats')

// Output shows:
// - executionStats.executionTimeMillis
// - executionStats.totalDocsExamined
// - queryPlanner.winningPlan (which index, if any, was used)

// Create index
db.users.createIndex({ email: 1 })

// Check slow queries
db.system.profile.find({ millis: { $gt: 100 } }).sort({ ts: -1 })

ORM Query Optimization

# Django query profiling
from django.db import connection, reset_queries
from django.test.utils import override_settings

@override_settings(DEBUG=True)
def profile_queries():
    # N+1 query problem
    posts = Post.objects.all()
    for post in posts:
        print(post.author.name)  # Separate query for each post

    print(f"Queries executed: {len(connection.queries)}")

    # Optimized with select_related
    posts = Post.objects.select_related('author').all()
    for post in posts:
        print(post.author.name)  # Single join query

    print(f"Queries executed: {len(connection.queries)}")

# Use django-debug-toolbar for visual profiling

Network I/O Profiling

HTTP Request Timing

// Measure request breakdown
const https = require('https');
const { performance } = require('perf_hooks');

function profileHTTPRequest(url) {
    const timings = {
        dnsLookup: 0,
        tcpConnection: 0,
        tlsHandshake: 0,
        firstByte: 0,
        download: 0,
        total: 0
    };

    const start = performance.now();

    const req = https.request(url, (res) => {
        let firstChunk = true;

        res.on('data', () => {
            if (firstChunk) {
                timings.firstByte = performance.now() - start;
                firstChunk = false;
            }
        });

        res.on('end', () => {
            timings.total = performance.now() - start;
            timings.download = timings.total - timings.firstByte;
            console.log(timings);
        });
    });

    req.on('socket', (socket) => {
        socket.on('lookup', () => {
            timings.dnsLookup = performance.now() - start;
        });

        socket.on('connect', () => {
            timings.tcpConnection = performance.now() - start;
        });

        socket.on('secureConnect', () => {
            timings.tlsHandshake = performance.now() - start;
        });
    });

    req.end();
}

profileHTTPRequest('https://api.example.com/data');

Application Performance Monitoring (APM)

New Relic

// newrelic.js configuration
exports.config = {
    app_name: ['My Application'],
    license_key: 'your_license_key',
    logging: {
        level: 'info'
    },
    transaction_tracer: {
        enabled: true
    }
};

// Require newrelic first, before other modules
require('newrelic');
const express = require('express');
const app = express();

// Custom transactions
const newrelic = require('newrelic');

app.get('/api/data', (req, res) => {
    newrelic.startWebTransaction('/api/data', async () => {
        const segment = newrelic.startSegment('database-query', true, async () => {
            return await database.query('SELECT * FROM data');
        });

        res.json(await segment);
    });
});

Datadog APM

# Datadog Python APM
from ddtrace import tracer

@tracer.wrap(service='my-app', resource='expensive-operation')
def expensive_operation():
    # Custom span
    with tracer.trace('database-query', service='postgres'):
        result = db.query('SELECT * FROM large_table')

    with tracer.trace('data-processing'):
        processed = process_data(result)

    return processed

Optimization Strategies

CPU Optimization

# Before: O(n²) algorithm
def find_duplicates_slow(arr):
    duplicates = []
    for i in range(len(arr)):
        for j in range(i + 1, len(arr)):
            if arr[i] == arr[j]:
                duplicates.append(arr[i])
    return duplicates

# After: O(n) algorithm
def find_duplicates_fast(arr):
    seen = set()
    duplicates = set()
    for item in arr:
        if item in seen:
            duplicates.add(item)
        seen.add(item)
    return list(duplicates)

# Benchmark
import timeit

arr = list(range(1000)) * 2
print(f"Slow: {timeit.timeit(lambda: find_duplicates_slow(arr), number=10)}")
print(f"Fast: {timeit.timeit(lambda: find_duplicates_fast(arr), number=10)}")

Memory Optimization

const fs = require('fs');
const readline = require('readline');

// Before: Storing entire dataset in memory
function processLargeFile(filename) {
    const data = fs.readFileSync(filename, 'utf8');  // Loads entire file
    return data.split('\n').map(line => processLine(line));
}

// After: Streaming processing
function processLargeFileOptimized(filename) {
    return new Promise((resolve) => {
        const results = [];
        const stream = fs.createReadStream(filename);
        const rl = readline.createInterface({ input: stream });

        rl.on('line', (line) => {
            results.push(processLine(line));
        });

        rl.on('close', () => resolve(results));
    });
}

Conclusion

Performance profiling transforms guesswork into data-driven optimization. By systematically profiling CPU, memory, network, and database performance, QA engineers identify bottlenecks and validate improvements with measurable metrics.

Key Takeaways:

  • Profile before optimizing - measure, don’t guess
  • Use flame graphs to visualize CPU hotspots
  • Track memory allocations to prevent leaks
  • Optimize database queries with explain plans
  • Monitor production with APM tools
  • Focus on algorithmic improvements first
  • Validate optimizations with benchmarks