Performance Profiling Fundamentals

Performance profiling identifies bottlenecks in CPU usage, memory allocation, network I/O, and database queries. For QA engineers, profiling transforms vague “slow application” reports into actionable optimization targets.

Key Profiling Areas

  • CPU Profiling - Identify hot code paths consuming CPU cycles
  • Memory Profiling - Find memory allocations and leaks
  • I/O Profiling - Detect slow disk and network operations
  • Database Profiling - Optimize queries and connection pooling
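
Before reaching for a full profiler, a coarse timer can confirm which of these areas deserves attention. A minimal Python sketch (fetch_report is a hypothetical stand-in for your own workload):

import time
from functools import wraps

def timed(fn):
    """Log wall-clock time per call - a quick first pass before real profiling."""
    @wraps(fn)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return fn(*args, **kwargs)
        finally:
            print(f"{fn.__name__}: {time.perf_counter() - start:.3f}s")
    return wrapper

@timed
def fetch_report():  # hypothetical workload
    time.sleep(0.2)

fetch_report()  # prints roughly: fetch_report: 0.200s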

CPU Profiling

Node.js CPU Profiling

# Built-in profiler
node --prof app.js

# Generate human-readable report
node --prof-process isolate-0x*.log > profile.txt

# Using clinic flame
npm install -g clinic
clinic flame -- node app.js

# Generate load
ab -n 10000 -c 100 http://localhost:3000/

# Stop with Ctrl+C; clinic then generates and opens the flame graph

// Programmatic profiling
const { Session } = require('inspector');
const fs = require('fs');

const session = new Session();
session.connect();

// Start profiling
session.post('Profiler.enable');
session.post('Profiler.start');

// Run workload
performHeavyOperation();

// Stop profiling
session.post('Profiler.stop', (err, data) => {
    if (!err) {
        fs.writeFileSync('./profile.cpuprofile', JSON.stringify(data.profile));
    }
    session.disconnect();
});

// Load profile.cpuprofile in Chrome DevTools

Python CPU Profiling

# cProfile - Built-in profiler
import cProfile
import pstats

def slow_function():
    total = 0
    for i in range(1000000):
        total += i
    return total

# Profile execution
cProfile.run('slow_function()', 'output.prof')

# Analyze results
stats = pstats.Stats('output.prof')
stats.sort_stats('cumulative')
stats.print_stats(10)  # Top 10 functions

# Output:
#    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
#         1    0.052    0.052    0.052    0.052 profile.py:4(slow_function)

# py-spy - Sampling profiler (no code changes)
# Install: pip install py-spy

# Record flame graph
py-spy record -o profile.svg -- python app.py

# Live top view
py-spy top --pid 12345

# Export to speedscope format
py-spy record -f speedscope -o profile.json -- python app.py

Go CPU Profiling

// pprof profiling
package main

import (
    "log"
    "net/http"
    _ "net/http/pprof" // registers /debug/pprof handlers as a side effect
)

func main() {
    // HTTP endpoint for profiling
    go func() {
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()

    // Run application
    runApp()
}

// Capture profile
// go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30

// Visualize
// go tool pprof -http=:8080 profile.pb.gz

Flame Graphs

Generating Flame Graphs

# Install FlameGraph tools
git clone https://github.com/brendangregg/FlameGraph.git

# Capture stack traces (Linux perf)
perf record -F 99 -p <PID> -g -- sleep 30
perf script > out.perf

# Generate flame graph
./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
./FlameGraph/flamegraph.pl out.folded > flamegraph.svg

# Open flamegraph.svg in browser

Reading Flame Graphs:

  • Width - Percentage of samples (CPU time)
  • Height - Stack depth (call chain)
  • Color - Random (for differentiation)
  • Plateaus - Functions consuming CPU
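
The folded stack format produced by stackcollapse-perf.pl makes the width rule concrete: each line is a semicolon-joined call chain plus a sample count, and counts become frame widths. An illustrative Python sketch with toy stacks (not real perf output):

from collections import Counter

# Toy sampled call chains, root first, leaf last - one entry per sample
samples = [
    "main;handle_request;parse_json",
    "main;handle_request;parse_json",
    "main;handle_request;query_db",
    "main;idle",
]

# Collapsing identical stacks yields the folded format flamegraph.pl consumes
for stack, count in Counter(samples).items():
    print(stack, count)

# main;handle_request;parse_json 2  <- widest frame: the hottest code path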

Interactive Flame Graphs with Speedscope

# Install speedscope
npm install -g speedscope

# Convert profile to speedscope format
# For Node.js
node --cpu-prof app.js
speedscope CPU.*.cpuprofile

# For Python (py-spy)
py-spy record -f speedscope -o profile.json -- python app.py
speedscope profile.json

Memory Profiling

Node.js Memory Profiling

// Track memory usage
const v8 = require('v8');
const fs = require('fs');

function takeHeapSnapshot(filename) {
    const snapshot = v8.writeHeapSnapshot(filename);
    console.log(`Heap snapshot written to ${snapshot}`);
}

// Before operation
takeHeapSnapshot('./heap-before.heapsnapshot');

// Perform memory-intensive operation
let leakyArray = [];
for (let i = 0; i < 1000000; i++) {
    leakyArray.push({ data: new Array(100).fill(i) });
}

// After operation
takeHeapSnapshot('./heap-after.heapsnapshot');

// Compare snapshots in Chrome DevTools
// DevTools → Memory → Load snapshot

// Real-time memory monitoring
setInterval(() => {
    const usage = process.memoryUsage();

    console.log({
        rss: `${Math.round(usage.rss / 1024 / 1024)} MB`,        // Resident set size (total process memory)
        heapTotal: `${Math.round(usage.heapTotal / 1024 / 1024)} MB`,
        heapUsed: `${Math.round(usage.heapUsed / 1024 / 1024)} MB`,
        external: `${Math.round(usage.external / 1024 / 1024)} MB`
    });
}, 5000);

Python Memory Profiling

# memory_profiler
from memory_profiler import profile

@profile
def memory_intensive_function():
    large_list = [i for i in range(1000000)]
    large_dict = {i: str(i) for i in range(100000)}
    return len(large_list) + len(large_dict)

# Run: python -m memory_profiler script.py

# Output:
# Line    Mem usage    Increment   Line Contents
#    3     50.0 MiB     50.0 MiB   @profile
#    4                             def memory_intensive_function():
#    5     88.2 MiB     38.2 MiB       large_list = [...]
#    6    103.5 MiB     15.3 MiB       large_dict = {...}
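
For a snapshot-diff workflow using only the standard library, tracemalloc can compare allocation sites before and after an operation, mirroring the heap-snapshot comparison shown for Node.js above. A minimal sketch:

import tracemalloc

tracemalloc.start()
before = tracemalloc.take_snapshot()

# Memory-intensive work
data = [str(i) * 10 for i in range(100_000)]

after = tracemalloc.take_snapshot()

# Top allocation sites by net growth between the two snapshots
for stat in after.compare_to(before, 'lineno')[:5]:
    print(stat)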

Java Memory Profiling

# Java Flight Recorder
java -XX:StartFlightRecording=duration=60s,filename=recording.jfr -jar app.jar

# Analyze with JDK Mission Control
jmc recording.jfr

# Or use jcmd
jcmd <PID> JFR.start name=qa duration=60s filename=recording.jfr
jcmd <PID> JFR.stop name=qa

Database Query Profiling

PostgreSQL Query Analysis

-- Enable query timing
\timing on

-- Explain query plan
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';

-- Output:
-- Seq Scan on users  (cost=0.00..1000.00 rows=1 width=100) (actual time=0.050..10.234 rows=1 loops=1)
--   Filter: (email = 'test@example.com')
--   Rows Removed by Filter: 99999
-- Planning Time: 0.150 ms
-- Execution Time: 10.300 ms

-- Add index to optimize
CREATE INDEX idx_users_email ON users(email);

-- Verify improvement
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';

-- Output:
-- Index Scan using idx_users_email on users (cost=0.42..8.44 rows=1) (actual time=0.020..0.025 rows=1)
--   Index Cond: (email = 'test@example.com')
-- Execution Time: 0.050 ms

MongoDB Query Profiling

// Enable profiling
db.setProfilingLevel(2)  // Profile all queries

// Query with explain
db.users.find({ email: 'test@example.com' }).explain('executionStats')

// Output shows:
// - executionStats.executionTimeMillis
// - executionStats.totalDocsExamined
// - queryPlanner.winningPlan (which index, if any, was used)

// Create index
db.users.createIndex({ email: 1 })

// Check slow queries
db.system.profile.find({ millis: { $gt: 100 } }).sort({ ts: -1 })

ORM Query Optimization

# Django query profiling
from django.db import connection, reset_queries
from django.test.utils import override_settings

@override_settings(DEBUG=True)
def profile_queries():
    # N+1 query problem
    posts = Post.objects.all()
    for post in posts:
        print(post.author.name)  # Separate query for each post

    print(f"Queries executed: {len(connection.queries)}")

    # Optimized with select_related
    posts = Post.objects.select_related('author').all()
    for post in posts:
        print(post.author.name)  # Single join query

    print(f"Queries executed: {len(connection.queries)}")

# Use django-debug-toolbar for visual profiling

Network I/O Profiling

HTTP Request Timing

// Measure request breakdown
const https = require('https');
const { performance } = require('perf_hooks');

function profileHTTPRequest(url) {
    const timings = {
        dnsLookup: 0,
        tcpConnection: 0,
        tlsHandshake: 0,
        firstByte: 0,
        download: 0,
        total: 0
    };

    const start = performance.now();

    const req = https.request(url, (res) => {
        let firstChunk = true;

        res.on('data', () => {
            if (firstChunk) {
                timings.firstByte = performance.now() - start;
                firstChunk = false;
            }
        });

        res.on('end', () => {
            timings.total = performance.now() - start;
            timings.download = timings.total - timings.firstByte;
            console.log(timings);
        });
    });

    req.on('socket', (socket) => {
        socket.on('lookup', () => {
            timings.dnsLookup = performance.now() - start;
        });

        socket.on('connect', () => {
            timings.tcpConnection = performance.now() - start;
        });

        socket.on('secureConnect', () => {
            timings.tlsHandshake = performance.now() - start;
        });
    });

    req.end();
}

profileHTTPRequest('https://api.example.com/data');

Application Performance Monitoring (APM)

New Relic

// newrelic.js configuration
exports.config = {
    app_name: ['My Application'],
    license_key: 'your_license_key',
    logging: {
        level: 'info'
    },
    transaction_tracer: {
        enabled: true
    }
};

// Require newrelic first, before other modules
require('newrelic');
const express = require('express');
const app = express();

// Custom transactions
const newrelic = require('newrelic');

app.get('/api/data', (req, res) => {
    newrelic.startWebTransaction('/api/data', async () => {
        const segment = newrelic.startSegment('database-query', true, async () => {
            return await database.query('SELECT * FROM data');
        });

        res.json(await segment);
    });
});

Datadog APM

# Datadog Python APM
from ddtrace import tracer

@tracer.wrap(service='my-app', resource='expensive-operation')
def expensive_operation():
    # Custom span
    with tracer.trace('database-query', service='postgres'):
        result = db.query('SELECT * FROM large_table')

    with tracer.trace('data-processing'):
        processed = process_data(result)

    return processed

Optimization Strategies

CPU Optimization

# Before: O(n²) algorithm
def find_duplicates_slow(arr):
    duplicates = []
    for i in range(len(arr)):
        for j in range(i + 1, len(arr)):
            if arr[i] == arr[j]:
                duplicates.append(arr[i])
    return duplicates

# After: O(n) algorithm
def find_duplicates_fast(arr):
    seen = set()
    duplicates = set()
    for item in arr:
        if item in seen:
            duplicates.add(item)
        seen.add(item)
    return list(duplicates)

# Benchmark
import timeit

arr = list(range(1000)) * 2
print(f"Slow: {timeit.timeit(lambda: find_duplicates_slow(arr), number=10)}")
print(f"Fast: {timeit.timeit(lambda: find_duplicates_fast(arr), number=10)}")

Memory Optimization

const fs = require('fs');
const readline = require('readline');

// Before: Storing entire dataset in memory
function processLargeFile(filename) {
    const data = fs.readFileSync(filename, 'utf8');  // Loads entire file
    return data.split('\n').map(line => processLine(line));
}

// After: Streaming processing
function processLargeFileOptimized(filename) {
    return new Promise((resolve) => {
        const results = [];
        const stream = fs.createReadStream(filename);
        const rl = readline.createInterface({ input: stream });

        rl.on('line', (line) => {
            results.push(processLine(line));
        });

        rl.on('close', () => resolve(results));
    });
}

Conclusion

Performance profiling transforms guesswork into data-driven optimization. By systematically profiling CPU, memory, network, and database performance, QA engineers identify bottlenecks and validate improvements with measurable metrics.

Key Takeaways:

  • Profile before optimizing - measure, don’t guess
  • Use flame graphs to visualize CPU hotspots
  • Track memory allocations to prevent leaks
  • Optimize database queries with explain plans
  • Monitor production with APM tools
  • Focus on algorithmic improvements first
  • Validate optimizations with benchmarks