SQL Query Optimizer

Optimize SQL queries for maximum performance.

EXPLAIN Analysis -- Original slow query EXPLAIN ANALYZE SELECT u.*, COUNT(o.id) as order_count FROM users u LEFT JOIN orders o ON o.user_id = u.id WHERE u.created_at > '2024-01-01' GROUP BY u.id ORDER BY order_count DESC LIMIT 10;

-- Output analysis: / Sort (cost=15234.32..15234.34 rows=10 width=120) (actual time=245.123..245.125 rows=10 loops=1) Sort Key: (count(o.id)) DESC -> HashAggregate (cost=15000.00..15100.00 rows=1000 width=120) (actual time=244.891..245.023 rows=1000 loops=1) Group Key: u.id -> Hash Left Join (cost=1234.56..14500.00 rows=50000 width=112) (actual time=12.345..230.456 rows=50000 loops=1) Hash Cond: (o.user_id = u.id) -> Seq Scan on orders o (cost=0.00..10000.00 rows=100000 width=8) (actual time=0.012..180.234 rows=100000 loops=1) -> Hash (cost=1000.00..1000.00 rows=5000 width=112) (actual time=10.234..10.234 rows=5000 loops=1) Buckets: 8192 Batches: 1 Memory Usage: 456kB -> Seq Scan on users u (cost=0.00..1000.00 rows=5000 width=112) (actual time=0.008..5.123 rows=5000 loops=1) Filter: (created_at > '2024-01-01'::date) Rows Removed by Filter: 1000 Planning Time: 0.234 ms Execution Time: 245.234 ms /

-- Issues identified: -- 1. Seq Scan on orders (no index on user_id) -- 2. Seq Scan on users (no index on created_at) -- 3. Full table scans expensive

Index Recommendations -- Problem: Sequential scans EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123; / Seq Scan on orders (cost=0.00..10000.00 rows=50 width=100) (actual time=0.012..89.456 rows=50 loops=1) Filter: (user_id = 123) Rows Removed by Filter: 99950 /

-- Solution: Add index CREATE INDEX idx_orders_user_id ON orders(user_id);

-- After index: / Index Scan using idx_orders_user_id on orders (cost=0.29..45.32 rows=50 width=100) (actual time=0.023..0.089 rows=50 loops=1) Index Cond: (user_id = 123) /

-- Performance: 89ms → 0.09ms (990x faster!)

Query Rewrites 1. Avoid SELECT * -- ❌ Bad: Fetches all columns SELECT * FROM users WHERE id = 123;

-- ✅ Good: Fetch only needed columns SELECT id, email, name FROM users WHERE id = 123;

-- Performance: 50% faster, less network transfer

Use EXISTS Instead of IN -- ❌ Slow: Subquery executed fully SELECT * FROM users WHERE id IN (SELECT user_id FROM orders WHERE total > 100);

-- ✅ Fast: Short-circuits on first match SELECT * FROM users u WHERE EXISTS ( SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.total > 100 );

-- Performance: 3x faster on large datasets

Avoid Functions on Indexed Columns -- ❌ Bad: Index not used SELECT * FROM users WHERE LOWER(email) = 'john@example.com';

-- ✅ Good: Index scan possible SELECT * FROM users WHERE email = 'john@example.com';

-- Or create functional index: CREATE INDEX idx_users_email_lower ON users(LOWER(email));

Use Covering Indexes -- Query needs: id, email, name SELECT id, email, name FROM users WHERE email = 'john@example.com';

-- Create covering index (includes all needed columns) CREATE INDEX idx_users_email_covering ON users(email) INCLUDE (id, name);

-- Result: Index-only scan (no table access needed)

Optimize JOIN Order -- ❌ Bad: Large table first SELECT * FROM orders o JOIN users u ON u.id = o.user_id WHERE u.email = 'john@example.com';

-- ✅ Good: Filter first, join second SELECT * FROM users u JOIN orders o ON o.user_id = u.id WHERE u.email = 'john@example.com';

-- Or use CTE for clarity: WITH filtered_users AS ( SELECT id FROM users WHERE email = 'john@example.com' ) SELECT o.* FROM orders o JOIN filtered_users u ON u.id = o.user_id;

Composite Indexes -- Query pattern: WHERE user_id = X AND status = 'active' ORDER BY created_at DESC CREATE INDEX idx_orders_user_status_created ON orders(user_id, status, created_at DESC);

-- Index column order matters! -- Rule: Equality filters → Range filters → Sort columns

-- Example queries that use this index: -- 1. SELECT * FROM orders WHERE user_id = 123; ✅ -- 2. SELECT * FROM orders WHERE user_id = 123 AND status = 'active'; ✅ -- 3. SELECT * FROM orders WHERE user_id = 123 ORDER BY created_at DESC; ✅ -- 4. SELECT * FROM orders WHERE status = 'active'; ❌ (doesn't start with user_id)

Query Performance Benchmarking // scripts/benchmark-queries.ts import { PrismaClient } from "@prisma/client"; import { performance } from "perf_hooks";

const prisma = new PrismaClient();

async function benchmarkQuery( name: string, query: () => Promise, iterations: number = 10 ) { const times: number[] = [];

for (let i = 0; i < iterations; i++) { const start = performance.now(); await query(); const end = performance.now(); times.push(end - start); }

const avg = times.reduce((a, b) => a + b, 0) / times.length; const min = Math.min(...times); const max = Math.max(...times);

console.log(\n${name}:); console.log(Avg: ${avg.toFixed(2)}ms); console.log(Min: ${min.toFixed(2)}ms); console.log(Max: ${max.toFixed(2)}ms);

return { avg, min, max }; }

// Compare queries async function compareQueries() { console.log("🔍 Benchmarking queries...\n");

// Query 1: Original const result1 = await benchmarkQuery("Original Query", async () => { return prisma.$queryRawSELECT u.*, COUNT(o.id) as order_count FROM users u LEFT JOIN orders o ON o.user_id = u.id GROUP BY u.id LIMIT 10; });

// Query 2: Optimized const result2 = await benchmarkQuery("Optimized Query", async () => { return prisma.$queryRawSELECT u.id, u.email, u.name, (SELECT COUNT(*) FROM orders WHERE user_id = u.id) as order_count FROM users u LIMIT 10; });

// Comparison const improvement = ( ((result1.avg - result2.avg) / result1.avg) * 100 ).toFixed(1); console.log(\n📊 Improvement: ${improvement}% faster); }

compareQueries();

Query Optimization Checklist interface QueryOptimization { query: string; issues: string[]; recommendations: string[]; estimatedImprovement: string; }

const optimizations: QueryOptimization[] = [ { query: "SELECT * FROM orders WHERE user_id = $1", issues: [ "Missing index on user_id", "SELECT * fetches unnecessary columns", ], recommendations: [ "CREATE INDEX idx_orders_user_id ON orders(user_id)", "SELECT id, total, status instead of ", ], estimatedImprovement: "90% faster", }, { query: "SELECT COUNT() FROM orders", issues: ["Full table scan", "No WHERE clause filtering"], recommendations: [ "Add WHERE clause to filter rows", "Consider approximate count for large tables", ], estimatedImprovement: "70% faster", }, ];

Automated Slow Query Detection // scripts/detect-slow-queries.ts async function detectSlowQueries() { // Enable slow query logging in PostgreSQL await prisma.$executeRawALTER DATABASE mydb SET log_min_duration_statement = 100;;

// Query pg_stat_statements for slow queries const slowQueries = await prisma.$queryRawSELECT query, calls, total_exec_time / 1000 as total_time_seconds, mean_exec_time / 1000 as mean_time_ms, max_exec_time / 1000 as max_time_ms FROM pg_stat_statements WHERE mean_exec_time > 100 -- > 100ms ORDER BY mean_exec_time DESC LIMIT 20;

console.log("🐌 Slow Queries Detected:\n"); slowQueries.forEach((q, i) => { console.log(${i + 1}. ${q.query.substring(0, 80)}...); console.log(Calls: ${q.calls}); console.log(Avg: ${q.mean_time_ms.toFixed(2)}ms); console.log(Max: ${q.max_time_ms.toFixed(2)}ms\n); }); }

Best Practices Always use EXPLAIN: Understand query plans Index foreign keys: Essential for joins Avoid SELECT *: Fetch only needed columns Use composite indexes: Multi-column queries Consider covering indexes: Eliminate table access Batch operations: Reduce round trips Monitor regularly: Track slow queries Output Checklist EXPLAIN plan analyzed Missing indexes identified Query rewrite suggestions Performance benchmarks Before/after metrics Index creation scripts Slow query monitoring Optimization priority list

sql-query-optimizer

安装