Database Query Optimization - Complete Guide
Published: September 25, 2024 | Reading time: 22 minutes
Database Query Optimization Overview
Query optimization improves database performance and reduces response times:
Query Optimization Benefits
# Query Optimization Benefits
- Faster query execution
- Reduced resource usage
- Better scalability
- Improved user experience
- Lower server costs
- Enhanced application performance
- Reduced database load
Query Analysis
MySQL Query Analysis
MySQL Query Analysis Techniques
# MySQL Query Analysis
# 1. EXPLAIN Statement
EXPLAIN SELECT * FROM users WHERE email = 'user@example.com';
# EXPLAIN with different formats
EXPLAIN FORMAT=JSON SELECT * FROM users WHERE email = 'user@example.com';
EXPLAIN FORMAT=TREE SELECT * FROM users WHERE email = 'user@example.com';
# 2. EXPLAIN ANALYZE (MySQL 8.0+)
EXPLAIN ANALYZE SELECT u.*, p.title
FROM users u
JOIN posts p ON u.id = p.user_id
WHERE u.status = 'active';
# 3. Query Profiling (SHOW PROFILE is deprecated since MySQL 5.6.7; prefer Performance Schema)
SET profiling = 1;
SELECT * FROM users WHERE email = 'user@example.com';
SHOW PROFILES;
SHOW PROFILE FOR QUERY 1;
# 4. Performance Schema
# Enable performance schema
SELECT * FROM performance_schema.events_statements_summary_by_digest
ORDER BY sum_timer_wait DESC
LIMIT 10;
# 5. Slow Query Log
# Enable slow query log
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 1;
SET GLOBAL log_queries_not_using_indexes = 'ON';
# Analyze slow queries
mysqldumpslow -s t -t 10 /var/log/mysql/slow.log
# 6. Query Optimization Examples
# Before optimization
SELECT * FROM users u
JOIN posts p ON u.id = p.user_id
WHERE u.created_at > '2024-01-01'
AND p.status = 'published';
# After optimization
SELECT u.id, u.name, u.email, p.title, p.created_at
FROM users u
INNER JOIN posts p ON u.id = p.user_id
WHERE u.created_at > '2024-01-01'
AND p.status = 'published'
AND u.status = 'active';
# 7. Index Usage Analysis
SHOW INDEX FROM users;
SHOW INDEX FROM posts;
# Check index usage
SELECT * FROM sys.schema_unused_indexes;
SELECT * FROM sys.schema_redundant_indexes;
PostgreSQL Query Analysis
PostgreSQL Query Analysis
# PostgreSQL Query Analysis
# 1. EXPLAIN Statement
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT * FROM users WHERE email = 'user@example.com';
# 2. Query Statistics
# Note: on PostgreSQL 13+ these columns are named total_exec_time / mean_exec_time
SELECT query, calls, total_time, mean_time, rows
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 10;
# 3. Index Usage Statistics
SELECT schemaname, relname, indexrelname, idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
# 4. Table Statistics
SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch
FROM pg_stat_user_tables
ORDER BY seq_tup_read DESC;
# 5. Slow Query Logging
# postgresql.conf configuration
log_min_duration_statement = 1000
log_statement = 'mod'
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
# 6. Query Optimization Examples
# Before optimization
SELECT * FROM users u
JOIN posts p ON u.id = p.user_id
WHERE u.created_at > '2024-01-01'::date
AND p.status = 'published';
# After optimization with proper indexing
CREATE INDEX CONCURRENTLY idx_users_created_status ON users(created_at, status);
CREATE INDEX CONCURRENTLY idx_posts_user_status ON posts(user_id, status);
SELECT u.id, u.name, u.email, p.title, p.created_at
FROM users u
INNER JOIN posts p ON u.id = p.user_id
WHERE u.created_at > '2024-01-01'::date
AND u.status = 'active'
AND p.status = 'published';
# 7. Vacuum and Analyze
VACUUM ANALYZE users;
VACUUM ANALYZE posts;
# Check table bloat
SELECT n.nspname AS schemaname, c.relname AS tablename,
pg_size_pretty(pg_total_relation_size(c.oid)) as size,
pg_stat_get_dead_tuples(c.oid) as dead_tuples
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
AND n.nspname NOT IN ('information_schema', 'pg_catalog');
Indexing Strategies
MySQL Indexing
MySQL Indexing Strategies
# MySQL Indexing Strategies
# 1. Primary Key Index
CREATE TABLE users (
id INT AUTO_INCREMENT PRIMARY KEY,
email VARCHAR(255) UNIQUE,
name VARCHAR(255),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
# 2. Single Column Index
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_created_at ON users(created_at);
# 3. Composite Index
CREATE INDEX idx_users_status_created ON users(status, created_at);
# 4. Covering Index
CREATE INDEX idx_users_covering ON users(status, created_at, name, email);
# 5. Functional Index (MySQL 8.0+)
# Note: MySQL does not support partial (filtered) indexes -- the
# "CREATE INDEX ... WHERE" syntax belongs to PostgreSQL. MySQL 8.0
# supports functional key parts instead:
CREATE INDEX idx_users_email_lower ON users((LOWER(email)));
# 6. Index Optimization Examples
# Query: SELECT * FROM users WHERE status = 'active' AND created_at > '2024-01-01'
CREATE INDEX idx_users_status_created ON users(status, created_at);
# Query: SELECT name, email FROM users WHERE status = 'active'
CREATE INDEX idx_users_status_covering ON users(status, name, email);
# 7. Index Maintenance
# Analyze table statistics
ANALYZE TABLE users;
# Check index usage
SELECT * FROM sys.schema_unused_indexes WHERE object_schema = 'your_database';
# Drop unused indexes
DROP INDEX idx_unused_index ON users;
# 8. Index Monitoring
# Check index cardinality
SELECT
table_name,
index_name,
cardinality,
sub_part,
packed,
nullable,
index_type
FROM information_schema.statistics
WHERE table_schema = 'your_database'
ORDER BY cardinality DESC;
# 9. Full-Text Index
CREATE FULLTEXT INDEX idx_posts_content ON posts(title, content);
# Full-text search
SELECT * FROM posts
WHERE MATCH(title, content) AGAINST('database optimization' IN NATURAL LANGUAGE MODE);
# 10. Spatial Index
CREATE SPATIAL INDEX idx_locations_coords ON locations(coordinates);
PostgreSQL Indexing
PostgreSQL Indexing Strategies
# PostgreSQL Indexing Strategies
# 1. B-tree Index (Default)
CREATE INDEX idx_users_email ON users USING btree (email);
CREATE INDEX idx_users_created_at ON users USING btree (created_at);
# 2. Hash Index
CREATE INDEX idx_users_email_hash ON users USING hash (email);
# 3. GIN Index (Generalized Inverted Index)
CREATE INDEX idx_posts_tags ON posts USING gin (tags);
CREATE INDEX idx_users_metadata ON users USING gin (metadata);
# 4. GiST Index (Generalized Search Tree)
CREATE INDEX idx_locations_coords ON locations USING gist (coordinates);
# 5. BRIN Index (Block Range Index)
CREATE INDEX idx_logs_timestamp ON logs USING brin (timestamp);
# 6. Partial Index
CREATE INDEX idx_active_users ON users (email) WHERE status = 'active';
# 7. Expression Index
CREATE INDEX idx_users_lower_email ON users (lower(email));
# 8. Composite Index with Sort Order
CREATE INDEX idx_users_status_created ON users (status ASC, created_at DESC);
# 9. Covering Index
CREATE INDEX idx_users_covering ON users (status) INCLUDE (name, email);
# 10. Index Optimization Examples
# Query: SELECT * FROM users WHERE status = 'active' AND created_at > '2024-01-01'
CREATE INDEX CONCURRENTLY idx_users_status_created ON users(status, created_at);
# Query: SELECT name, email FROM users WHERE status = 'active'
CREATE INDEX CONCURRENTLY idx_users_status_covering ON users(status) INCLUDE (name, email);
# 11. Index Maintenance
# Reindex
REINDEX INDEX CONCURRENTLY idx_users_email;
# Analyze
ANALYZE users;
# Check index usage
SELECT schemaname, relname, indexrelname, idx_scan, idx_tup_read
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
# 12. Full-Text Search Index
CREATE INDEX idx_posts_fts ON posts USING gin (to_tsvector('english', title || ' ' || content));
# Full-text search
SELECT * FROM posts
WHERE to_tsvector('english', title || ' ' || content) @@ to_tsquery('english', 'database & optimization');
Query Rewriting
Query Optimization Techniques
Query Rewriting Strategies
# Query Rewriting Strategies
# 1. Avoid SELECT *
# Bad
SELECT * FROM users WHERE status = 'active';
# Good
SELECT id, name, email FROM users WHERE status = 'active';
# 2. Use LIMIT for Large Result Sets
# Bad
SELECT * FROM posts ORDER BY created_at DESC;
# Good
SELECT * FROM posts ORDER BY created_at DESC LIMIT 20;
# 3. Optimize JOINs
# Bad - Cross join
SELECT u.name, p.title
FROM users u, posts p
WHERE u.id = p.user_id;
# Good - Explicit join
SELECT u.name, p.title
FROM users u
INNER JOIN posts p ON u.id = p.user_id;
# 4. Use EXISTS instead of IN for Subqueries
# Bad
SELECT * FROM users
WHERE id IN (SELECT user_id FROM posts WHERE status = 'published');
# Good
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM posts p WHERE p.user_id = u.id AND p.status = 'published');
# 5. Optimize WHERE Clauses
# Bad - Function in WHERE
SELECT * FROM users WHERE YEAR(created_at) = 2024;
# Good - Range query
SELECT * FROM users WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01';
# 6. Use UNION ALL instead of UNION when possible
# Bad
SELECT name FROM users WHERE status = 'active'
UNION
SELECT name FROM users WHERE status = 'inactive';
# Good
SELECT name FROM users WHERE status = 'active'
UNION ALL
SELECT name FROM users WHERE status = 'inactive';
# 7. Optimize GROUP BY
# Bad
SELECT status, COUNT(*)
FROM users
GROUP BY status
ORDER BY COUNT(*) DESC;
# Good
SELECT status, COUNT(*) as count
FROM users
GROUP BY status
ORDER BY count DESC;
# 8. Use Appropriate Data Types
# Bad - String comparison
SELECT * FROM users WHERE id = '123';
# Good - Numeric comparison
SELECT * FROM users WHERE id = 123;
# 9. Optimize Pagination
# Bad - OFFSET for large datasets
SELECT * FROM users ORDER BY id LIMIT 20 OFFSET 10000;
# Good - Cursor-based pagination
SELECT * FROM users WHERE id > 10000 ORDER BY id LIMIT 20;
# 10. Use Prepared Statements
# Node.js example
const query = 'SELECT * FROM users WHERE id = ? AND status = ?';
const [rows] = await connection.execute(query, [userId, 'active']);
Database Configuration
MySQL Configuration Optimization
MySQL Performance Configuration
# MySQL Performance Configuration
# 1. Buffer Pool Configuration
# my.cnf configuration
[mysqld]
# InnoDB buffer pool (70-80% of available RAM)
innodb_buffer_pool_size = 2G
innodb_buffer_pool_instances = 8
# 2. Query Cache (MySQL 5.7 and earlier)
query_cache_type = 1
query_cache_size = 256M
query_cache_limit = 2M
# 3. Connection Configuration
max_connections = 200
max_connect_errors = 1000
connect_timeout = 10
wait_timeout = 28800
interactive_timeout = 28800
# 4. InnoDB Configuration
innodb_log_file_size = 256M
innodb_log_buffer_size = 16M
innodb_flush_log_at_trx_commit = 2
innodb_file_per_table = 1
innodb_flush_method = O_DIRECT
# 5. MyISAM Configuration
key_buffer_size = 128M
read_buffer_size = 2M
read_rnd_buffer_size = 8M
sort_buffer_size = 2M
# 6. Temporary Tables
tmp_table_size = 64M
max_heap_table_size = 64M
# 7. Thread Configuration
thread_cache_size = 16
thread_stack = 256K
# 8. Logging Configuration
slow_query_log = 1
slow_query_log_file = /var/log/mysql/slow.log
long_query_time = 2
log_queries_not_using_indexes = 1
# 9. Performance Monitoring
performance_schema = ON
performance_schema_max_table_instances = 500
performance_schema_max_table_handles = 1000
# 10. Optimization Settings
optimizer_switch = 'index_merge=on,index_merge_union=on,index_merge_sort_union=on'
join_buffer_size = 2M
read_buffer_size = 2M
read_rnd_buffer_size = 8M
PostgreSQL Configuration Optimization
PostgreSQL Performance Configuration
# PostgreSQL Performance Configuration
# 1. Memory Configuration
# postgresql.conf
shared_buffers = 256MB # 25% of RAM
effective_cache_size = 1GB # 75% of RAM
work_mem = 4MB # Per operation
maintenance_work_mem = 64MB # For maintenance operations
# 2. Connection Configuration
max_connections = 100
shared_preload_libraries = 'pg_stat_statements'
track_activities = on
track_counts = on
track_io_timing = on
track_functions = all
# 3. WAL Configuration
wal_level = replica
wal_buffers = 16MB
checkpoint_completion_target = 0.9
checkpoint_timeout = 15min
max_wal_size = 1GB
min_wal_size = 80MB
# 4. Query Planner Configuration
random_page_cost = 1.1
effective_io_concurrency = 200
seq_page_cost = 1.0
# 5. Logging Configuration
log_min_duration_statement = 1000
log_statement = 'mod'
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on
# 6. Autovacuum Configuration
autovacuum = on
autovacuum_max_workers = 3
autovacuum_naptime = 1min
autovacuum_vacuum_threshold = 50
autovacuum_analyze_threshold = 50
autovacuum_vacuum_scale_factor = 0.2
autovacuum_analyze_scale_factor = 0.1
# 7. Statistics Configuration
default_statistics_target = 100
track_activities = on
track_counts = on
track_io_timing = on
# 8. Lock Configuration
deadlock_timeout = 1s
lock_timeout = 0
log_lock_waits = on
# 9. Background Writer Configuration
bgwriter_delay = 200ms
bgwriter_lru_maxpages = 100
bgwriter_lru_multiplier = 2.0
# 10. Performance Extensions
# Enable pg_stat_statements
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
# Enable pg_hint_plan for query hints
CREATE EXTENSION IF NOT EXISTS pg_hint_plan;
Monitoring and Profiling
Database Performance Monitoring
Database Monitoring Setup
# Database Performance Monitoring
# 1. MySQL Performance Monitoring
# Enable performance schema
UPDATE performance_schema.setup_instruments
SET ENABLED = 'YES', TIMED = 'YES'
WHERE NAME LIKE '%statement%';
# Monitor slow queries
SELECT
DIGEST_TEXT,
COUNT_STAR,
AVG_TIMER_WAIT/1000000000 as avg_time_seconds,
MAX_TIMER_WAIT/1000000000 as max_time_seconds
FROM performance_schema.events_statements_summary_by_digest
ORDER BY AVG_TIMER_WAIT DESC
LIMIT 10;
# Monitor table I/O
SELECT
OBJECT_SCHEMA,
OBJECT_NAME,
COUNT_READ,
COUNT_WRITE,
COUNT_FETCH,
COUNT_INSERT,
COUNT_UPDATE,
COUNT_DELETE
FROM performance_schema.table_io_waits_summary_by_table
ORDER BY COUNT_READ + COUNT_WRITE DESC
LIMIT 10;
# 2. PostgreSQL Performance Monitoring
# Monitor query performance
# Note: on PostgreSQL 13+ use total_exec_time / mean_exec_time
SELECT
query,
calls,
total_time,
mean_time,
rows,
100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 10;
# Monitor table statistics
SELECT
schemaname,
relname,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del
FROM pg_stat_user_tables
ORDER BY seq_tup_read DESC;
# Monitor index usage
SELECT
schemaname,
relname,
indexrelname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
# 3. Custom Monitoring Script
#!/bin/bash
# db-monitor.sh
# MySQL monitoring
mysql -e "
SELECT
'Slow Queries' as metric,
COUNT(*) as value
FROM mysql.slow_log
WHERE start_time > DATE_SUB(NOW(), INTERVAL 1 HOUR);
SELECT
'Active Connections' as metric,
COUNT(*) as value
FROM information_schema.processlist
WHERE command != 'Sleep';
"
# PostgreSQL monitoring
psql -c "
SELECT
'Active Connections' as metric,
count(*) as value
FROM pg_stat_activity
WHERE state = 'active';
SELECT
'Database Size' as metric,
pg_size_pretty(pg_database_size(current_database())) as value;
"
# 4. Alerting Configuration
# Slow query alert
mysql -e "
SELECT
DIGEST_TEXT,
AVG_TIMER_WAIT/1000000000 as avg_time
FROM performance_schema.events_statements_summary_by_digest
WHERE AVG_TIMER_WAIT/1000000000 > 5
ORDER BY AVG_TIMER_WAIT DESC;
"
# 5. Performance Dashboard Queries
# Top 10 slowest queries
SELECT
DIGEST_TEXT as query,
COUNT_STAR as executions,
AVG_TIMER_WAIT/1000000000 as avg_time_seconds,
SUM_ROWS_EXAMINED/COUNT_STAR as avg_rows_examined
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT IS NOT NULL
ORDER BY AVG_TIMER_WAIT DESC
LIMIT 10;
# Index usage statistics
SELECT
TABLE_SCHEMA,
TABLE_NAME,
INDEX_NAME,
CARDINALITY,
SUB_PART,
PACKED,
NULLABLE
FROM information_schema.statistics
WHERE TABLE_SCHEMA = 'your_database'
ORDER BY CARDINALITY DESC;
Best Practices
Query Optimization Checklist
Indexing Best Practices
- Create indexes on frequently queried columns
- Use composite indexes for multi-column queries
- Avoid over-indexing
- Monitor index usage
- Use covering indexes
- Consider partial indexes
- Regular index maintenance
Query Best Practices
- Avoid SELECT *
- Use appropriate JOIN types
- Optimize WHERE clauses
- Use LIMIT for large result sets
- Prefer EXISTS over IN
- Use prepared statements
- Monitor query performance
Summary
Database query optimization involves several key components:
- Query Analysis: EXPLAIN, profiling, slow query logs
- Indexing Strategies: B-tree, composite, covering indexes
- Query Rewriting: Optimization techniques, best practices
- Database Configuration: Memory, connection, logging settings
- Monitoring: Performance tracking, alerting, dashboards
- Best Practices: Indexing guidelines, query optimization
Need More Help?
Struggling with database query optimization or need help improving your database performance? Our database experts can help you optimize your queries and improve performance.
Get Database Help