pgsql-21 查询优化与执行计划
目录
21 - 查询优化与执行计划
1. 📖 概述
查询优化是数据库性能调优的核心,本章介绍如何使用EXPLAIN分析查询,理解执行计划,以及优化慢查询。
2. 🔍 EXPLAIN基础
2.1 EXPLAIN命令
-- 查看执行计划(不执行)
EXPLAIN
SELECT * FROM users WHERE email = 'alice@example.com';
-- 查看详细信息
EXPLAIN VERBOSE
SELECT * FROM users WHERE email = 'alice@example.com';
-- 实际执行并显示统计(推荐)
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'alice@example.com';
-- 显示缓冲区信息
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM users WHERE email = 'alice@example.com';
-- JSON格式输出
EXPLAIN (FORMAT JSON)
SELECT * FROM users WHERE email = 'alice@example.com';
2.2 执行计划示例
EXPLAIN ANALYZE
SELECT * FROM orders
WHERE user_id = 123 AND status = 'completed';
-- 输出示例:
/*
Seq Scan on orders (cost=0.00..431.00 rows=5 width=64) (actual time=0.123..8.456 rows=5 loops=1)
Filter: ((user_id = 123) AND (status = 'completed'::text))
Rows Removed by Filter: 9995
Planning Time: 0.234 ms
Execution Time: 8.567 ms
*/
2.3 执行计划解读
- Seq Scan: 顺序扫描(全表扫描)
- Index Scan: 索引扫描
- Bitmap Scan: 位图扫描
- cost=启动成本..总成本: 查询成本估算
- rows=估计行数: 优化器估算返回行数
- width=平均行宽: 每行平均字节数
- actual time=实际时间: 实际执行时间(ms)
- loops=循环次数: 执行次数
3. 📊 常见扫描类型
3.1 1. Sequential Scan(顺序扫描)
-- 全表扫描
EXPLAIN ANALYZE
SELECT * FROM orders;
/*
Seq Scan on orders (cost=0.00..1810.00 rows=100000 width=64)
*/
-- 何时使用:
-- - 表很小
-- - 查询大部分行
-- - 没有合适的索引
3.2 2. Index Scan(索引扫描)
-- 创建索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- 使用索引扫描
EXPLAIN ANALYZE
SELECT * FROM orders WHERE user_id = 123;
/*
Index Scan using idx_orders_user_id on orders
(cost=0.29..8.31 rows=1 width=64)
Index Cond: (user_id = 123)
*/
3.3 3. Index Only Scan(仅索引扫描)
-- 创建覆盖索引
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- 查询只需要索引列
EXPLAIN ANALYZE
SELECT user_id, status FROM orders WHERE user_id = 123;
/*
Index Only Scan using idx_orders_user_status on orders
(cost=0.29..4.31 rows=1 width=8)
Index Cond: (user_id = 123)
Heap Fetches: 0
*/
-- Heap Fetches: 0 表示没有访问表,性能最优
3.4 4. Bitmap Scan(位图扫描)
-- 多个条件,使用多个索引
CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_created ON orders(created_at);
EXPLAIN ANALYZE
SELECT * FROM orders
WHERE status = 'completed' AND created_at > '2024-01-01';
/*
Bitmap Heap Scan on orders
Recheck Cond: ((status = 'completed') AND (created_at > '2024-01-01'))
-> BitmapAnd
-> Bitmap Index Scan on idx_orders_status
-> Bitmap Index Scan on idx_orders_created
*/
-- 位图扫描适用于中等选择性的查询
4. 🚀 JOIN优化
4.1 Nested Loop Join(嵌套循环)
-- 小表JOIN
EXPLAIN ANALYZE
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.id = 123;
/*
Nested Loop (cost=0.57..16.61 rows=1 width=128)
-> Index Scan using users_pkey on users u
-> Index Scan using idx_orders_user_id on orders o
*/
-- 适用于: 小表, 有索引, 返回少量行
4.2 Hash Join(哈希连接)
-- 大表JOIN
EXPLAIN ANALYZE
SELECT *
FROM orders o
INNER JOIN order_items oi ON o.id = oi.order_id;
/*
Hash Join (cost=2180.00..5230.00 rows=100000 width=96)
Hash Cond: (oi.order_id = o.id)
-> Seq Scan on order_items oi
-> Hash
-> Seq Scan on orders o
*/
-- 适用于: 大表, 等值连接, 内存充足
4.3 Merge Join(归并连接)
-- 两表都已排序
EXPLAIN ANALYZE
SELECT *
FROM orders o
INNER JOIN users u ON o.user_id = u.id
ORDER BY o.user_id;
/*
Merge Join (cost=0.57..4830.00 rows=100000 width=128)
Merge Cond: (o.user_id = u.id)
-> Index Scan using idx_orders_user_id on orders o
-> Index Scan using users_pkey on users u
*/
-- 适用于: 两表都有排序索引, 范围连接
5. 🎯 优化技巧
5.1 1. 使用合适的索引
-- 不好: 没有索引
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'test@example.com';
-- Seq Scan on users (cost=0.00..1810.00 rows=1)
-- 好: 添加索引
CREATE INDEX idx_users_email ON users(email);
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'test@example.com';
-- Index Scan using idx_users_email on users (cost=0.29..8.31 rows=1)
5.2 2. 使用复合索引
-- 查询条件
SELECT * FROM orders
WHERE user_id = 123 AND status = 'completed'
ORDER BY created_at DESC
LIMIT 10;
-- 不好: 单列索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- 只能使用一个索引
-- 好: 复合索引(按查询频率和选择性排序)
CREATE INDEX idx_orders_user_status_created ON orders(user_id, status, created_at DESC);
-- 最优: 包含所有查询列(覆盖索引)
CREATE INDEX idx_orders_covering ON orders(user_id, status, created_at DESC)
INCLUDE (total_amount);
5.3 3. 避免函数导致索引失效
-- 不好: 函数导致索引失效
EXPLAIN ANALYZE
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- Seq Scan on users
-- 好: 使用表达式索引
CREATE INDEX idx_users_email_lower ON users(LOWER(email));
EXPLAIN ANALYZE
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- Index Scan using idx_users_email_lower on users
-- 或者: 不使用函数
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'test@example.com';
-- Index Scan using idx_users_email on users
5.4 4. 避免SELECT *
-- 不好: 查询所有列
SELECT * FROM users WHERE id = 123;
-- 可能需要访问表(Heap Fetch)
-- 好: 只查询需要的列
SELECT id, username, email FROM users WHERE id = 123;
-- 可能使用Index Only Scan
5.5 5. 使用LIMIT
-- 不好: 查询所有结果
SELECT * FROM orders ORDER BY created_at DESC;
-- 需要排序全部100万行
-- 好: 使用LIMIT
SELECT * FROM orders ORDER BY created_at DESC LIMIT 10;
-- 只需要找到前10行即可停止
EXPLAIN ANALYZE
SELECT * FROM orders ORDER BY created_at DESC LIMIT 10;
/*
Limit (cost=0.29..1.54 rows=10)
-> Index Scan Backward using idx_orders_created on orders
*/
5.6 6. 优化子查询
-- 不好: 相关子查询(N+1问题)
SELECT
u.username,
(SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count
FROM users u;
-- 好: JOIN
SELECT
u.username,
COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username;
-- 或: CTE
WITH order_counts AS (
SELECT user_id, COUNT(*) AS count
FROM orders
GROUP BY user_id
)
SELECT u.username, COALESCE(oc.count, 0) AS order_count
FROM users u
LEFT JOIN order_counts oc ON u.id = oc.user_id;
5.7 7. 使用EXISTS代替IN
-- 不好: IN子查询
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE status = 'completed');
-- 好: EXISTS
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id AND o.status = 'completed'
);
-- EXPLAIN ANALYZE显示EXISTS通常更快
5.8 8. 分区表优化
-- 创建分区表
CREATE TABLE orders_partitioned (
id SERIAL,
user_id INTEGER,
created_at DATE,
total_amount NUMERIC(10,2)
) PARTITION BY RANGE (created_at);
CREATE TABLE orders_2023 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
CREATE TABLE orders_2024 PARTITION OF orders_partitioned
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
-- 查询时只扫描相关分区
EXPLAIN ANALYZE
SELECT * FROM orders_partitioned
WHERE created_at >= '2024-01-01' AND created_at < '2024-02-01';
/*
Seq Scan on orders_2024 (cost=0.00..180.00 rows=100)
Filter: (created_at >= '2024-01-01' AND created_at < '2024-02-01')
-- 只扫描orders_2024分区,不扫描orders_2023
*/
6. 🔧 优化工具和技巧
6.1 1. pg_stat_statements
-- 启用扩展
CREATE EXTENSION pg_stat_statements;
-- 查看最慢的查询
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
max_exec_time,
rows
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
-- 查看最频繁的查询
SELECT
query,
calls,
total_exec_time,
mean_exec_time
FROM pg_stat_statements
ORDER BY calls DESC
LIMIT 10;
6.2 2. VACUUM和ANALYZE
-- ANALYZE: 更新统计信息
ANALYZE users;
ANALYZE; -- 所有表
-- VACUUM: 清理死元组
VACUUM users;
VACUUM; -- 所有表
-- VACUUM ANALYZE: 两者都执行
VACUUM ANALYZE users;
-- VACUUM FULL: 完全清理(锁表,慢)
VACUUM FULL users;
-- 自动VACUUM设置
ALTER TABLE users SET (
autovacuum_vacuum_scale_factor = 0.1,
autovacuum_analyze_scale_factor = 0.05
);
6.3 3. 查看索引使用情况
-- 查看未使用的索引
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexname NOT LIKE '%pkey'
ORDER BY tablename, indexname;
-- 查看索引大小
SELECT
tablename,
indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;
6.4 4. 查看表膨胀
-- 查看表膨胀情况
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
ROUND(100.0 * pg_relation_size(schemaname||'.'||tablename) /
NULLIF(pg_total_relation_size(schemaname||'.'||tablename), 0), 2) AS table_percentage
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
7. 🎯 实战优化案例
7.1 案例1: 慢查询优化
-- 原始查询(慢)
SELECT
u.username,
COUNT(o.id) AS order_count,
SUM(o.total_amount) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01'
GROUP BY u.id, u.username
ORDER BY total_spent DESC;
-- EXPLAIN ANALYZE显示:
-- Hash Join (cost=2890.00..15320.00 rows=10000)
-- 执行时间: 2345ms
-- 优化步骤:
-- 1. 添加索引
CREATE INDEX idx_users_created ON users(created_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- 2. 如果只需要有订单的用户,使用INNER JOIN
SELECT
u.username,
COUNT(o.id) AS order_count,
SUM(o.total_amount) AS total_spent
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01'
GROUP BY u.id, u.username
HAVING COUNT(o.id) > 0
ORDER BY total_spent DESC
LIMIT 100; -- 添加LIMIT
-- 3. 使用物化CTE
WITH user_stats AS MATERIALIZED (
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total_amount) AS total_spent
FROM orders
GROUP BY user_id
)
SELECT
u.username,
us.order_count,
us.total_spent
FROM users u
INNER JOIN user_stats us ON u.id = us.user_id
WHERE u.created_at >= '2024-01-01'
ORDER BY us.total_spent DESC
LIMIT 100;
-- 优化后执行时间: 123ms (20倍提升)
7.2 案例2: N+1查询问题
-- 不好: N+1查询
-- 代码层面循环查询
SELECT * FROM users; -- 1次查询
-- 然后对每个用户:
-- SELECT * FROM orders WHERE user_id = ?; -- N次查询
-- 好: 一次查询获取所有数据
SELECT
u.id,
u.username,
json_agg(json_build_object(
'id', o.id,
'total_amount', o.total_amount,
'created_at', o.created_at
)) AS orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username;
7.3 案例3: 大数据量分页
-- 不好: 深度分页
SELECT * FROM orders
ORDER BY created_at DESC
OFFSET 100000 LIMIT 10;
-- 需要扫描100010行
-- 好: 使用游标分页
SELECT * FROM orders
WHERE created_at < '2024-01-01 10:00:00' -- 上一页最后一条记录的时间
ORDER BY created_at DESC
LIMIT 10;
-- 或使用id分页
SELECT * FROM orders
WHERE id < 50000 -- 上一页最后一条记录的id
ORDER BY id DESC
LIMIT 10;
8. 📝 优化检查清单
8.1 查询层面
- 使用EXPLAIN ANALYZE分析执行计划
- 只查询需要的列,避免SELECT *
- 使用合适的JOIN类型
- 避免函数导致索引失效
- 使用LIMIT限制结果集
- 优化子查询,使用JOIN或CTE
- 使用EXISTS代替IN(大数据集)
8.2 索引层面
- 为WHERE、JOIN、ORDER BY创建索引
- 使用复合索引(注意列顺序)
- 使用覆盖索引(INCLUDE)
- 创建表达式索引(函数查询)
- 删除未使用的索引
- 定期REINDEX
8.3 表层面
- 定期VACUUM清理死元组
- 定期ANALYZE更新统计信息
- 考虑使用分区表(大表)
- 合理设置autovacuum参数
- 监控表膨胀
8.4 数据库层面
- 调整shared_buffers(25%内存)
- 调整work_mem(复杂查询)
- 调整effective_cache_size(50-75%内存)
- 启用pg_stat_statements
- 监控连接数和锁
9. 📚 下一步
关键要点:
- 使用EXPLAIN ANALYZE分析查询
- Index Only Scan性能最优
- 避免全表扫描(Seq Scan)
- 复合索引注意列顺序
- 定期VACUUM和ANALYZE
- 监控pg_stat_statements
xingliuhua