目录

pgsql-21 查询优化与执行计划

21 - 查询优化与执行计划

1. 📖 概述

查询优化是数据库性能调优的核心,本章介绍如何使用EXPLAIN分析查询,理解执行计划,以及优化慢查询。

2. 🔍 EXPLAIN基础

2.1 EXPLAIN命令

-- 查看执行计划(不执行)
EXPLAIN
SELECT * FROM users WHERE email = 'alice@example.com';

-- 查看详细信息
EXPLAIN VERBOSE
SELECT * FROM users WHERE email = 'alice@example.com';

-- 实际执行并显示统计(推荐)
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'alice@example.com';

-- 显示缓冲区信息
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM users WHERE email = 'alice@example.com';

-- JSON格式输出
EXPLAIN (FORMAT JSON)
SELECT * FROM users WHERE email = 'alice@example.com';

2.2 执行计划示例

EXPLAIN ANALYZE
SELECT * FROM orders
WHERE user_id = 123 AND status = 'completed';

-- 输出示例:
/*
Seq Scan on orders  (cost=0.00..431.00 rows=5 width=64) (actual time=0.123..8.456 rows=5 loops=1)
  Filter: ((user_id = 123) AND (status = 'completed'::text))
  Rows Removed by Filter: 9995
Planning Time: 0.234 ms
Execution Time: 8.567 ms
*/

2.3 执行计划解读

  • Seq Scan: 顺序扫描(全表扫描)
  • Index Scan: 索引扫描
  • Bitmap Scan: 位图扫描
  • cost=启动成本..总成本: 查询成本估算
  • rows=估计行数: 优化器估算返回行数
  • width=平均行宽: 每行平均字节数
  • actual time=实际时间: 实际执行时间(ms)
  • loops=循环次数: 执行次数

3. 📊 常见扫描类型

3.1 1. Sequential Scan(顺序扫描)

-- 全表扫描
EXPLAIN ANALYZE
SELECT * FROM orders;

/*
Seq Scan on orders (cost=0.00..1810.00 rows=100000 width=64)
*/

-- 何时使用:
-- - 表很小
-- - 查询大部分行
-- - 没有合适的索引

3.2 2. Index Scan(索引扫描)

-- 创建索引
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- 使用索引扫描
EXPLAIN ANALYZE
SELECT * FROM orders WHERE user_id = 123;

/*
Index Scan using idx_orders_user_id on orders
  (cost=0.29..8.31 rows=1 width=64)
  Index Cond: (user_id = 123)
*/

3.3 3. Index Only Scan(仅索引扫描)

-- 创建覆盖索引
CREATE INDEX idx_orders_user_status ON orders(user_id, status);

-- 查询只需要索引列
EXPLAIN ANALYZE
SELECT user_id, status FROM orders WHERE user_id = 123;

/*
Index Only Scan using idx_orders_user_status on orders
  (cost=0.29..4.31 rows=1 width=8)
  Index Cond: (user_id = 123)
  Heap Fetches: 0
*/

-- Heap Fetches: 0 表示没有访问表,性能最优

3.4 4. Bitmap Scan(位图扫描)

-- 多个条件,使用多个索引
CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_created ON orders(created_at);

EXPLAIN ANALYZE
SELECT * FROM orders
WHERE status = 'completed' AND created_at > '2024-01-01';

/*
Bitmap Heap Scan on orders
  Recheck Cond: ((status = 'completed') AND (created_at > '2024-01-01'))
  ->  BitmapAnd
        ->  Bitmap Index Scan on idx_orders_status
        ->  Bitmap Index Scan on idx_orders_created
*/

-- 位图扫描适用于中等选择性的查询

4. 🚀 JOIN优化

4.1 Nested Loop Join(嵌套循环)

-- 小表JOIN
EXPLAIN ANALYZE
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.id = 123;

/*
Nested Loop  (cost=0.57..16.61 rows=1 width=128)
  ->  Index Scan using users_pkey on users u
  ->  Index Scan using idx_orders_user_id on orders o
*/

-- 适用于: 小表, 有索引, 返回少量行

4.2 Hash Join(哈希连接)

-- 大表JOIN
EXPLAIN ANALYZE
SELECT *
FROM orders o
INNER JOIN order_items oi ON o.id = oi.order_id;

/*
Hash Join  (cost=2180.00..5230.00 rows=100000 width=96)
  Hash Cond: (oi.order_id = o.id)
  ->  Seq Scan on order_items oi
  ->  Hash
        ->  Seq Scan on orders o
*/

-- 适用于: 大表, 等值连接, 内存充足

4.3 Merge Join(归并连接)

-- 两表都已排序
EXPLAIN ANALYZE
SELECT *
FROM orders o
INNER JOIN users u ON o.user_id = u.id
ORDER BY o.user_id;

/*
Merge Join  (cost=0.57..4830.00 rows=100000 width=128)
  Merge Cond: (o.user_id = u.id)
  ->  Index Scan using idx_orders_user_id on orders o
  ->  Index Scan using users_pkey on users u
*/

-- 适用于: 两表都有排序索引, 范围连接

5. 🎯 优化技巧

5.1 1. 使用合适的索引

-- 不好: 没有索引
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'test@example.com';
-- Seq Scan on users  (cost=0.00..1810.00 rows=1)

-- 好: 添加索引
CREATE INDEX idx_users_email ON users(email);

EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'test@example.com';
-- Index Scan using idx_users_email on users  (cost=0.29..8.31 rows=1)

5.2 2. 使用复合索引

-- 查询条件
SELECT * FROM orders
WHERE user_id = 123 AND status = 'completed'
ORDER BY created_at DESC
LIMIT 10;

-- 不好: 单列索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- 只能使用一个索引

-- 好: 复合索引(按查询频率和选择性排序)
CREATE INDEX idx_orders_user_status_created ON orders(user_id, status, created_at DESC);

-- 最优: 包含所有查询列(覆盖索引)
CREATE INDEX idx_orders_covering ON orders(user_id, status, created_at DESC)
INCLUDE (total_amount);

5.3 3. 避免函数导致索引失效

-- 不好: 函数导致索引失效
EXPLAIN ANALYZE
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- Seq Scan on users

-- 好: 使用表达式索引
CREATE INDEX idx_users_email_lower ON users(LOWER(email));

EXPLAIN ANALYZE
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- Index Scan using idx_users_email_lower on users

-- 或者: 不使用函数
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'test@example.com';
-- Index Scan using idx_users_email on users

5.4 4. 避免SELECT *

-- 不好: 查询所有列
SELECT * FROM users WHERE id = 123;
-- 可能需要访问表(Heap Fetch)

-- 好: 只查询需要的列
SELECT id, username, email FROM users WHERE id = 123;
-- 可能使用Index Only Scan

5.5 5. 使用LIMIT

-- 不好: 查询所有结果
SELECT * FROM orders ORDER BY created_at DESC;
-- 需要排序全部100万行

-- 好: 使用LIMIT
SELECT * FROM orders ORDER BY created_at DESC LIMIT 10;
-- 只需要找到前10行即可停止

EXPLAIN ANALYZE
SELECT * FROM orders ORDER BY created_at DESC LIMIT 10;
/*
Limit  (cost=0.29..1.54 rows=10)
  ->  Index Scan Backward using idx_orders_created on orders
*/

5.6 6. 优化子查询

-- 不好: 相关子查询(N+1问题)
SELECT
    u.username,
    (SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count
FROM users u;

-- 好: JOIN
SELECT
    u.username,
    COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username;

-- 或: CTE
WITH order_counts AS (
    SELECT user_id, COUNT(*) AS count
    FROM orders
    GROUP BY user_id
)
SELECT u.username, COALESCE(oc.count, 0) AS order_count
FROM users u
LEFT JOIN order_counts oc ON u.id = oc.user_id;

5.7 7. 使用EXISTS代替IN

-- 不好: IN子查询
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE status = 'completed');

-- 好: EXISTS
SELECT * FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o
    WHERE o.user_id = u.id AND o.status = 'completed'
);

-- EXPLAIN ANALYZE显示EXISTS通常更快

5.8 8. 分区表优化

-- 创建分区表
CREATE TABLE orders_partitioned (
    id SERIAL,
    user_id INTEGER,
    created_at DATE,
    total_amount NUMERIC(10,2)
) PARTITION BY RANGE (created_at);

CREATE TABLE orders_2023 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');

CREATE TABLE orders_2024 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

-- 查询时只扫描相关分区
EXPLAIN ANALYZE
SELECT * FROM orders_partitioned
WHERE created_at >= '2024-01-01' AND created_at < '2024-02-01';

/*
Seq Scan on orders_2024  (cost=0.00..180.00 rows=100)
  Filter: (created_at >= '2024-01-01' AND created_at < '2024-02-01')
-- 只扫描orders_2024分区,不扫描orders_2023
*/

6. 🔧 优化工具和技巧

6.1 1. pg_stat_statements

-- 启用扩展
CREATE EXTENSION pg_stat_statements;

-- 查看最慢的查询
SELECT
    query,
    calls,
    total_exec_time,
    mean_exec_time,
    max_exec_time,
    rows
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;

-- 查看最频繁的查询
SELECT
    query,
    calls,
    total_exec_time,
    mean_exec_time
FROM pg_stat_statements
ORDER BY calls DESC
LIMIT 10;

6.2 2. VACUUM和ANALYZE

-- ANALYZE: 更新统计信息
ANALYZE users;
ANALYZE;  -- 所有表

-- VACUUM: 清理死元组
VACUUM users;
VACUUM;  -- 所有表

-- VACUUM ANALYZE: 两者都执行
VACUUM ANALYZE users;

-- VACUUM FULL: 完全清理(锁表,慢)
VACUUM FULL users;

-- 自动VACUUM设置
ALTER TABLE users SET (
    autovacuum_vacuum_scale_factor = 0.1,
    autovacuum_analyze_scale_factor = 0.05
);

6.3 3. 查看索引使用情况

-- 查看未使用的索引
SELECT
    schemaname,
    tablename,
    indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND indexname NOT LIKE '%pkey'
ORDER BY tablename, indexname;

-- 查看索引大小
SELECT
    tablename,
    indexname,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

6.4 4. 查看表膨胀

-- 查看表膨胀情况
SELECT
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
    pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
    ROUND(100.0 * pg_relation_size(schemaname||'.'||tablename) /
          NULLIF(pg_total_relation_size(schemaname||'.'||tablename), 0), 2) AS table_percentage
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;

7. 🎯 实战优化案例

7.1 案例1: 慢查询优化

-- 原始查询(慢)
SELECT
    u.username,
    COUNT(o.id) AS order_count,
    SUM(o.total_amount) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01'
GROUP BY u.id, u.username
ORDER BY total_spent DESC;

-- EXPLAIN ANALYZE显示:
-- Hash Join  (cost=2890.00..15320.00 rows=10000)
-- 执行时间: 2345ms

-- 优化步骤:

-- 1. 添加索引
CREATE INDEX idx_users_created ON users(created_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- 2. 如果只需要有订单的用户,使用INNER JOIN
SELECT
    u.username,
    COUNT(o.id) AS order_count,
    SUM(o.total_amount) AS total_spent
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01'
GROUP BY u.id, u.username
HAVING COUNT(o.id) > 0
ORDER BY total_spent DESC
LIMIT 100;  -- 添加LIMIT

-- 3. 使用物化CTE
WITH user_stats AS MATERIALIZED (
    SELECT
        user_id,
        COUNT(*) AS order_count,
        SUM(total_amount) AS total_spent
    FROM orders
    GROUP BY user_id
)
SELECT
    u.username,
    us.order_count,
    us.total_spent
FROM users u
INNER JOIN user_stats us ON u.id = us.user_id
WHERE u.created_at >= '2024-01-01'
ORDER BY us.total_spent DESC
LIMIT 100;

-- 优化后执行时间: 123ms (20倍提升)

7.2 案例2: N+1查询问题

-- 不好: N+1查询
-- 代码层面循环查询
SELECT * FROM users;  -- 1次查询
-- 然后对每个用户:
-- SELECT * FROM orders WHERE user_id = ?;  -- N次查询

-- 好: 一次查询获取所有数据
SELECT
    u.id,
    u.username,
    json_agg(json_build_object(
        'id', o.id,
        'total_amount', o.total_amount,
        'created_at', o.created_at
    )) AS orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username;

7.3 案例3: 大数据量分页

-- 不好: 深度分页
SELECT * FROM orders
ORDER BY created_at DESC
OFFSET 100000 LIMIT 10;
-- 需要扫描100010行

-- 好: 使用游标分页
SELECT * FROM orders
WHERE created_at < '2024-01-01 10:00:00'  -- 上一页最后一条记录的时间
ORDER BY created_at DESC
LIMIT 10;

-- 或使用id分页
SELECT * FROM orders
WHERE id < 50000  -- 上一页最后一条记录的id
ORDER BY id DESC
LIMIT 10;

8. 📝 优化检查清单

8.1 查询层面

  • 使用EXPLAIN ANALYZE分析执行计划
  • 只查询需要的列,避免SELECT *
  • 使用合适的JOIN类型
  • 避免函数导致索引失效
  • 使用LIMIT限制结果集
  • 优化子查询,使用JOIN或CTE
  • 使用EXISTS代替IN(大数据集)

8.2 索引层面

  • 为WHERE、JOIN、ORDER BY创建索引
  • 使用复合索引(注意列顺序)
  • 使用覆盖索引(INCLUDE)
  • 创建表达式索引(函数查询)
  • 删除未使用的索引
  • 定期REINDEX

8.3 表层面

  • 定期VACUUM清理死元组
  • 定期ANALYZE更新统计信息
  • 考虑使用分区表(大表)
  • 合理设置autovacuum参数
  • 监控表膨胀

8.4 数据库层面

  • 调整shared_buffers(25%内存)
  • 调整work_mem(复杂查询)
  • 调整effective_cache_size(50-75%内存)
  • 启用pg_stat_statements
  • 监控连接数和锁

9. 📚 下一步


关键要点:

  • 使用EXPLAIN ANALYZE分析查询
  • Index Only Scan性能最优
  • 避免全表扫描(Seq Scan)
  • 复合索引注意列顺序
  • 定期VACUUM和ANALYZE
  • 监控pg_stat_statements