pgsql-10 子查询与CTE
目录
10 - 子查询与CTE
1. 📖 概述
本章详细介绍PostgreSQL的子查询和CTE(Common Table Expressions,公共表表达式),这些是编写复杂查询的强大工具。
2. 🔍 子查询基础
2.1 标量子查询
标量子查询返回单个值。
-- 查询价格高于平均价格的产品
SELECT
name,
price,
(SELECT AVG(price) FROM products) AS avg_price
FROM products
WHERE price > (SELECT AVG(price) FROM products);
-- 显示每个产品与平均价格的差值
SELECT
name,
price,
price - (SELECT AVG(price) FROM products) AS price_diff
FROM products;
2.2 列子查询
-- 显示每个用户的订单数量
SELECT
id,
username,
email,
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count,
(SELECT SUM(total_amount) FROM orders WHERE user_id = users.id) AS total_spent
FROM users;
2.3 行子查询
-- PostgreSQL
SELECT * FROM products
WHERE (category_id, price) = (
SELECT category_id, MAX(price)
FROM products
WHERE category_id = 1
GROUP BY category_id
);
3. 📊 WHERE子查询
3.1 IN子查询
-- 查询有订单的用户
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders);
-- 查询购买过特定分类产品的用户
SELECT DISTINCT u.*
FROM users u
WHERE u.id IN (
SELECT o.user_id
FROM orders o
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE p.category = 'Electronics'
);
-- NOT IN
SELECT * FROM users
WHERE id NOT IN (SELECT user_id FROM orders WHERE status = 'cancelled');
注意: 使用NOT IN时要小心NULL值:
-- 如果子查询返回NULL,NOT IN会返回空结果
-- 推荐使用NOT EXISTS代替NOT IN
-- 不推荐
SELECT * FROM users
WHERE id NOT IN (SELECT user_id FROM orders WHERE user_id IS NOT NULL);
-- 推荐
SELECT * FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);
3.2 EXISTS子查询
EXISTS比IN更高效,特别是在大数据集上。
-- 查询有订单的用户
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o WHERE o.user_id = u.id
);
-- 查询没有订单的用户
SELECT * FROM users u
WHERE NOT EXISTS (
SELECT 1 FROM orders o WHERE o.user_id = u.id
);
-- 复杂EXISTS示例
SELECT * FROM products p
WHERE EXISTS (
SELECT 1
FROM order_items oi
INNER JOIN orders o ON oi.order_id = o.id
WHERE oi.product_id = p.id
AND o.created_at >= NOW() - INTERVAL '30 days'
);
3.3 ANY/ALL子查询
-- ANY: 满足任意一个条件
SELECT * FROM products
WHERE price > ANY (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books分类中任意一个产品的价格
-- ALL: 满足所有条件
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books分类中所有产品的价格
-- 实际示例
SELECT name, price FROM products
WHERE price >= ALL (SELECT price FROM products); -- 最贵的产品
4. 🎯 FROM子查询(派生表)
-- 基本派生表
SELECT
category,
avg_price,
product_count
FROM (
SELECT
category,
AVG(price) AS avg_price,
COUNT(*) AS product_count
FROM products
GROUP BY category
) AS category_stats
WHERE product_count > 10
ORDER BY avg_price DESC;
-- 复杂示例:多层嵌套
SELECT
year,
month,
revenue,
LAG(revenue) OVER (ORDER BY year, month) AS prev_month_revenue,
revenue - LAG(revenue) OVER (ORDER BY year, month) AS revenue_growth
FROM (
SELECT
EXTRACT(YEAR FROM created_at) AS year,
EXTRACT(MONTH FROM created_at) AS month,
SUM(total_amount) AS revenue
FROM orders
WHERE status = 'completed'
GROUP BY EXTRACT(YEAR FROM created_at), EXTRACT(MONTH FROM created_at)
) AS monthly_revenue
ORDER BY year, month;
5. 🌲 CTE(Common Table Expression)
CTE使用WITH子句定义临时结果集,提高查询可读性和可维护性。
5.1 基本CTE
-- 单个CTE
WITH high_value_customers AS (
SELECT
user_id,
SUM(total_amount) AS lifetime_value
FROM orders
GROUP BY user_id
HAVING SUM(total_amount) > 1000
)
SELECT
u.username,
u.email,
hvc.lifetime_value
FROM users u
INNER JOIN high_value_customers hvc ON u.id = hvc.user_id
ORDER BY hvc.lifetime_value DESC;
5.2 多个CTE
-- 多个CTE定义
WITH
active_users AS (
SELECT id, username, email
FROM users
WHERE is_active = TRUE
),
recent_orders AS (
SELECT user_id, COUNT(*) AS order_count, SUM(total_amount) AS total_spent
FROM orders
WHERE created_at >= NOW() - INTERVAL '90 days'
GROUP BY user_id
)
SELECT
au.username,
au.email,
COALESCE(ro.order_count, 0) AS recent_order_count,
COALESCE(ro.total_spent, 0) AS recent_total_spent
FROM active_users au
LEFT JOIN recent_orders ro ON au.id = ro.user_id
ORDER BY ro.total_spent DESC NULLS LAST;
5.3 递归CTE
递归CTE是PostgreSQL的强大功能,用于处理树形或图形数据。
-- 示例:组织架构树
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
manager_id INTEGER REFERENCES employees(id)
);
INSERT INTO employees (id, name, manager_id) VALUES
(1, 'CEO', NULL),
(2, 'VP Sales', 1),
(3, 'VP Engineering', 1),
(4, 'Sales Manager', 2),
(5, 'Engineer Lead', 3),
(6, 'Sales Rep', 4),
(7, 'Engineer', 5);
-- 递归CTE:显示组织架构
WITH RECURSIVE org_chart AS (
-- 基础查询:顶层员工
SELECT
id,
name,
manager_id,
1 AS level,
name AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归查询:下属员工
SELECT
e.id,
e.name,
e.manager_id,
oc.level + 1,
oc.path || ' -> ' || e.name
FROM employees e
INNER JOIN org_chart oc ON e.manager_id = oc.id
)
SELECT
REPEAT(' ', level - 1) || name AS hierarchy,
level,
path
FROM org_chart
ORDER BY path;
-- 输出类似:
-- CEO (1)
-- VP Sales (2)
-- Sales Manager (3)
-- Sales Rep (4)
-- VP Engineering (2)
-- Engineer Lead (3)
-- Engineer (4)
5.4 递归CTE示例:生成序列
-- 生成1到10的数字序列
WITH RECURSIVE numbers AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;
-- 生成日期序列(最近30天)
WITH RECURSIVE date_series AS (
SELECT CURRENT_DATE - INTERVAL '29 days' AS date
UNION ALL
SELECT date + INTERVAL '1 day'
FROM date_series
WHERE date < CURRENT_DATE
)
SELECT
ds.date,
COALESCE(COUNT(o.id), 0) AS order_count,
COALESCE(SUM(o.total_amount), 0) AS daily_revenue
FROM date_series ds
LEFT JOIN orders o ON DATE(o.created_at) = ds.date
GROUP BY ds.date
ORDER BY ds.date;
5.5 递归CTE示例:分类树
-- 分类表(支持多级分类)
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
parent_id INTEGER REFERENCES categories(id)
);
INSERT INTO categories (id, name, parent_id) VALUES
(1, 'Electronics', NULL),
(2, 'Computers', 1),
(3, 'Phones', 1),
(4, 'Laptops', 2),
(5, 'Desktops', 2),
(6, 'Gaming Laptops', 4),
(7, 'Business Laptops', 4);
-- 查询某个分类及其所有子分类
WITH RECURSIVE category_tree AS (
-- 起始分类
SELECT id, name, parent_id, 0 AS depth
FROM categories
WHERE id = 2 -- Computers
UNION ALL
-- 递归查找子分类
SELECT c.id, c.name, c.parent_id, ct.depth + 1
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT
REPEAT(' ', depth) || name AS category_hierarchy,
depth
FROM category_tree
ORDER BY depth, name;
-- 查询某个分类的所有父级分类(面包屑导航)
WITH RECURSIVE breadcrumb AS (
SELECT id, name, parent_id, 1 AS level
FROM categories
WHERE id = 6 -- Gaming Laptops
UNION ALL
SELECT c.id, c.name, c.parent_id, b.level + 1
FROM categories c
INNER JOIN breadcrumb b ON c.id = b.parent_id
)
SELECT
name,
level
FROM breadcrumb
ORDER BY level DESC;
-- 输出: Electronics > Computers > Laptops > Gaming Laptops
6. 🔄 CTE vs 子查询 vs 临时表
6.1 对比
| 特性 | CTE | 子查询 | 临时表 |
|---|---|---|---|
| 可读性 | 高 | 中低 | 中 |
| 可维护性 | 高 | 低 | 中 |
| 性能 | 优化器决定 | 优化器决定 | 可控制 |
| 递归 | 支持 | 不支持 | 不支持 |
| 多次引用 | 可能重复执行 | 重复执行 | 只计算一次 |
| 索引 | 不能创建 | 不能创建 | 可以创建 |
6.2 何时使用
-- 使用CTE:提高可读性
WITH
monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) AS month,
SUM(total_amount) AS revenue
FROM orders
GROUP BY DATE_TRUNC('month', created_at)
),
monthly_costs AS (
SELECT
DATE_TRUNC('month', date) AS month,
SUM(amount) AS cost
FROM expenses
GROUP BY DATE_TRUNC('month', date)
)
SELECT
ms.month,
ms.revenue,
mc.cost,
ms.revenue - mc.cost AS profit
FROM monthly_sales ms
LEFT JOIN monthly_costs mc ON ms.month = mc.month
ORDER BY ms.month;
-- 使用临时表:需要多次引用且数据量大
CREATE TEMP TABLE active_users AS
SELECT id, username, email
FROM users
WHERE is_active = TRUE;
CREATE INDEX idx_temp_active_users_id ON active_users(id);
-- 可以多次使用,且有索引支持
SELECT * FROM active_users WHERE id IN (...);
SELECT * FROM active_users au INNER JOIN orders o ON au.id = o.user_id;
7. 🎨 MATERIALIZED CTE (PostgreSQL 12+)
-- 强制物化CTE(避免重复计算)
WITH expensive_calculation AS MATERIALIZED (
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total_amount) AS total_spent
FROM orders
GROUP BY user_id
)
SELECT * FROM expensive_calculation WHERE order_count > 10
UNION ALL
SELECT * FROM expensive_calculation WHERE total_spent > 10000;
-- 不物化CTE
WITH calculation AS NOT MATERIALIZED (
SELECT * FROM large_table
)
SELECT * FROM calculation WHERE ...;
8. 🚀 实战示例
8.1 示例1:电商销售报表
WITH
-- 产品销售统计
product_sales AS (
SELECT
p.id,
p.name,
p.category,
COUNT(DISTINCT o.id) AS order_count,
SUM(oi.quantity) AS total_quantity,
SUM(oi.quantity * oi.price) AS total_revenue
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
LEFT JOIN orders o ON oi.order_id = o.id
WHERE o.status = 'completed'
AND o.created_at >= NOW() - INTERVAL '90 days'
GROUP BY p.id, p.name, p.category
),
-- 分类汇总
category_totals AS (
SELECT
category,
SUM(total_revenue) AS category_revenue
FROM product_sales
GROUP BY category
)
SELECT
ps.name AS product_name,
ps.category,
ps.order_count,
ps.total_quantity,
ps.total_revenue,
ct.category_revenue,
ROUND(100.0 * ps.total_revenue / NULLIF(ct.category_revenue, 0), 2) AS category_percentage
FROM product_sales ps
INNER JOIN category_totals ct ON ps.category = ct.category
WHERE ps.total_revenue > 0
ORDER BY ps.category, ps.total_revenue DESC;
8.2 示例2:用户留存分析
WITH
-- 每月新用户
monthly_new_users AS (
SELECT
DATE_TRUNC('month', created_at) AS cohort_month,
COUNT(*) AS new_users,
ARRAY_AGG(id) AS user_ids
FROM users
GROUP BY DATE_TRUNC('month', created_at)
),
-- 用户活动
user_activity AS (
SELECT
user_id,
DATE_TRUNC('month', created_at) AS activity_month
FROM orders
GROUP BY user_id, DATE_TRUNC('month', created_at)
),
-- 留存计算
retention AS (
SELECT
mnu.cohort_month,
ua.activity_month,
EXTRACT(MONTH FROM AGE(ua.activity_month, mnu.cohort_month)) AS months_since_signup,
COUNT(DISTINCT ua.user_id) AS active_users,
mnu.new_users
FROM monthly_new_users mnu
CROSS JOIN LATERAL UNNEST(mnu.user_ids) AS uid
LEFT JOIN user_activity ua ON ua.user_id = uid
GROUP BY mnu.cohort_month, ua.activity_month, mnu.new_users
)
SELECT
cohort_month,
months_since_signup,
active_users,
new_users,
ROUND(100.0 * active_users / new_users, 2) AS retention_rate
FROM retention
WHERE months_since_signup IS NOT NULL
ORDER BY cohort_month, months_since_signup;
8.3 示例3:推荐系统(协同过滤)
-- 找出与指定用户购买过相似产品的用户,并推荐产品
WITH
-- 目标用户购买的产品
target_user_products AS (
SELECT DISTINCT product_id
FROM order_items oi
INNER JOIN orders o ON oi.order_id = o.id
WHERE o.user_id = 123
),
-- 购买过相似产品的用户
similar_users AS (
SELECT
o.user_id,
COUNT(DISTINCT oi.product_id) AS common_products
FROM order_items oi
INNER JOIN orders o ON oi.order_id = o.id
WHERE oi.product_id IN (SELECT product_id FROM target_user_products)
AND o.user_id != 123
GROUP BY o.user_id
HAVING COUNT(DISTINCT oi.product_id) >= 2
ORDER BY common_products DESC
LIMIT 50
),
-- 这些用户购买的其他产品
recommendations AS (
SELECT
p.id,
p.name,
p.category,
COUNT(DISTINCT o.user_id) AS purchased_by_similar_users,
AVG(p.price) AS avg_price
FROM products p
INNER JOIN order_items oi ON p.id = oi.product_id
INNER JOIN orders o ON oi.order_id = o.id
WHERE o.user_id IN (SELECT user_id FROM similar_users)
AND p.id NOT IN (SELECT product_id FROM target_user_products)
GROUP BY p.id, p.name, p.category
)
SELECT
name,
category,
purchased_by_similar_users AS relevance_score,
avg_price
FROM recommendations
WHERE purchased_by_similar_users >= 3
ORDER BY relevance_score DESC, avg_price DESC
LIMIT 10;
9. 🆚 PostgreSQL vs MySQL
| 特性 | PostgreSQL | MySQL |
|---|---|---|
| CTE(WITH) | 8.4+ | 8.0+ |
| 递归CTE | 8.4+ | 8.0+ |
| MATERIALIZED CTE | 12+ | 不支持 |
| LATERAL子查询 | 9.3+ | 不支持 |
| 递归深度限制 | 默认无限制 | max_recursive_iterations |
10. 🎯 最佳实践
10.1 1. 优先使用CTE提高可读性
-- 不好:嵌套子查询
SELECT * FROM (
SELECT * FROM (
SELECT user_id, COUNT(*) AS cnt FROM orders GROUP BY user_id
) AS inner_query WHERE cnt > 5
) AS outer_query WHERE user_id > 100;
-- 好:使用CTE
WITH
user_order_counts AS (
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
),
frequent_buyers AS (
SELECT * FROM user_order_counts WHERE order_count > 5
)
SELECT * FROM frequent_buyers WHERE user_id > 100;
10.2 2. 递归CTE设置终止条件
-- 始终设置递归终止条件,避免无限循环
WITH RECURSIVE series AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM series WHERE n < 1000 -- 终止条件
)
SELECT * FROM series;
10.3 3. 复杂计算考虑临时表
-- 数据量大且需要多次引用时,使用临时表
CREATE TEMP TABLE daily_stats AS
SELECT
DATE(created_at) AS date,
COUNT(*) AS order_count,
SUM(total_amount) AS revenue
FROM orders
GROUP BY DATE(created_at);
CREATE INDEX idx_daily_stats_date ON daily_stats(date);
-- 多次使用
SELECT * FROM daily_stats WHERE date >= '2024-01-01';
SELECT AVG(revenue) FROM daily_stats;
11. 📝 练习题
- 使用CTE查询每个分类中价格最高的3个产品
- 使用递归CTE构建评论回复树
- 计算每月新用户和活跃用户
- 找出连续3天都有订单的用户
- 生成销售环比增长报表
11.1 参考答案
-- 1. 每个分类价格最高的3个产品
WITH ranked_products AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rn
FROM products
)
SELECT category, name, price
FROM ranked_products
WHERE rn <= 3
ORDER BY category, rn;
-- 2. 评论回复树(假设有comments表)
-- CREATE TABLE comments (
-- id SERIAL PRIMARY KEY,
-- content TEXT,
-- parent_id INTEGER REFERENCES comments(id),
-- created_at TIMESTAMPTZ DEFAULT NOW()
-- );
WITH RECURSIVE comment_tree AS (
SELECT id, content, parent_id, 0 AS depth, ARRAY[id] AS path
FROM comments
WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.content, c.parent_id, ct.depth + 1, ct.path || c.id
FROM comments c
INNER JOIN comment_tree ct ON c.parent_id = ct.id
)
SELECT
REPEAT(' ', depth) || content AS hierarchy,
depth
FROM comment_tree
ORDER BY path;
-- 3. 每月新用户和活跃用户
WITH months AS (
SELECT generate_series(
DATE_TRUNC('month', NOW() - INTERVAL '12 months'),
DATE_TRUNC('month', NOW()),
INTERVAL '1 month'
) AS month
),
new_users AS (
SELECT
DATE_TRUNC('month', created_at) AS month,
COUNT(*) AS new_count
FROM users
GROUP BY DATE_TRUNC('month', created_at)
),
active_users AS (
SELECT
DATE_TRUNC('month', o.created_at) AS month,
COUNT(DISTINCT o.user_id) AS active_count
FROM orders o
GROUP BY DATE_TRUNC('month', o.created_at)
)
SELECT
m.month,
COALESCE(nu.new_count, 0) AS new_users,
COALESCE(au.active_count, 0) AS active_users
FROM months m
LEFT JOIN new_users nu ON m.month = nu.month
LEFT JOIN active_users au ON m.month = au.month
ORDER BY m.month;
12. 📚 下一步
关键要点:
- CTE提高查询可读性和可维护性
- 递归CTE处理树形/图形数据
- EXISTS通常比IN更高效
- NOT IN要小心NULL值问题
- MATERIALIZED CTE可以优化性能
xingliuhua