pgsql-07 高级查询技巧
目录
07 - 高级查询技巧
1. 📖 概述
本章介绍PostgreSQL的高级查询技巧,包括复杂条件查询、集合操作、分析函数等。
2. 🔍 UNION操作
2.1 UNION和UNION ALL
-- UNION: 合并结果集并去重
SELECT username, email FROM users WHERE created_at < '2024-01-01'
UNION
SELECT username, email FROM users WHERE is_active = TRUE;
-- UNION ALL: 合并结果集不去重(性能更好)
SELECT username, email FROM users WHERE created_at < '2024-01-01'
UNION ALL
SELECT username, email FROM users WHERE is_active = TRUE;
-- UNION要求:
-- 1. 列数必须相同
-- 2. 对应列的数据类型必须兼容
-- 3. 列名以第一个查询为准
-- 实战示例: 合并不同来源的数据
SELECT
'customer' AS user_type,
id,
name,
email
FROM customers
UNION ALL
SELECT
'employee' AS user_type,
id,
name,
email
FROM employees
ORDER BY user_type, name;
2.2 INTERSECT和EXCEPT
-- INTERSECT: 交集(PostgreSQL支持, MySQL不支持)
SELECT email FROM users WHERE created_at > '2024-01-01'
INTERSECT
SELECT email FROM users WHERE is_premium = TRUE;
-- 返回: 2024年后注册且是会员的用户
-- EXCEPT: 差集(PostgreSQL支持, MySQL不支持)
SELECT email FROM users
EXCEPT
SELECT email FROM banned_users;
-- 返回: 未被封禁的用户
-- MySQL替代方案:
-- INTERSECT -> INNER JOIN
-- EXCEPT -> LEFT JOIN + WHERE NULL
3. 🎯 复杂条件查询
3.1 IN与多值匹配
-- 基本IN
SELECT * FROM products WHERE category IN ('Electronics', 'Books', 'Toys');
-- IN子查询
SELECT * FROM orders
WHERE user_id IN (
SELECT id FROM users WHERE is_premium = TRUE
);
-- 多列IN (PostgreSQL特色)
SELECT * FROM order_items
WHERE (order_id, product_id) IN (
SELECT order_id, product_id FROM promotions
);
3.2 ANY/ALL/SOME
-- ANY: 满足任意一个条件
SELECT * FROM products
WHERE price > ANY (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books中任意一个产品的价格
-- ALL: 满足所有条件
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books中所有产品的价格
-- SOME: ANY的别名
SELECT * FROM products
WHERE price < SOME (SELECT price FROM products WHERE category = 'Electronics');
-- 实用示例: 找出比所有竞争对手都便宜的产品
SELECT p.name, p.price
FROM our_products p
WHERE p.price < ALL (
SELECT price
FROM competitor_products cp
WHERE cp.category = p.category
);
3.3 CASE高级用法
-- 复杂分类
SELECT
product_name,
price,
CASE
WHEN price < 20 THEN 'Budget'
WHEN price BETWEEN 20 AND 50 THEN 'Standard'
WHEN price BETWEEN 50 AND 100 THEN 'Premium'
ELSE 'Luxury'
END AS price_tier,
CASE
WHEN stock = 0 THEN 'Out of Stock'
WHEN stock < 10 THEN 'Low Stock'
WHEN stock < 100 THEN 'In Stock'
ELSE 'Well Stocked'
END AS stock_status
FROM products;
-- 条件聚合
SELECT
category,
COUNT(*) AS total_products,
SUM(CASE WHEN price < 50 THEN 1 ELSE 0 END) AS cheap_products,
SUM(CASE WHEN price >= 50 AND price < 100 THEN 1 ELSE 0 END) AS mid_products,
SUM(CASE WHEN price >= 100 THEN 1 ELSE 0 END) AS expensive_products
FROM products
GROUP BY category;
-- 动态排序
SELECT * FROM products
ORDER BY
CASE
WHEN category = 'Featured' THEN 1
WHEN category = 'New' THEN 2
WHEN category = 'Sale' THEN 3
ELSE 4
END,
price DESC;
4. 📊 高级聚合
4.1 FILTER子句 (PostgreSQL 9.4+)
-- 条件聚合(更优雅的方式)
SELECT
category,
COUNT(*) AS total,
COUNT(*) FILTER (WHERE price < 50) AS cheap_count,
COUNT(*) FILTER (WHERE price >= 100) AS expensive_count,
AVG(price) FILTER (WHERE stock > 0) AS avg_price_in_stock
FROM products
GROUP BY category;
-- 对比CASE方式
SELECT
category,
COUNT(*) AS total,
SUM(CASE WHEN price < 50 THEN 1 ELSE 0 END) AS cheap_count
FROM products
GROUP BY category;
4.2 GROUPING SETS
-- GROUPING SETS: 一次查询多个分组
SELECT
category,
brand,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY GROUPING SETS (
(category),
(brand),
(category, brand),
() -- 总计
);
-- 等价于多个UNION ALL
SELECT category, NULL AS brand, COUNT(*), AVG(price) FROM products GROUP BY category
UNION ALL
SELECT NULL, brand, COUNT(*), AVG(price) FROM products GROUP BY brand
UNION ALL
SELECT category, brand, COUNT(*), AVG(price) FROM products GROUP BY category, brand
UNION ALL
SELECT NULL, NULL, COUNT(*), AVG(price) FROM products;
4.3 ROLLUP和CUBE
-- ROLLUP: 分层汇总
SELECT
category,
brand,
COUNT(*) AS count,
SUM(price) AS total
FROM products
GROUP BY ROLLUP (category, brand)
ORDER BY category NULLS LAST, brand NULLS LAST;
-- 结果包括:
-- 1. 每个(category, brand)组合
-- 2. 每个category的小计
-- 3. 总计
-- CUBE: 所有维度组合
SELECT
category,
brand,
COUNT(*) AS count
FROM products
GROUP BY CUBE (category, brand);
-- 结果包括:
-- (category, brand), (category), (brand), ()
5. 🔄 递归查询
5.1 简单递归
-- 生成数字序列
WITH RECURSIVE numbers AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;
-- 生成日期序列
WITH RECURSIVE dates AS (
SELECT CURRENT_DATE AS date
UNION ALL
SELECT date + 1 FROM dates WHERE date < CURRENT_DATE + 30
)
SELECT * FROM dates;
5.2 树形结构查询
-- 查询分类树
WITH RECURSIVE category_tree AS (
-- 根节点
SELECT id, name, parent_id, 0 AS level, name AS path
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- 递归部分
SELECT
c.id,
c.name,
c.parent_id,
ct.level + 1,
ct.path || ' > ' || c.name
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY path;
-- 查询子树(某个节点的所有子孙)
WITH RECURSIVE subtree AS (
SELECT * FROM categories WHERE id = 5
UNION ALL
SELECT c.*
FROM categories c
INNER JOIN subtree s ON c.parent_id = s.id
)
SELECT * FROM subtree;
-- 查询路径(从节点到根)
WITH RECURSIVE path_to_root AS (
SELECT * FROM categories WHERE id = 10
UNION ALL
SELECT c.*
FROM categories c
INNER JOIN path_to_root p ON c.id = p.parent_id
)
SELECT * FROM path_to_root;
6. 🎨 数组和JSON查询
6.1 数组操作
-- 数组包含
SELECT * FROM articles WHERE tags @> ARRAY['postgresql'];
-- 数组重叠
SELECT * FROM articles WHERE tags && ARRAY['database', 'sql'];
-- 展开数组
SELECT
id,
title,
unnest(tags) AS tag
FROM articles;
-- 数组聚合
SELECT
category,
array_agg(name ORDER BY price DESC) AS products
FROM products
GROUP BY category;
6.2 JSON查询
-- JSON字段提取
SELECT
name,
attributes->>'brand' AS brand,
attributes->>'color' AS color,
(attributes->>'price')::NUMERIC AS price
FROM products
WHERE attributes @> '{"available": true}';
-- JSON路径查询
SELECT
name,
attributes #>> '{specs,cpu,model}' AS cpu_model
FROM products;
-- JSON聚合
SELECT
category,
json_agg(json_build_object(
'name', name,
'price', price
)) AS products
FROM products
GROUP BY category;
7. 📈 分析函数
7.1 NTILE分桶
-- 将数据分成4个桶
SELECT
name,
price,
NTILE(4) OVER (ORDER BY price) AS price_quartile
FROM products;
-- 应用: 客户分层
SELECT
user_id,
total_spent,
CASE NTILE(5) OVER (ORDER BY total_spent DESC)
WHEN 1 THEN 'Top 20%'
WHEN 2 THEN 'Top 40%'
WHEN 3 THEN 'Middle 20%'
WHEN 4 THEN 'Bottom 40%'
ELSE 'Bottom 20%'
END AS customer_segment
FROM user_lifetime_value;
7.2 PERCENT_RANK和CUME_DIST
SELECT
name,
price,
PERCENT_RANK() OVER (ORDER BY price) AS percent_rank,
CUME_DIST() OVER (ORDER BY price) AS cumulative_dist,
NTILE(100) OVER (ORDER BY price) AS percentile
FROM products;
8. 🔗 LATERAL JOIN
-- LATERAL: 右侧可以引用左侧的列
SELECT
u.username,
recent_orders.order_count,
recent_orders.total_amount
FROM users u
CROSS JOIN LATERAL (
SELECT
COUNT(*) AS order_count,
SUM(total_amount) AS total_amount
FROM orders o
WHERE o.user_id = u.id
AND o.created_at >= NOW() - INTERVAL '30 days'
) AS recent_orders
WHERE recent_orders.order_count > 0;
-- 每个分类的前3名产品
SELECT
c.name AS category,
p.name AS product,
p.price
FROM categories c
CROSS JOIN LATERAL (
SELECT name, price
FROM products
WHERE category_id = c.id
ORDER BY price DESC
LIMIT 3
) p;
9. 💡 查询优化技巧
9.1 避免重复计算
-- 不好: 重复计算
SELECT
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count,
(SELECT SUM(total_amount) FROM orders WHERE user_id = users.id) AS total_spent,
(SELECT MAX(created_at) FROM orders WHERE user_id = users.id) AS last_order
FROM users;
-- 好: 使用CTE或JOIN
WITH user_stats AS (
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total_amount) AS total_spent,
MAX(created_at) AS last_order
FROM orders
GROUP BY user_id
)
SELECT
u.*,
COALESCE(us.order_count, 0) AS order_count,
COALESCE(us.total_spent, 0) AS total_spent,
us.last_order
FROM users u
LEFT JOIN user_stats us ON u.id = us.user_id;
9.2 使用索引友好的查询
-- 不好: 函数导致索引失效
SELECT * FROM users WHERE LOWER(email) = 'alice@example.com';
-- 好: 创建表达式索引或直接匹配
CREATE INDEX idx_users_email_lower ON users(LOWER(email));
-- 或
SELECT * FROM users WHERE email = 'alice@example.com';
-- 不好: LIKE开头通配符
SELECT * FROM users WHERE email LIKE '%@gmail.com';
-- 好: 使用全文搜索或后缀索引
SELECT * FROM users WHERE email LIKE 'alice%';
10. 🆚 PostgreSQL vs MySQL
| 功能 | PostgreSQL | MySQL |
|---|---|---|
| INTERSECT | 支持 | 不支持 |
| EXCEPT | 支持 | 不支持 |
| FILTER子句 | 支持 | 不支持 |
| GROUPING SETS | 支持 | 不支持 |
| ROLLUP/CUBE | 支持 | 8.0+支持 |
| LATERAL JOIN | 支持 | 8.0.14+支持 |
| 递归CTE | 支持 | 8.0+支持 |
| 数组类型 | 原生支持 | 不支持 |
11. 📝 实战示例
11.1 销售漏斗分析
WITH funnel AS (
SELECT
COUNT(DISTINCT user_id) FILTER (WHERE action = 'view') AS viewed,
COUNT(DISTINCT user_id) FILTER (WHERE action = 'add_to_cart') AS added_to_cart,
COUNT(DISTINCT user_id) FILTER (WHERE action = 'checkout') AS checked_out,
COUNT(DISTINCT user_id) FILTER (WHERE action = 'purchase') AS purchased
FROM user_actions
WHERE created_at >= NOW() - INTERVAL '7 days'
)
SELECT
viewed,
added_to_cart,
ROUND(100.0 * added_to_cart / viewed, 2) AS add_to_cart_rate,
checked_out,
ROUND(100.0 * checked_out / added_to_cart, 2) AS checkout_rate,
purchased,
ROUND(100.0 * purchased / checked_out, 2) AS purchase_rate
FROM funnel;
11.2 同期群分析(Cohort Analysis)
WITH cohorts AS (
SELECT
user_id,
DATE_TRUNC('month', created_at) AS cohort_month
FROM users
),
user_activities AS (
SELECT
o.user_id,
DATE_TRUNC('month', o.created_at) AS activity_month
FROM orders o
)
SELECT
c.cohort_month,
ua.activity_month,
EXTRACT(MONTH FROM AGE(ua.activity_month, c.cohort_month)) AS months_since_signup,
COUNT(DISTINCT ua.user_id) AS active_users,
(SELECT COUNT(*) FROM cohorts WHERE cohort_month = c.cohort_month) AS cohort_size,
ROUND(100.0 * COUNT(DISTINCT ua.user_id) /
(SELECT COUNT(*) FROM cohorts WHERE cohort_month = c.cohort_month), 2) AS retention_rate
FROM cohorts c
LEFT JOIN user_activities ua ON c.user_id = ua.user_id
GROUP BY c.cohort_month, ua.activity_month
ORDER BY c.cohort_month, ua.activity_month;
12. 📚 下一步
关键要点:
- INTERSECT/EXCEPT是PostgreSQL独有
- FILTER子句比CASE更优雅
- LATERAL JOIN非常强大
- 递归CTE处理树形结构
- 避免重复计算和索引失效
xingliuhua