目录

pgsql-07 高级查询技巧

07 - 高级查询技巧

1. 📖 概述

本章介绍PostgreSQL的高级查询技巧,包括复杂条件查询、集合操作、分析函数等。

2. 🔍 UNION操作

2.1 UNION和UNION ALL

-- UNION: 合并结果集并去重
SELECT username, email FROM users WHERE created_at < '2024-01-01'
UNION
SELECT username, email FROM users WHERE is_active = TRUE;

-- UNION ALL: 合并结果集不去重(性能更好)
SELECT username, email FROM users WHERE created_at < '2024-01-01'
UNION ALL
SELECT username, email FROM users WHERE is_active = TRUE;

-- UNION要求:
-- 1. 列数必须相同
-- 2. 对应列的数据类型必须兼容
-- 3. 列名以第一个查询为准

-- 实战示例: 合并不同来源的数据
SELECT
    'customer' AS user_type,
    id,
    name,
    email
FROM customers
UNION ALL
SELECT
    'employee' AS user_type,
    id,
    name,
    email
FROM employees
ORDER BY user_type, name;

2.2 INTERSECT和EXCEPT

-- INTERSECT: 交集(PostgreSQL支持, MySQL不支持)
SELECT email FROM users WHERE created_at > '2024-01-01'
INTERSECT
SELECT email FROM users WHERE is_premium = TRUE;
-- 返回: 2024年后注册且是会员的用户

-- EXCEPT: 差集(PostgreSQL支持, MySQL不支持)
SELECT email FROM users
EXCEPT
SELECT email FROM banned_users;
-- 返回: 未被封禁的用户

-- MySQL替代方案:
-- INTERSECT -> INNER JOIN
-- EXCEPT -> LEFT JOIN + WHERE NULL

3. 🎯 复杂条件查询

3.1 IN与多值匹配

-- 基本IN
SELECT * FROM products WHERE category IN ('Electronics', 'Books', 'Toys');

-- IN子查询
SELECT * FROM orders
WHERE user_id IN (
    SELECT id FROM users WHERE is_premium = TRUE
);

-- 多列IN (PostgreSQL特色)
SELECT * FROM order_items
WHERE (order_id, product_id) IN (
    SELECT order_id, product_id FROM promotions
);

3.2 ANY/ALL/SOME

-- ANY: 满足任意一个条件
SELECT * FROM products
WHERE price > ANY (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books中任意一个产品的价格

-- ALL: 满足所有条件
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books中所有产品的价格

-- SOME: ANY的别名
SELECT * FROM products
WHERE price < SOME (SELECT price FROM products WHERE category = 'Electronics');

-- 实用示例: 找出比所有竞争对手都便宜的产品
SELECT p.name, p.price
FROM our_products p
WHERE p.price < ALL (
    SELECT price
    FROM competitor_products cp
    WHERE cp.category = p.category
);

3.3 CASE高级用法

-- 复杂分类
SELECT
    product_name,
    price,
    CASE
        WHEN price < 20 THEN 'Budget'
        WHEN price BETWEEN 20 AND 50 THEN 'Standard'
        WHEN price BETWEEN 50 AND 100 THEN 'Premium'
        ELSE 'Luxury'
    END AS price_tier,
    CASE
        WHEN stock = 0 THEN 'Out of Stock'
        WHEN stock < 10 THEN 'Low Stock'
        WHEN stock < 100 THEN 'In Stock'
        ELSE 'Well Stocked'
    END AS stock_status
FROM products;

-- 条件聚合
SELECT
    category,
    COUNT(*) AS total_products,
    SUM(CASE WHEN price < 50 THEN 1 ELSE 0 END) AS cheap_products,
    SUM(CASE WHEN price >= 50 AND price < 100 THEN 1 ELSE 0 END) AS mid_products,
    SUM(CASE WHEN price >= 100 THEN 1 ELSE 0 END) AS expensive_products
FROM products
GROUP BY category;

-- 动态排序
SELECT * FROM products
ORDER BY
    CASE
        WHEN category = 'Featured' THEN 1
        WHEN category = 'New' THEN 2
        WHEN category = 'Sale' THEN 3
        ELSE 4
    END,
    price DESC;

4. 📊 高级聚合

4.1 FILTER子句 (PostgreSQL 9.4+)

-- 条件聚合(更优雅的方式)
SELECT
    category,
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE price < 50) AS cheap_count,
    COUNT(*) FILTER (WHERE price >= 100) AS expensive_count,
    AVG(price) FILTER (WHERE stock > 0) AS avg_price_in_stock
FROM products
GROUP BY category;

-- 对比CASE方式
SELECT
    category,
    COUNT(*) AS total,
    SUM(CASE WHEN price < 50 THEN 1 ELSE 0 END) AS cheap_count
FROM products
GROUP BY category;

4.2 GROUPING SETS

-- GROUPING SETS: 一次查询多个分组
SELECT
    category,
    brand,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY GROUPING SETS (
    (category),
    (brand),
    (category, brand),
    ()  -- 总计
);

-- 等价于多个UNION ALL
SELECT category, NULL AS brand, COUNT(*), AVG(price) FROM products GROUP BY category
UNION ALL
SELECT NULL, brand, COUNT(*), AVG(price) FROM products GROUP BY brand
UNION ALL
SELECT category, brand, COUNT(*), AVG(price) FROM products GROUP BY category, brand
UNION ALL
SELECT NULL, NULL, COUNT(*), AVG(price) FROM products;

4.3 ROLLUP和CUBE

-- ROLLUP: 分层汇总
SELECT
    category,
    brand,
    COUNT(*) AS count,
    SUM(price) AS total
FROM products
GROUP BY ROLLUP (category, brand)
ORDER BY category NULLS LAST, brand NULLS LAST;
-- 结果包括:
-- 1. 每个(category, brand)组合
-- 2. 每个category的小计
-- 3. 总计

-- CUBE: 所有维度组合
SELECT
    category,
    brand,
    COUNT(*) AS count
FROM products
GROUP BY CUBE (category, brand);
-- 结果包括:
-- (category, brand), (category), (brand), ()

5. 🔄 递归查询

5.1 简单递归

-- 生成数字序列
WITH RECURSIVE numbers AS (
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;

-- 生成日期序列
WITH RECURSIVE dates AS (
    SELECT CURRENT_DATE AS date
    UNION ALL
    SELECT date + 1 FROM dates WHERE date < CURRENT_DATE + 30
)
SELECT * FROM dates;

5.2 树形结构查询

-- 查询分类树
WITH RECURSIVE category_tree AS (
    -- 根节点
    SELECT id, name, parent_id, 0 AS level, name AS path
    FROM categories
    WHERE parent_id IS NULL

    UNION ALL

    -- 递归部分
    SELECT
        c.id,
        c.name,
        c.parent_id,
        ct.level + 1,
        ct.path || ' > ' || c.name
    FROM categories c
    INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY path;

-- 查询子树(某个节点的所有子孙)
WITH RECURSIVE subtree AS (
    SELECT * FROM categories WHERE id = 5
    UNION ALL
    SELECT c.*
    FROM categories c
    INNER JOIN subtree s ON c.parent_id = s.id
)
SELECT * FROM subtree;

-- 查询路径(从节点到根)
WITH RECURSIVE path_to_root AS (
    SELECT * FROM categories WHERE id = 10
    UNION ALL
    SELECT c.*
    FROM categories c
    INNER JOIN path_to_root p ON c.id = p.parent_id
)
SELECT * FROM path_to_root;

6. 🎨 数组和JSON查询

6.1 数组操作

-- 数组包含
SELECT * FROM articles WHERE tags @> ARRAY['postgresql'];

-- 数组重叠
SELECT * FROM articles WHERE tags && ARRAY['database', 'sql'];

-- 展开数组
SELECT
    id,
    title,
    unnest(tags) AS tag
FROM articles;

-- 数组聚合
SELECT
    category,
    array_agg(name ORDER BY price DESC) AS products
FROM products
GROUP BY category;

6.2 JSON查询

-- JSON字段提取
SELECT
    name,
    attributes->>'brand' AS brand,
    attributes->>'color' AS color,
    (attributes->>'price')::NUMERIC AS price
FROM products
WHERE attributes @> '{"available": true}';

-- JSON路径查询
SELECT
    name,
    attributes #>> '{specs,cpu,model}' AS cpu_model
FROM products;

-- JSON聚合
SELECT
    category,
    json_agg(json_build_object(
        'name', name,
        'price', price
    )) AS products
FROM products
GROUP BY category;

7. 📈 分析函数

7.1 NTILE分桶

-- 将数据分成4个桶
SELECT
    name,
    price,
    NTILE(4) OVER (ORDER BY price) AS price_quartile
FROM products;

-- 应用: 客户分层
SELECT
    user_id,
    total_spent,
    CASE NTILE(5) OVER (ORDER BY total_spent DESC)
        WHEN 1 THEN 'Top 20%'
        WHEN 2 THEN 'Top 40%'
        WHEN 3 THEN 'Middle 20%'
        WHEN 4 THEN 'Bottom 40%'
        ELSE 'Bottom 20%'
    END AS customer_segment
FROM user_lifetime_value;

7.2 PERCENT_RANK和CUME_DIST

SELECT
    name,
    price,
    PERCENT_RANK() OVER (ORDER BY price) AS percent_rank,
    CUME_DIST() OVER (ORDER BY price) AS cumulative_dist,
    NTILE(100) OVER (ORDER BY price) AS percentile
FROM products;

8. 🔗 LATERAL JOIN

-- LATERAL: 右侧可以引用左侧的列
SELECT
    u.username,
    recent_orders.order_count,
    recent_orders.total_amount
FROM users u
CROSS JOIN LATERAL (
    SELECT
        COUNT(*) AS order_count,
        SUM(total_amount) AS total_amount
    FROM orders o
    WHERE o.user_id = u.id
      AND o.created_at >= NOW() - INTERVAL '30 days'
) AS recent_orders
WHERE recent_orders.order_count > 0;

-- 每个分类的前3名产品
SELECT
    c.name AS category,
    p.name AS product,
    p.price
FROM categories c
CROSS JOIN LATERAL (
    SELECT name, price
    FROM products
    WHERE category_id = c.id
    ORDER BY price DESC
    LIMIT 3
) p;

9. 💡 查询优化技巧

9.1 避免重复计算

-- 不好: 重复计算
SELECT
    (SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count,
    (SELECT SUM(total_amount) FROM orders WHERE user_id = users.id) AS total_spent,
    (SELECT MAX(created_at) FROM orders WHERE user_id = users.id) AS last_order
FROM users;

-- 好: 使用CTE或JOIN
WITH user_stats AS (
    SELECT
        user_id,
        COUNT(*) AS order_count,
        SUM(total_amount) AS total_spent,
        MAX(created_at) AS last_order
    FROM orders
    GROUP BY user_id
)
SELECT
    u.*,
    COALESCE(us.order_count, 0) AS order_count,
    COALESCE(us.total_spent, 0) AS total_spent,
    us.last_order
FROM users u
LEFT JOIN user_stats us ON u.id = us.user_id;

9.2 使用索引友好的查询

-- 不好: 函数导致索引失效
SELECT * FROM users WHERE LOWER(email) = 'alice@example.com';

-- 好: 创建表达式索引或直接匹配
CREATE INDEX idx_users_email_lower ON users(LOWER(email));
-- 或
SELECT * FROM users WHERE email = 'alice@example.com';

-- 不好: LIKE开头通配符
SELECT * FROM users WHERE email LIKE '%@gmail.com';

-- 好: 使用全文搜索或后缀索引
SELECT * FROM users WHERE email LIKE 'alice%';

10. 🆚 PostgreSQL vs MySQL

功能 PostgreSQL MySQL
INTERSECT 支持 不支持
EXCEPT 支持 不支持
FILTER子句 支持 不支持
GROUPING SETS 支持 不支持
ROLLUP/CUBE 支持 8.0+支持
LATERAL JOIN 支持 8.0.14+支持
递归CTE 支持 8.0+支持
数组类型 原生支持 不支持

11. 📝 实战示例

11.1 销售漏斗分析

WITH funnel AS (
    SELECT
        COUNT(DISTINCT user_id) FILTER (WHERE action = 'view') AS viewed,
        COUNT(DISTINCT user_id) FILTER (WHERE action = 'add_to_cart') AS added_to_cart,
        COUNT(DISTINCT user_id) FILTER (WHERE action = 'checkout') AS checked_out,
        COUNT(DISTINCT user_id) FILTER (WHERE action = 'purchase') AS purchased
    FROM user_actions
    WHERE created_at >= NOW() - INTERVAL '7 days'
)
SELECT
    viewed,
    added_to_cart,
    ROUND(100.0 * added_to_cart / viewed, 2) AS add_to_cart_rate,
    checked_out,
    ROUND(100.0 * checked_out / added_to_cart, 2) AS checkout_rate,
    purchased,
    ROUND(100.0 * purchased / checked_out, 2) AS purchase_rate
FROM funnel;

11.2 同期群分析(Cohort Analysis)

WITH cohorts AS (
    SELECT
        user_id,
        DATE_TRUNC('month', created_at) AS cohort_month
    FROM users
),
user_activities AS (
    SELECT
        o.user_id,
        DATE_TRUNC('month', o.created_at) AS activity_month
    FROM orders o
)
SELECT
    c.cohort_month,
    ua.activity_month,
    EXTRACT(MONTH FROM AGE(ua.activity_month, c.cohort_month)) AS months_since_signup,
    COUNT(DISTINCT ua.user_id) AS active_users,
    (SELECT COUNT(*) FROM cohorts WHERE cohort_month = c.cohort_month) AS cohort_size,
    ROUND(100.0 * COUNT(DISTINCT ua.user_id) /
          (SELECT COUNT(*) FROM cohorts WHERE cohort_month = c.cohort_month), 2) AS retention_rate
FROM cohorts c
LEFT JOIN user_activities ua ON c.user_id = ua.user_id
GROUP BY c.cohort_month, ua.activity_month
ORDER BY c.cohort_month, ua.activity_month;

12. 📚 下一步


关键要点:

  • INTERSECT/EXCEPT是PostgreSQL独有
  • FILTER子句比CASE更优雅
  • LATERAL JOIN非常强大
  • 递归CTE处理树形结构
  • 避免重复计算和索引失效