目录

pgsql-10 子查询与CTE

10 - 子查询与CTE

1. 📖 概述

本章详细介绍PostgreSQL的子查询和CTE(Common Table Expressions,公共表表达式),这些是编写复杂查询的强大工具。

2. 🔍 子查询基础

2.1 标量子查询

标量子查询返回单个值。

-- 查询价格高于平均价格的产品
SELECT
    name,
    price,
    (SELECT AVG(price) FROM products) AS avg_price
FROM products
WHERE price > (SELECT AVG(price) FROM products);

-- 显示每个产品与平均价格的差值
SELECT
    name,
    price,
    price - (SELECT AVG(price) FROM products) AS price_diff
FROM products;

2.2 列子查询

-- 显示每个用户的订单数量
SELECT
    id,
    username,
    email,
    (SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count,
    (SELECT SUM(total_amount) FROM orders WHERE user_id = users.id) AS total_spent
FROM users;

2.3 行子查询

-- PostgreSQL
SELECT * FROM products
WHERE (category_id, price) = (
    SELECT category_id, MAX(price)
    FROM products
    WHERE category_id = 1
    GROUP BY category_id
);

3. 📊 WHERE子查询

3.1 IN子查询

-- 查询有订单的用户
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders);

-- 查询购买过特定分类产品的用户
SELECT DISTINCT u.*
FROM users u
WHERE u.id IN (
    SELECT o.user_id
    FROM orders o
    INNER JOIN order_items oi ON o.id = oi.order_id
    INNER JOIN products p ON oi.product_id = p.id
    WHERE p.category = 'Electronics'
);

-- NOT IN
SELECT * FROM users
WHERE id NOT IN (SELECT user_id FROM orders WHERE status = 'cancelled');

注意: 使用NOT IN时要小心NULL值:

-- 如果子查询返回NULL,NOT IN会返回空结果
-- 推荐使用NOT EXISTS代替NOT IN

-- 不推荐
SELECT * FROM users
WHERE id NOT IN (SELECT user_id FROM orders WHERE user_id IS NOT NULL);

-- 推荐
SELECT * FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);

3.2 EXISTS子查询

EXISTS比IN更高效,特别是在大数据集上。

-- 查询有订单的用户
SELECT * FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o WHERE o.user_id = u.id
);

-- 查询没有订单的用户
SELECT * FROM users u
WHERE NOT EXISTS (
    SELECT 1 FROM orders o WHERE o.user_id = u.id
);

-- 复杂EXISTS示例
SELECT * FROM products p
WHERE EXISTS (
    SELECT 1
    FROM order_items oi
    INNER JOIN orders o ON oi.order_id = o.id
    WHERE oi.product_id = p.id
      AND o.created_at >= NOW() - INTERVAL '30 days'
);

3.3 ANY/ALL子查询

-- ANY: 满足任意一个条件
SELECT * FROM products
WHERE price > ANY (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books分类中任意一个产品的价格

-- ALL: 满足所有条件
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = 'Books');
-- 等价于: price大于Books分类中所有产品的价格

-- 实际示例
SELECT name, price FROM products
WHERE price >= ALL (SELECT price FROM products);  -- 最贵的产品

4. 🎯 FROM子查询(派生表)

-- 基本派生表
SELECT
    category,
    avg_price,
    product_count
FROM (
    SELECT
        category,
        AVG(price) AS avg_price,
        COUNT(*) AS product_count
    FROM products
    GROUP BY category
) AS category_stats
WHERE product_count > 10
ORDER BY avg_price DESC;

-- 复杂示例:多层嵌套
SELECT
    year,
    month,
    revenue,
    LAG(revenue) OVER (ORDER BY year, month) AS prev_month_revenue,
    revenue - LAG(revenue) OVER (ORDER BY year, month) AS revenue_growth
FROM (
    SELECT
        EXTRACT(YEAR FROM created_at) AS year,
        EXTRACT(MONTH FROM created_at) AS month,
        SUM(total_amount) AS revenue
    FROM orders
    WHERE status = 'completed'
    GROUP BY EXTRACT(YEAR FROM created_at), EXTRACT(MONTH FROM created_at)
) AS monthly_revenue
ORDER BY year, month;

5. 🌲 CTE(Common Table Expression)

CTE使用WITH子句定义临时结果集,提高查询可读性和可维护性。

5.1 基本CTE

-- 单个CTE
WITH high_value_customers AS (
    SELECT
        user_id,
        SUM(total_amount) AS lifetime_value
    FROM orders
    GROUP BY user_id
    HAVING SUM(total_amount) > 1000
)
SELECT
    u.username,
    u.email,
    hvc.lifetime_value
FROM users u
INNER JOIN high_value_customers hvc ON u.id = hvc.user_id
ORDER BY hvc.lifetime_value DESC;

5.2 多个CTE

-- 多个CTE定义
WITH
    active_users AS (
        SELECT id, username, email
        FROM users
        WHERE is_active = TRUE
    ),
    recent_orders AS (
        SELECT user_id, COUNT(*) AS order_count, SUM(total_amount) AS total_spent
        FROM orders
        WHERE created_at >= NOW() - INTERVAL '90 days'
        GROUP BY user_id
    )
SELECT
    au.username,
    au.email,
    COALESCE(ro.order_count, 0) AS recent_order_count,
    COALESCE(ro.total_spent, 0) AS recent_total_spent
FROM active_users au
LEFT JOIN recent_orders ro ON au.id = ro.user_id
ORDER BY ro.total_spent DESC NULLS LAST;

5.3 递归CTE

递归CTE是PostgreSQL的强大功能,用于处理树形或图形数据。

-- 示例:组织架构树
CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    manager_id INTEGER REFERENCES employees(id)
);

INSERT INTO employees (id, name, manager_id) VALUES
    (1, 'CEO', NULL),
    (2, 'VP Sales', 1),
    (3, 'VP Engineering', 1),
    (4, 'Sales Manager', 2),
    (5, 'Engineer Lead', 3),
    (6, 'Sales Rep', 4),
    (7, 'Engineer', 5);

-- 递归CTE:显示组织架构
WITH RECURSIVE org_chart AS (
    -- 基础查询:顶层员工
    SELECT
        id,
        name,
        manager_id,
        1 AS level,
        name AS path
    FROM employees
    WHERE manager_id IS NULL

    UNION ALL

    -- 递归查询:下属员工
    SELECT
        e.id,
        e.name,
        e.manager_id,
        oc.level + 1,
        oc.path || ' -> ' || e.name
    FROM employees e
    INNER JOIN org_chart oc ON e.manager_id = oc.id
)
SELECT
    REPEAT('  ', level - 1) || name AS hierarchy,
    level,
    path
FROM org_chart
ORDER BY path;

-- 输出类似:
-- CEO (1)
--   VP Sales (2)
--     Sales Manager (3)
--       Sales Rep (4)
--   VP Engineering (2)
--     Engineer Lead (3)
--       Engineer (4)

5.4 递归CTE示例:生成序列

-- 生成1到10的数字序列
WITH RECURSIVE numbers AS (
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;

-- 生成日期序列(最近30天)
WITH RECURSIVE date_series AS (
    SELECT CURRENT_DATE - INTERVAL '29 days' AS date
    UNION ALL
    SELECT date + INTERVAL '1 day'
    FROM date_series
    WHERE date < CURRENT_DATE
)
SELECT
    ds.date,
    COALESCE(COUNT(o.id), 0) AS order_count,
    COALESCE(SUM(o.total_amount), 0) AS daily_revenue
FROM date_series ds
LEFT JOIN orders o ON DATE(o.created_at) = ds.date
GROUP BY ds.date
ORDER BY ds.date;

5.5 递归CTE示例:分类树

-- 分类表(支持多级分类)
CREATE TABLE categories (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    parent_id INTEGER REFERENCES categories(id)
);

INSERT INTO categories (id, name, parent_id) VALUES
    (1, 'Electronics', NULL),
    (2, 'Computers', 1),
    (3, 'Phones', 1),
    (4, 'Laptops', 2),
    (5, 'Desktops', 2),
    (6, 'Gaming Laptops', 4),
    (7, 'Business Laptops', 4);

-- 查询某个分类及其所有子分类
WITH RECURSIVE category_tree AS (
    -- 起始分类
    SELECT id, name, parent_id, 0 AS depth
    FROM categories
    WHERE id = 2  -- Computers

    UNION ALL

    -- 递归查找子分类
    SELECT c.id, c.name, c.parent_id, ct.depth + 1
    FROM categories c
    INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT
    REPEAT('  ', depth) || name AS category_hierarchy,
    depth
FROM category_tree
ORDER BY depth, name;

-- 查询某个分类的所有父级分类(面包屑导航)
WITH RECURSIVE breadcrumb AS (
    SELECT id, name, parent_id, 1 AS level
    FROM categories
    WHERE id = 6  -- Gaming Laptops

    UNION ALL

    SELECT c.id, c.name, c.parent_id, b.level + 1
    FROM categories c
    INNER JOIN breadcrumb b ON c.id = b.parent_id
)
SELECT
    name,
    level
FROM breadcrumb
ORDER BY level DESC;
-- 输出: Electronics > Computers > Laptops > Gaming Laptops

6. 🔄 CTE vs 子查询 vs 临时表

6.1 对比

特性 CTE 子查询 临时表
可读性 中低
可维护性
性能 优化器决定 优化器决定 可控制
递归 支持 不支持 不支持
多次引用 可能重复执行 重复执行 只计算一次
索引 不能创建 不能创建 可以创建

6.2 何时使用

-- 使用CTE:提高可读性
WITH
    monthly_sales AS (
        SELECT
            DATE_TRUNC('month', created_at) AS month,
            SUM(total_amount) AS revenue
        FROM orders
        GROUP BY DATE_TRUNC('month', created_at)
    ),
    monthly_costs AS (
        SELECT
            DATE_TRUNC('month', date) AS month,
            SUM(amount) AS cost
        FROM expenses
        GROUP BY DATE_TRUNC('month', date)
    )
SELECT
    ms.month,
    ms.revenue,
    mc.cost,
    ms.revenue - mc.cost AS profit
FROM monthly_sales ms
LEFT JOIN monthly_costs mc ON ms.month = mc.month
ORDER BY ms.month;

-- 使用临时表:需要多次引用且数据量大
CREATE TEMP TABLE active_users AS
SELECT id, username, email
FROM users
WHERE is_active = TRUE;

CREATE INDEX idx_temp_active_users_id ON active_users(id);

-- 可以多次使用,且有索引支持
SELECT * FROM active_users WHERE id IN (...);
SELECT * FROM active_users au INNER JOIN orders o ON au.id = o.user_id;

7. 🎨 MATERIALIZED CTE (PostgreSQL 12+)

-- 强制物化CTE(避免重复计算)
WITH expensive_calculation AS MATERIALIZED (
    SELECT
        user_id,
        COUNT(*) AS order_count,
        SUM(total_amount) AS total_spent
    FROM orders
    GROUP BY user_id
)
SELECT * FROM expensive_calculation WHERE order_count > 10
UNION ALL
SELECT * FROM expensive_calculation WHERE total_spent > 10000;

-- 不物化CTE
WITH calculation AS NOT MATERIALIZED (
    SELECT * FROM large_table
)
SELECT * FROM calculation WHERE ...;

8. 🚀 实战示例

8.1 示例1:电商销售报表

WITH
    -- 产品销售统计
    product_sales AS (
        SELECT
            p.id,
            p.name,
            p.category,
            COUNT(DISTINCT o.id) AS order_count,
            SUM(oi.quantity) AS total_quantity,
            SUM(oi.quantity * oi.price) AS total_revenue
        FROM products p
        LEFT JOIN order_items oi ON p.id = oi.product_id
        LEFT JOIN orders o ON oi.order_id = o.id
        WHERE o.status = 'completed'
          AND o.created_at >= NOW() - INTERVAL '90 days'
        GROUP BY p.id, p.name, p.category
    ),
    -- 分类汇总
    category_totals AS (
        SELECT
            category,
            SUM(total_revenue) AS category_revenue
        FROM product_sales
        GROUP BY category
    )
SELECT
    ps.name AS product_name,
    ps.category,
    ps.order_count,
    ps.total_quantity,
    ps.total_revenue,
    ct.category_revenue,
    ROUND(100.0 * ps.total_revenue / NULLIF(ct.category_revenue, 0), 2) AS category_percentage
FROM product_sales ps
INNER JOIN category_totals ct ON ps.category = ct.category
WHERE ps.total_revenue > 0
ORDER BY ps.category, ps.total_revenue DESC;

8.2 示例2:用户留存分析

WITH
    -- 每月新用户
    monthly_new_users AS (
        SELECT
            DATE_TRUNC('month', created_at) AS cohort_month,
            COUNT(*) AS new_users,
            ARRAY_AGG(id) AS user_ids
        FROM users
        GROUP BY DATE_TRUNC('month', created_at)
    ),
    -- 用户活动
    user_activity AS (
        SELECT
            user_id,
            DATE_TRUNC('month', created_at) AS activity_month
        FROM orders
        GROUP BY user_id, DATE_TRUNC('month', created_at)
    ),
    -- 留存计算
    retention AS (
        SELECT
            mnu.cohort_month,
            ua.activity_month,
            EXTRACT(MONTH FROM AGE(ua.activity_month, mnu.cohort_month)) AS months_since_signup,
            COUNT(DISTINCT ua.user_id) AS active_users,
            mnu.new_users
        FROM monthly_new_users mnu
        CROSS JOIN LATERAL UNNEST(mnu.user_ids) AS uid
        LEFT JOIN user_activity ua ON ua.user_id = uid
        GROUP BY mnu.cohort_month, ua.activity_month, mnu.new_users
    )
SELECT
    cohort_month,
    months_since_signup,
    active_users,
    new_users,
    ROUND(100.0 * active_users / new_users, 2) AS retention_rate
FROM retention
WHERE months_since_signup IS NOT NULL
ORDER BY cohort_month, months_since_signup;

8.3 示例3:推荐系统(协同过滤)

-- 找出与指定用户购买过相似产品的用户,并推荐产品
WITH
    -- 目标用户购买的产品
    target_user_products AS (
        SELECT DISTINCT product_id
        FROM order_items oi
        INNER JOIN orders o ON oi.order_id = o.id
        WHERE o.user_id = 123
    ),
    -- 购买过相似产品的用户
    similar_users AS (
        SELECT
            o.user_id,
            COUNT(DISTINCT oi.product_id) AS common_products
        FROM order_items oi
        INNER JOIN orders o ON oi.order_id = o.id
        WHERE oi.product_id IN (SELECT product_id FROM target_user_products)
          AND o.user_id != 123
        GROUP BY o.user_id
        HAVING COUNT(DISTINCT oi.product_id) >= 2
        ORDER BY common_products DESC
        LIMIT 50
    ),
    -- 这些用户购买的其他产品
    recommendations AS (
        SELECT
            p.id,
            p.name,
            p.category,
            COUNT(DISTINCT o.user_id) AS purchased_by_similar_users,
            AVG(p.price) AS avg_price
        FROM products p
        INNER JOIN order_items oi ON p.id = oi.product_id
        INNER JOIN orders o ON oi.order_id = o.id
        WHERE o.user_id IN (SELECT user_id FROM similar_users)
          AND p.id NOT IN (SELECT product_id FROM target_user_products)
        GROUP BY p.id, p.name, p.category
    )
SELECT
    name,
    category,
    purchased_by_similar_users AS relevance_score,
    avg_price
FROM recommendations
WHERE purchased_by_similar_users >= 3
ORDER BY relevance_score DESC, avg_price DESC
LIMIT 10;

9. 🆚 PostgreSQL vs MySQL

特性 PostgreSQL MySQL
CTE(WITH) 8.4+ 8.0+
递归CTE 8.4+ 8.0+
MATERIALIZED CTE 12+ 不支持
LATERAL子查询 9.3+ 不支持
递归深度限制 默认无限制 max_recursive_iterations

10. 🎯 最佳实践

10.1 1. 优先使用CTE提高可读性

-- 不好:嵌套子查询
SELECT * FROM (
    SELECT * FROM (
        SELECT user_id, COUNT(*) AS cnt FROM orders GROUP BY user_id
    ) AS inner_query WHERE cnt > 5
) AS outer_query WHERE user_id > 100;

-- 好:使用CTE
WITH
    user_order_counts AS (
        SELECT user_id, COUNT(*) AS order_count
        FROM orders
        GROUP BY user_id
    ),
    frequent_buyers AS (
        SELECT * FROM user_order_counts WHERE order_count > 5
    )
SELECT * FROM frequent_buyers WHERE user_id > 100;

10.2 2. 递归CTE设置终止条件

-- 始终设置递归终止条件,避免无限循环
WITH RECURSIVE series AS (
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM series WHERE n < 1000  -- 终止条件
)
SELECT * FROM series;

10.3 3. 复杂计算考虑临时表

-- 数据量大且需要多次引用时,使用临时表
CREATE TEMP TABLE daily_stats AS
SELECT
    DATE(created_at) AS date,
    COUNT(*) AS order_count,
    SUM(total_amount) AS revenue
FROM orders
GROUP BY DATE(created_at);

CREATE INDEX idx_daily_stats_date ON daily_stats(date);

-- 多次使用
SELECT * FROM daily_stats WHERE date >= '2024-01-01';
SELECT AVG(revenue) FROM daily_stats;

11. 📝 练习题

  1. 使用CTE查询每个分类中价格最高的3个产品
  2. 使用递归CTE构建评论回复树
  3. 计算每月新用户和活跃用户
  4. 找出连续3天都有订单的用户
  5. 生成销售环比增长报表

11.1 参考答案

-- 1. 每个分类价格最高的3个产品
WITH ranked_products AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rn
    FROM products
)
SELECT category, name, price
FROM ranked_products
WHERE rn <= 3
ORDER BY category, rn;

-- 2. 评论回复树(假设有comments表)
-- CREATE TABLE comments (
--     id SERIAL PRIMARY KEY,
--     content TEXT,
--     parent_id INTEGER REFERENCES comments(id),
--     created_at TIMESTAMPTZ DEFAULT NOW()
-- );

WITH RECURSIVE comment_tree AS (
    SELECT id, content, parent_id, 0 AS depth, ARRAY[id] AS path
    FROM comments
    WHERE parent_id IS NULL

    UNION ALL

    SELECT c.id, c.content, c.parent_id, ct.depth + 1, ct.path || c.id
    FROM comments c
    INNER JOIN comment_tree ct ON c.parent_id = ct.id
)
SELECT
    REPEAT('  ', depth) || content AS hierarchy,
    depth
FROM comment_tree
ORDER BY path;

-- 3. 每月新用户和活跃用户
WITH months AS (
    SELECT generate_series(
        DATE_TRUNC('month', NOW() - INTERVAL '12 months'),
        DATE_TRUNC('month', NOW()),
        INTERVAL '1 month'
    ) AS month
),
new_users AS (
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        COUNT(*) AS new_count
    FROM users
    GROUP BY DATE_TRUNC('month', created_at)
),
active_users AS (
    SELECT
        DATE_TRUNC('month', o.created_at) AS month,
        COUNT(DISTINCT o.user_id) AS active_count
    FROM orders o
    GROUP BY DATE_TRUNC('month', o.created_at)
)
SELECT
    m.month,
    COALESCE(nu.new_count, 0) AS new_users,
    COALESCE(au.active_count, 0) AS active_users
FROM months m
LEFT JOIN new_users nu ON m.month = nu.month
LEFT JOIN active_users au ON m.month = au.month
ORDER BY m.month;

12. 📚 下一步


关键要点:

  • CTE提高查询可读性和可维护性
  • 递归CTE处理树形/图形数据
  • EXISTS通常比IN更高效
  • NOT IN要小心NULL值问题
  • MATERIALIZED CTE可以优化性能