目录

pgsql-09 多表连接查询

09 - 多表连接查询

1. 📖 概述

多表连接(JOIN)是关系型数据库的核心功能。本章详细介绍PostgreSQL的各种JOIN类型及其应用场景。

2. 🔗 JOIN类型概览

LEFT JOIN:  返回左表所有记录 + 右表匹配记录
RIGHT JOIN: 返回右表所有记录 + 左表匹配记录
INNER JOIN: 只返回两表匹配的记录
FULL JOIN:  返回两表所有记录
CROSS JOIN: 返回笛卡尔积

3. 📊 测试数据

-- 创建示例表
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50),
    email VARCHAR(100)
);

CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    user_id INTEGER,
    total_amount NUMERIC(10, 2),
    created_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200),
    price NUMERIC(10, 2)
);

CREATE TABLE order_items (
    id SERIAL PRIMARY KEY,
    order_id INTEGER,
    product_id INTEGER,
    quantity INTEGER,
    price NUMERIC(10, 2)
);

-- 插入测试数据
INSERT INTO users (username, email) VALUES
    ('alice', 'alice@example.com'),
    ('bob', 'bob@example.com'),
    ('charlie', 'charlie@example.com'),
    ('david', 'david@example.com');

INSERT INTO orders (user_id, total_amount) VALUES
    (1, 99.99),
    (1, 150.00),
    (2, 75.50),
    (3, 200.00);

INSERT INTO products (name, price) VALUES
    ('Laptop', 999.99),
    ('Mouse', 29.99),
    ('Keyboard', 79.99),
    ('Monitor', 299.99);

INSERT INTO order_items (order_id, product_id, quantity, price) VALUES
    (1, 2, 1, 29.99),
    (1, 3, 1, 79.99),
    (2, 1, 1, 999.99),
    (3, 4, 1, 299.99);

4. 🔵 INNER JOIN(内连接)

返回两表都有匹配的记录。

-- 基本INNER JOIN
SELECT
    u.username,
    u.email,
    o.id AS order_id,
    o.total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 简写(省略INNER)
SELECT
    u.username,
    o.total_amount
FROM users u
JOIN orders o ON u.id = o.user_id;

-- 多条件JOIN
SELECT
    u.username,
    o.total_amount,
    o.created_at
FROM users u
INNER JOIN orders o ON u.id = o.user_id
    AND o.total_amount > 100
    AND o.created_at >= '2024-01-01';

-- 三表JOIN
SELECT
    u.username,
    o.id AS order_id,
    p.name AS product_name,
    oi.quantity,
    oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id;

-- 结果: 只显示有订单的用户

5. ⬅️ LEFT JOIN(左外连接)

返回左表所有记录,右表没有匹配的显示NULL。

-- 基本LEFT JOIN
SELECT
    u.username,
    u.email,
    o.id AS order_id,
    o.total_amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id;

-- 结果: 包含所有用户,没有订单的用户order_id和total_amount为NULL

-- 找出没有订单的用户
SELECT
    u.username,
    u.email
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;

-- 统计每个用户的订单数(包括0个订单的用户)
SELECT
    u.username,
    COUNT(o.id) AS order_count,
    COALESCE(SUM(o.total_amount), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username
ORDER BY total_spent DESC;

-- 多个LEFT JOIN
SELECT
    u.username,
    COUNT(DISTINCT o.id) AS order_count,
    COUNT(DISTINCT oi.id) AS item_count,
    COALESCE(SUM(oi.price * oi.quantity), 0) AS total
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN order_items oi ON o.id = oi.order_id
GROUP BY u.id, u.username;

6. ➡️ RIGHT JOIN(右外连接)

返回右表所有记录,左表没有匹配的显示NULL。

-- 基本RIGHT JOIN
SELECT
    u.username,
    o.id AS order_id,
    o.total_amount
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id;

-- 等价的LEFT JOIN写法(更常用)
SELECT
    u.username,
    o.id AS order_id,
    o.total_amount
FROM orders o
LEFT JOIN users u ON o.user_id = u.id;

-- 注意: RIGHT JOIN很少使用,通常改写成LEFT JOIN

7. ⬌ FULL JOIN(全外连接)

返回两表所有记录,没有匹配的显示NULL。

-- 基本FULL JOIN
SELECT
    u.username,
    o.id AS order_id,
    o.total_amount
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id;

-- 结果:
-- - 所有用户(包括没有订单的)
-- - 所有订单(包括没有关联用户的,如果有的话)

-- 找出不匹配的记录
SELECT
    u.username,
    o.id AS order_id
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL OR o.id IS NULL;

-- MySQL不支持FULL JOIN,需要用UNION模拟

8. ✖️ CROSS JOIN(交叉连接)

返回两表的笛卡尔积。

-- 显式CROSS JOIN
SELECT
    u.username,
    p.name,
    p.price
FROM users u
CROSS JOIN products p;

-- 隐式CROSS JOIN(不推荐)
SELECT
    u.username,
    p.name
FROM users u, products p;

-- 实际应用: 生成组合
-- 示例: 为每个用户生成所有产品的推荐
SELECT
    u.id AS user_id,
    u.username,
    p.id AS product_id,
    p.name,
    p.price
FROM users u
CROSS JOIN products p
WHERE p.price < 100  -- 添加条件过滤
ORDER BY u.id, p.price;

9. 🔄 SELF JOIN(自连接)

表与自己连接。

-- 示例: 员工-经理关系
CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    manager_id INTEGER REFERENCES employees(id)
);

INSERT INTO employees (name, manager_id) VALUES
    ('Alice', NULL),      -- CEO
    ('Bob', 1),           -- 报告给Alice
    ('Charlie', 1),       -- 报告给Alice
    ('David', 2),         -- 报告给Bob
    ('Eve', 2);           -- 报告给Bob

-- 查询员工及其经理
SELECT
    e.name AS employee,
    m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

-- 查询与某个员工同级的其他员工
SELECT
    e1.name AS employee1,
    e2.name AS employee2,
    m.name AS their_manager
FROM employees e1
INNER JOIN employees e2 ON e1.manager_id = e2.manager_id AND e1.id < e2.id
LEFT JOIN employees m ON e1.manager_id = m.id;

10. 🎯 JOIN优化技巧

10.1 1. 使用适当的JOIN类型

-- 不好: 使用LEFT JOIN但WHERE过滤掉了NULL
SELECT u.*, o.*
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 100;  -- 这里把LEFT JOIN变成了INNER JOIN

-- 好: 直接使用INNER JOIN
SELECT u.*, o.*
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 100;

10.2 2. JOIN顺序很重要

-- PostgreSQL优化器会自动调整顺序,但理解原理很重要

-- 原则: 从小表开始JOIN
-- 好: 先过滤再JOIN
SELECT u.*, o.*
FROM (
    SELECT * FROM orders WHERE created_at >= '2024-01-01'
) o
INNER JOIN users u ON o.user_id = u.id;

-- 或使用WHERE提前过滤
SELECT u.*, o.*
FROM orders o
INNER JOIN users u ON o.user_id = u.id
WHERE o.created_at >= '2024-01-01';

10.3 3. 使用索引

-- 确保JOIN列有索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_order_items_product_id ON order_items(product_id);

-- 检查执行计划
EXPLAIN ANALYZE
SELECT u.*, o.*
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

10.4 4. 避免SELECT *

-- 不好: 查询所有列
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 好: 只查询需要的列
SELECT
    u.username,
    u.email,
    o.id,
    o.total_amount,
    o.created_at
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

11. 📝 实战示例

11.1 示例1: 订单详情查询

-- 查询订单完整信息(用户、订单、商品)
SELECT
    o.id AS order_id,
    o.created_at,
    u.username,
    u.email,
    json_agg(json_build_object(
        'product_id', p.id,
        'product_name', p.name,
        'quantity', oi.quantity,
        'unit_price', oi.price,
        'subtotal', oi.quantity * oi.price
    )) AS items,
    o.total_amount
FROM orders o
INNER JOIN users u ON o.user_id = u.id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
GROUP BY o.id, o.created_at, u.username, u.email, o.total_amount
ORDER BY o.created_at DESC;

11.2 示例2: 用户购买统计

-- 用户购买次数和总金额
SELECT
    u.id,
    u.username,
    u.email,
    COUNT(DISTINCT o.id) AS order_count,
    COUNT(DISTINCT oi.product_id) AS unique_products,
    COALESCE(SUM(o.total_amount), 0) AS lifetime_value,
    MAX(o.created_at) AS last_order_date,
    CASE
        WHEN COUNT(o.id) = 0 THEN 'Never Ordered'
        WHEN COUNT(o.id) = 1 THEN 'New Customer'
        WHEN COUNT(o.id) < 5 THEN 'Regular'
        ELSE 'VIP'
    END AS customer_tier
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN order_items oi ON o.id = oi.order_id
GROUP BY u.id, u.username, u.email
ORDER BY lifetime_value DESC;

11.3 示例3: 产品销售排行

-- 产品销售排行榜
SELECT
    p.id,
    p.name,
    p.price,
    COUNT(DISTINCT oi.order_id) AS times_ordered,
    SUM(oi.quantity) AS total_quantity_sold,
    SUM(oi.quantity * oi.price) AS total_revenue,
    AVG(oi.price) AS avg_selling_price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.price
ORDER BY total_revenue DESC NULLS LAST;

11.4 示例4: 未购买特定商品的用户

-- 查找没有购买过Laptop的用户
SELECT DISTINCT u.*
FROM users u
WHERE NOT EXISTS (
    SELECT 1
    FROM orders o
    INNER JOIN order_items oi ON o.id = oi.order_id
    INNER JOIN products p ON oi.product_id = p.id
    WHERE o.user_id = u.id
      AND p.name = 'Laptop'
);

-- 或使用LEFT JOIN + IS NULL
SELECT u.*
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN order_items oi ON o.id = oi.order_id
LEFT JOIN products p ON oi.product_id = p.id AND p.name = 'Laptop'
WHERE p.id IS NULL
GROUP BY u.id;

11.5 示例5: 月度销售报表

-- 每月销售汇总
SELECT
    DATE_TRUNC('month', o.created_at) AS month,
    COUNT(DISTINCT o.id) AS order_count,
    COUNT(DISTINCT o.user_id) AS unique_customers,
    SUM(o.total_amount) AS revenue,
    AVG(o.total_amount) AS avg_order_value,
    COUNT(DISTINCT oi.product_id) AS unique_products_sold
FROM orders o
LEFT JOIN order_items oi ON o.id = oi.order_id
GROUP BY DATE_TRUNC('month', o.created_at)
ORDER BY month DESC;

12. 🆚 PostgreSQL vs MySQL

特性 PostgreSQL MySQL
INNER JOIN 支持 支持
LEFT/RIGHT JOIN 支持 支持
FULL OUTER JOIN 支持 不支持
CROSS JOIN 支持 支持
NATURAL JOIN 支持 支持
LATERAL JOIN 支持 8.0.14+支持
JOIN优化器 非常强大 良好

13. 🎯 最佳实践

  1. 明确指定JOIN类型: 不要省略INNER,写清楚
  2. 使用表别名: 提高可读性
  3. 为JOIN列创建索引: 提高性能
  4. 避免过多JOIN: 超过5个表考虑重构
  5. 使用EXPLAIN分析: 检查执行计划
  6. LEFT JOIN注意NULL: WHERE条件可能改变JOIN语义
  7. 优先使用INNER JOIN: 性能通常更好

14. 📚 下一步


关键要点:

  • INNER JOIN返回匹配记录
  • LEFT JOIN返回左表所有记录
  • FULL JOIN PostgreSQL独有
  • 为JOIN列创建索引
  • 使用EXPLAIN检查性能
  • 避免过多表连接