目录

pgsql-16 JSON数据处理

16 - JSON数据处理

1. 📖 概述

PostgreSQL对JSON的支持是其最强大的特性之一,提供了JSON和JSONB两种类型,以及丰富的操作符和函数。

2. 🔤 JSON vs JSONB

2.1 类型对比

特性 JSON JSONB
存储格式 文本格式 二进制格式
插入速度 慢(需要转换)
查询速度
空格保留 保留 不保留
键顺序 保留 不保留
重复键 保留 只保留最后一个
索引支持 表达式索引 GIN/GiST索引
推荐使用

2.2 创建表

-- PostgreSQL
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200),
    attributes JSONB,           -- 产品属性
    metadata JSON,              -- 元数据
    tags JSONB                  -- 标签
);

-- 插入数据
INSERT INTO products (name, attributes, tags) VALUES
    ('Laptop', '{"brand": "Dell", "cpu": "i7", "ram": "16GB", "price": 999.99}', '["electronics", "computer"]'),
    ('Phone', '{"brand": "Apple", "model": "iPhone 15", "storage": "256GB"}', '["electronics", "mobile"]'),
    ('Book', '{"title": "PostgreSQL Guide", "author": "John Doe", "pages": 500}', '["book", "tech"]');

-- MySQL对比(只有JSON类型,功能较弱)
-- CREATE TABLE products (
--     id INT AUTO_INCREMENT PRIMARY KEY,
--     name VARCHAR(200),
--     attributes JSON
-- );

3. 🔧 JSON操作符

3.1 提取操作符

-- -> : 提取JSON对象字段(返回JSON)
SELECT
    name,
    attributes -> 'brand' AS brand_json,
    attributes -> 'price' AS price_json
FROM products;
-- 返回: "Dell", 999.99 (带引号的JSON)

-- ->> : 提取JSON对象字段(返回TEXT)
SELECT
    name,
    attributes ->> 'brand' AS brand_text,
    attributes ->> 'price' AS price_text
FROM products;
-- 返回: Dell, 999.99 (纯文本)

-- #> : 通过路径提取(返回JSON)
SELECT attributes #> '{brand}' FROM products;

-- #>> : 通过路径提取(返回TEXT)
SELECT attributes #>> '{brand}' FROM products;

-- 数组访问
SELECT tags -> 0 AS first_tag FROM products;        -- "electronics"
SELECT tags ->> 1 AS second_tag FROM products;      -- computer, mobile, tech

3.2 嵌套JSON访问

-- 插入嵌套JSON
INSERT INTO products (name, attributes) VALUES
    ('Server', '{
        "brand": "HP",
        "specs": {
            "cpu": {"model": "Xeon", "cores": 16},
            "memory": {"size": "64GB", "type": "DDR4"},
            "storage": [
                {"type": "SSD", "size": "1TB"},
                {"type": "HDD", "size": "4TB"}
            ]
        }
    }');

-- 访问嵌套字段
SELECT
    name,
    attributes -> 'specs' -> 'cpu' ->> 'model' AS cpu_model,
    attributes -> 'specs' -> 'memory' ->> 'size' AS memory_size,
    attributes #>> '{specs,cpu,cores}' AS cpu_cores,
    attributes #>> '{specs,storage,0,size}' AS first_storage_size
FROM products
WHERE name = 'Server';

3.3 包含操作符(JSONB专用)

-- @> : 包含(左边包含右边)
SELECT * FROM products
WHERE attributes @> '{"brand": "Dell"}';

SELECT * FROM products
WHERE tags @> '["electronics"]';

-- <@ : 被包含(左边被右边包含)
SELECT * FROM products
WHERE '{"brand": "Dell"}' <@ attributes;

-- ? : 是否存在键
SELECT * FROM products WHERE attributes ? 'brand';

-- ?| : 是否存在任一键
SELECT * FROM products WHERE attributes ?| ARRAY['brand', 'model'];

-- ?& : 是否存在所有键
SELECT * FROM products WHERE attributes ?& ARRAY['brand', 'price'];

4. 📊 JSON函数

4.1 创建JSON

-- json_build_object: 构建JSON对象
SELECT json_build_object(
    'name', name,
    'price', attributes ->> 'price',
    'brand', attributes ->> 'brand'
) AS product_json
FROM products;

-- json_build_array: 构建JSON数组
SELECT json_build_array(1, 2, 3, 'test', TRUE);

-- jsonb_build_object
SELECT jsonb_build_object(
    'id', id,
    'name', name,
    'tags', tags
) FROM products;

-- row_to_json: 行转JSON
SELECT row_to_json(users) FROM users LIMIT 1;

-- json_agg: 聚合为JSON数组
SELECT json_agg(json_build_object(
    'name', name,
    'brand', attributes ->> 'brand'
)) AS all_products
FROM products;

-- jsonb_object_agg: 聚合为JSON对象
SELECT jsonb_object_agg(name, attributes ->> 'brand') AS products_brands
FROM products;

4.2 JSON修改函数

-- jsonb_set: 设置值
UPDATE products
SET attributes = jsonb_set(
    attributes,
    '{price}',
    '899.99'
)
WHERE name = 'Laptop';

-- 添加新字段
UPDATE products
SET attributes = jsonb_set(
    attributes,
    '{discount}',
    '0.1',
    true  -- create_missing = true
)
WHERE attributes @> '{"brand": "Dell"}';

-- jsonb_insert: 插入值
UPDATE products
SET attributes = jsonb_insert(
    attributes,
    '{colors, 0}',
    '"Black"'
)
WHERE name = 'Laptop';

-- || : 合并JSON
UPDATE products
SET attributes = attributes || '{"warranty": "2 years"}'
WHERE name = 'Laptop';

-- - : 删除键
UPDATE products
SET attributes = attributes - 'discount'
WHERE name = 'Laptop';

-- #- : 按路径删除
UPDATE products
SET attributes = attributes #- '{specs,storage,1}'
WHERE name = 'Server';

4.3 JSON查询函数

-- jsonb_each: 展开顶层键值对
SELECT * FROM jsonb_each('{"a": 1, "b": 2}'::jsonb);
-- 返回: key | value
--      a   | 1
--      b   | 2

-- jsonb_each_text: 展开为文本
SELECT * FROM jsonb_each_text('{"a": 1, "b": "test"}'::jsonb);

-- jsonb_object_keys: 获取所有键
SELECT jsonb_object_keys(attributes) AS key
FROM products
WHERE name = 'Laptop';

-- jsonb_array_elements: 展开数组
SELECT jsonb_array_elements(tags) AS tag
FROM products;

-- jsonb_array_elements_text
SELECT jsonb_array_elements_text(tags) AS tag
FROM products;

-- jsonb_typeof: 获取类型
SELECT
    name,
    jsonb_typeof(attributes -> 'brand') AS brand_type,
    jsonb_typeof(attributes -> 'price') AS price_type,
    jsonb_typeof(tags) AS tags_type
FROM products;

5. 🔍 复杂查询示例

5.1 示例1: 按JSON字段过滤和排序

-- 查询价格大于500的产品
SELECT
    name,
    attributes ->> 'brand' AS brand,
    (attributes ->> 'price')::NUMERIC AS price
FROM products
WHERE (attributes ->> 'price')::NUMERIC > 500
ORDER BY (attributes ->> 'price')::NUMERIC DESC;

-- 查询有特定标签的产品
SELECT name, tags
FROM products
WHERE tags @> '["electronics"]'
ORDER BY name;

5.2 示例2: JSON数组操作

-- 创建订单表
CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    user_id INTEGER,
    items JSONB,  -- [{product_id: 1, quantity: 2, price: 99.99}, ...]
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- 插入订单
INSERT INTO orders (user_id, items) VALUES
    (1, '[
        {"product_id": 1, "quantity": 2, "price": 99.99},
        {"product_id": 2, "quantity": 1, "price": 599.99}
    ]'),
    (2, '[
        {"product_id": 3, "quantity": 5, "price": 15.99}
    ]');

-- 计算订单总额
SELECT
    id,
    user_id,
    (
        SELECT SUM((item ->> 'quantity')::INTEGER * (item ->> 'price')::NUMERIC)
        FROM jsonb_array_elements(items) AS item
    ) AS total_amount
FROM orders;

-- 展开订单项
SELECT
    o.id AS order_id,
    o.user_id,
    item ->> 'product_id' AS product_id,
    item ->> 'quantity' AS quantity,
    item ->> 'price' AS price
FROM orders o,
     jsonb_array_elements(o.items) AS item;

-- 查询包含特定产品的订单
SELECT *
FROM orders
WHERE items @> '[{"product_id": 1}]';

5.3 示例3: 动态属性查询

-- 电商产品属性(不同产品有不同属性)
CREATE TABLE catalog_products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200),
    category VARCHAR(50),
    attributes JSONB
);

INSERT INTO catalog_products (name, category, attributes) VALUES
    ('MacBook Pro', 'laptop', '{
        "brand": "Apple",
        "screen": "14 inch",
        "cpu": "M3 Pro",
        "ram": "16GB",
        "storage": "512GB SSD"
    }'),
    ('iPhone 15', 'phone', '{
        "brand": "Apple",
        "screen": "6.1 inch",
        "camera": "48MP",
        "storage": "128GB"
    }'),
    ('Sony WH-1000XM5', 'headphones', '{
        "brand": "Sony",
        "type": "Over-ear",
        "wireless": true,
        "noise_cancelling": true,
        "battery": "30 hours"
    }');

-- 查询有特定属性的产品
SELECT name, category
FROM catalog_products
WHERE attributes ? 'wireless' AND attributes ->> 'wireless' = 'true';

-- 按品牌分组统计
SELECT
    attributes ->> 'brand' AS brand,
    COUNT(*) AS product_count,
    json_agg(json_build_object('name', name, 'category', category)) AS products
FROM catalog_products
GROUP BY attributes ->> 'brand';

-- 搜索包含关键字的产品
SELECT
    name,
    category,
    attributes
FROM catalog_products
WHERE attributes::TEXT ILIKE '%Apple%';

6. 📈 索引优化

6.1 GIN索引

-- 创建GIN索引(推荐用于JSONB)
CREATE INDEX idx_products_attributes ON products USING GIN (attributes);
CREATE INDEX idx_products_tags ON products USING GIN (tags);

-- 路径索引
CREATE INDEX idx_products_brand ON products ((attributes ->> 'brand'));
CREATE INDEX idx_products_price ON products (((attributes ->> 'price')::NUMERIC));

-- 查询会使用索引
EXPLAIN ANALYZE
SELECT * FROM products WHERE attributes @> '{"brand": "Dell"}';

EXPLAIN ANALYZE
SELECT * FROM products WHERE attributes ->> 'brand' = 'Dell';

6.2 GIN索引操作符支持

-- GIN索引支持的操作符:
-- @>, <@, ?, ?|, ?&, @?(@@ for jsonpath)

-- 使用索引的查询
SELECT * FROM products WHERE attributes @> '{"brand": "Apple"}';
SELECT * FROM products WHERE attributes ? 'brand';
SELECT * FROM products WHERE attributes ?& ARRAY['brand', 'price'];

-- 不使用索引的查询(需要表达式索引)
SELECT * FROM products WHERE attributes ->> 'brand' = 'Apple';

7. 🎯 JSON路径查询(jsonpath)

PostgreSQL 12+支持SQL/JSON路径语言。

-- @? : 路径存在
SELECT * FROM products
WHERE attributes @? '$.brand ? (@ == "Apple")';

-- @@ : 路径匹配
SELECT * FROM products
WHERE attributes @@ '$.price > 500';

-- jsonb_path_query: 查询JSON路径
SELECT jsonb_path_query(
    '{"name": "John", "age": 30, "skills": ["SQL", "Python"]}'::jsonb,
    '$.skills[*]'
);

-- jsonb_path_query_array
SELECT jsonb_path_query_array(
    attributes,
    '$.specs.storage[*].size'
) AS storage_sizes
FROM products
WHERE name = 'Server';

-- jsonb_path_exists
SELECT name
FROM products
WHERE jsonb_path_exists(attributes, '$.price ? (@ > 500)');

8. 🔄 JSON与关系型数据转换

8.1 JSON转表

-- 创建用户设置表
CREATE TABLE user_settings (
    user_id INTEGER PRIMARY KEY,
    settings JSONB
);

INSERT INTO user_settings VALUES
    (1, '{"theme": "dark", "language": "en", "notifications": {"email": true, "sms": false}}'),
    (2, '{"theme": "light", "language": "zh", "notifications": {"email": false, "sms": true}}');

-- 展开为行
SELECT
    user_id,
    key AS setting_name,
    value AS setting_value
FROM user_settings,
     jsonb_each(settings);

-- 展开嵌套
SELECT
    user_id,
    'notifications.' || key AS setting_name,
    value AS setting_value
FROM user_settings,
     jsonb_each(settings -> 'notifications');

8.2 表转JSON

-- 单行转JSON
SELECT row_to_json(users.*) FROM users WHERE id = 1;

-- 多行聚合为JSON数组
SELECT jsonb_agg(to_jsonb(users.*)) AS all_users FROM users;

-- 自定义JSON结构
SELECT jsonb_build_object(
    'user_id', u.id,
    'user_name', u.username,
    'orders', (
        SELECT jsonb_agg(jsonb_build_object(
            'order_id', o.id,
            'amount', o.total_amount,
            'date', o.created_at
        ))
        FROM orders o
        WHERE o.user_id = u.id
    )
) AS user_with_orders
FROM users u
WHERE u.id = 1;

9. 🆚 PostgreSQL vs MySQL

功能 PostgreSQL MySQL
JSON类型 JSON, JSONB JSON
二进制存储 JSONB 是(8.0+)
索引 GIN, GiST, 表达式索引 虚拟列索引
操作符 ->, ->>, @>, ?, etc. ->>, ->
修改函数 jsonb_set, jsonb_insert, ||, - JSON_SET, JSON_INSERT, JSON_REMOVE
路径查询 jsonpath JSON path
性能 JSONB非常快 较好

10. 🎯 最佳实践

10.1 1. 优先使用JSONB

-- 推荐
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    attributes JSONB
);

-- 不推荐(除非需要保留格式)
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    attributes JSON
);

10.2 2. 为常用查询创建索引

-- GIN索引用于包含查询
CREATE INDEX idx_attributes ON products USING GIN (attributes);

-- 表达式索引用于精确匹配
CREATE INDEX idx_brand ON products ((attributes ->> 'brand'));
CREATE INDEX idx_price ON products (((attributes ->> 'price')::NUMERIC));

10.3 3. 验证JSON结构

-- 创建CHECK约束验证JSON结构
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    attributes JSONB,
    CHECK (
        attributes ? 'name' AND
        attributes ? 'price' AND
        jsonb_typeof(attributes -> 'price') = 'number'
    )
);

-- 或使用触发器
CREATE OR REPLACE FUNCTION validate_product_attributes()
RETURNS TRIGGER AS $$
BEGIN
    IF NOT (NEW.attributes ? 'name' AND NEW.attributes ? 'price') THEN
        RAISE EXCEPTION 'Product must have name and price';
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER validate_attributes
BEFORE INSERT OR UPDATE ON products
FOR EACH ROW EXECUTE FUNCTION validate_product_attributes();

10.4 4. 适度使用JSON

-- 好的使用场景:
-- - 动态属性(每个产品属性不同)
-- - 嵌入式文档(审计日志,元数据)
-- - 灵活的配置

-- 不好的使用场景:
-- - 频繁查询和索引的核心业务字段
-- - 需要强类型和约束的数据
-- - 需要外键关联的数据

-- 混合使用
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200) NOT NULL,        -- 核心字段用列
    price NUMERIC(10,2) NOT NULL,      -- 核心字段用列
    category_id INTEGER REFERENCES categories(id),
    attributes JSONB,                   -- 动态属性用JSON
    metadata JSONB                      -- 元数据用JSON
);

11. 📝 实战示例

11.1 电商产品搜索

-- 创建产品全文搜索
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    category VARCHAR(100),
    specs JSONB
);

-- 插入测试数据
INSERT INTO products (name, category, specs) VALUES
    ('Dell XPS 15', 'laptop', '{"cpu": "i7-12700H", "ram": "16GB", "storage": "512GB SSD", "screen": "15.6 inch", "price": 1299.99}'),
    ('MacBook Pro 14', 'laptop', '{"cpu": "M3 Pro", "ram": "18GB", "storage": "512GB SSD", "screen": "14 inch", "price": 1999.99}'),
    ('ThinkPad X1', 'laptop', '{"cpu": "i7-1260P", "ram": "16GB", "storage": "1TB SSD", "screen": "14 inch", "price": 1499.99}');

-- 多条件搜索
SELECT
    name,
    category,
    specs ->> 'cpu' AS cpu,
    specs ->> 'ram' AS ram,
    (specs ->> 'price')::NUMERIC AS price
FROM products
WHERE category = 'laptop'
  AND (specs ->> 'price')::NUMERIC BETWEEN 1000 AND 2000
  AND specs @> '{"ram": "16GB"}'
ORDER BY (specs ->> 'price')::NUMERIC;

-- 创建视图简化查询
CREATE VIEW product_search AS
SELECT
    id,
    name,
    category,
    specs ->> 'cpu' AS cpu,
    specs ->> 'ram' AS ram,
    specs ->> 'storage' AS storage,
    (specs ->> 'price')::NUMERIC AS price,
    specs
FROM products;

SELECT * FROM product_search
WHERE price < 1500 AND ram = '16GB';

12. 📚 下一步


关键要点:

  • 优先使用JSONB而非JSON
  • 使用GIN索引优化JSONB查询
  • @>操作符用于包含查询
  • 适度使用JSON,核心字段仍用列存储
  • jsonb_set/jsonb_insert用于修改JSON