pgsql-16 JSON数据处理
目录
16 - JSON数据处理
1. 📖 概述
PostgreSQL对JSON的支持是其最强大的特性之一,提供了JSON和JSONB两种类型,以及丰富的操作符和函数。
2. 🔤 JSON vs JSONB
2.1 类型对比
| 特性 | JSON | JSONB |
|---|---|---|
| 存储格式 | 文本格式 | 二进制格式 |
| 插入速度 | 快 | 慢(需要转换) |
| 查询速度 | 慢 | 快 |
| 空格保留 | 保留 | 不保留 |
| 键顺序 | 保留 | 不保留 |
| 重复键 | 保留 | 只保留最后一个 |
| 索引支持 | 表达式索引 | GIN/GiST索引 |
| 推荐使用 | ❌ | ✅ |
2.2 创建表
-- PostgreSQL
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200),
attributes JSONB, -- 产品属性
metadata JSON, -- 元数据
tags JSONB -- 标签
);
-- 插入数据
INSERT INTO products (name, attributes, tags) VALUES
('Laptop', '{"brand": "Dell", "cpu": "i7", "ram": "16GB", "price": 999.99}', '["electronics", "computer"]'),
('Phone', '{"brand": "Apple", "model": "iPhone 15", "storage": "256GB"}', '["electronics", "mobile"]'),
('Book', '{"title": "PostgreSQL Guide", "author": "John Doe", "pages": 500}', '["book", "tech"]');
-- MySQL对比(只有JSON类型,功能较弱)
-- CREATE TABLE products (
-- id INT AUTO_INCREMENT PRIMARY KEY,
-- name VARCHAR(200),
-- attributes JSON
-- );
3. 🔧 JSON操作符
3.1 提取操作符
-- -> : 提取JSON对象字段(返回JSON)
SELECT
name,
attributes -> 'brand' AS brand_json,
attributes -> 'price' AS price_json
FROM products;
-- 返回: "Dell", 999.99 (带引号的JSON)
-- ->> : 提取JSON对象字段(返回TEXT)
SELECT
name,
attributes ->> 'brand' AS brand_text,
attributes ->> 'price' AS price_text
FROM products;
-- 返回: Dell, 999.99 (纯文本)
-- #> : 通过路径提取(返回JSON)
SELECT attributes #> '{brand}' FROM products;
-- #>> : 通过路径提取(返回TEXT)
SELECT attributes #>> '{brand}' FROM products;
-- 数组访问
SELECT tags -> 0 AS first_tag FROM products; -- "electronics"
SELECT tags ->> 1 AS second_tag FROM products; -- computer, mobile, tech
3.2 嵌套JSON访问
-- 插入嵌套JSON
INSERT INTO products (name, attributes) VALUES
('Server', '{
"brand": "HP",
"specs": {
"cpu": {"model": "Xeon", "cores": 16},
"memory": {"size": "64GB", "type": "DDR4"},
"storage": [
{"type": "SSD", "size": "1TB"},
{"type": "HDD", "size": "4TB"}
]
}
}');
-- 访问嵌套字段
SELECT
name,
attributes -> 'specs' -> 'cpu' ->> 'model' AS cpu_model,
attributes -> 'specs' -> 'memory' ->> 'size' AS memory_size,
attributes #>> '{specs,cpu,cores}' AS cpu_cores,
attributes #>> '{specs,storage,0,size}' AS first_storage_size
FROM products
WHERE name = 'Server';
3.3 包含操作符(JSONB专用)
-- @> : 包含(左边包含右边)
SELECT * FROM products
WHERE attributes @> '{"brand": "Dell"}';
SELECT * FROM products
WHERE tags @> '["electronics"]';
-- <@ : 被包含(左边被右边包含)
SELECT * FROM products
WHERE '{"brand": "Dell"}' <@ attributes;
-- ? : 是否存在键
SELECT * FROM products WHERE attributes ? 'brand';
-- ?| : 是否存在任一键
SELECT * FROM products WHERE attributes ?| ARRAY['brand', 'model'];
-- ?& : 是否存在所有键
SELECT * FROM products WHERE attributes ?& ARRAY['brand', 'price'];
4. 📊 JSON函数
4.1 创建JSON
-- json_build_object: 构建JSON对象
SELECT json_build_object(
'name', name,
'price', attributes ->> 'price',
'brand', attributes ->> 'brand'
) AS product_json
FROM products;
-- json_build_array: 构建JSON数组
SELECT json_build_array(1, 2, 3, 'test', TRUE);
-- jsonb_build_object
SELECT jsonb_build_object(
'id', id,
'name', name,
'tags', tags
) FROM products;
-- row_to_json: 行转JSON
SELECT row_to_json(users) FROM users LIMIT 1;
-- json_agg: 聚合为JSON数组
SELECT json_agg(json_build_object(
'name', name,
'brand', attributes ->> 'brand'
)) AS all_products
FROM products;
-- jsonb_object_agg: 聚合为JSON对象
SELECT jsonb_object_agg(name, attributes ->> 'brand') AS products_brands
FROM products;
4.2 JSON修改函数
-- jsonb_set: 设置值
UPDATE products
SET attributes = jsonb_set(
attributes,
'{price}',
'899.99'
)
WHERE name = 'Laptop';
-- 添加新字段
UPDATE products
SET attributes = jsonb_set(
attributes,
'{discount}',
'0.1',
true -- create_missing = true
)
WHERE attributes @> '{"brand": "Dell"}';
-- jsonb_insert: 插入值
UPDATE products
SET attributes = jsonb_insert(
attributes,
'{colors, 0}',
'"Black"'
)
WHERE name = 'Laptop';
-- || : 合并JSON
UPDATE products
SET attributes = attributes || '{"warranty": "2 years"}'
WHERE name = 'Laptop';
-- - : 删除键
UPDATE products
SET attributes = attributes - 'discount'
WHERE name = 'Laptop';
-- #- : 按路径删除
UPDATE products
SET attributes = attributes #- '{specs,storage,1}'
WHERE name = 'Server';
4.3 JSON查询函数
-- jsonb_each: 展开顶层键值对
SELECT * FROM jsonb_each('{"a": 1, "b": 2}'::jsonb);
-- 返回: key | value
-- a | 1
-- b | 2
-- jsonb_each_text: 展开为文本
SELECT * FROM jsonb_each_text('{"a": 1, "b": "test"}'::jsonb);
-- jsonb_object_keys: 获取所有键
SELECT jsonb_object_keys(attributes) AS key
FROM products
WHERE name = 'Laptop';
-- jsonb_array_elements: 展开数组
SELECT jsonb_array_elements(tags) AS tag
FROM products;
-- jsonb_array_elements_text
SELECT jsonb_array_elements_text(tags) AS tag
FROM products;
-- jsonb_typeof: 获取类型
SELECT
name,
jsonb_typeof(attributes -> 'brand') AS brand_type,
jsonb_typeof(attributes -> 'price') AS price_type,
jsonb_typeof(tags) AS tags_type
FROM products;
5. 🔍 复杂查询示例
5.1 示例1: 按JSON字段过滤和排序
-- 查询价格大于500的产品
SELECT
name,
attributes ->> 'brand' AS brand,
(attributes ->> 'price')::NUMERIC AS price
FROM products
WHERE (attributes ->> 'price')::NUMERIC > 500
ORDER BY (attributes ->> 'price')::NUMERIC DESC;
-- 查询有特定标签的产品
SELECT name, tags
FROM products
WHERE tags @> '["electronics"]'
ORDER BY name;
5.2 示例2: JSON数组操作
-- 创建订单表
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER,
items JSONB, -- [{product_id: 1, quantity: 2, price: 99.99}, ...]
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- 插入订单
INSERT INTO orders (user_id, items) VALUES
(1, '[
{"product_id": 1, "quantity": 2, "price": 99.99},
{"product_id": 2, "quantity": 1, "price": 599.99}
]'),
(2, '[
{"product_id": 3, "quantity": 5, "price": 15.99}
]');
-- 计算订单总额
SELECT
id,
user_id,
(
SELECT SUM((item ->> 'quantity')::INTEGER * (item ->> 'price')::NUMERIC)
FROM jsonb_array_elements(items) AS item
) AS total_amount
FROM orders;
-- 展开订单项
SELECT
o.id AS order_id,
o.user_id,
item ->> 'product_id' AS product_id,
item ->> 'quantity' AS quantity,
item ->> 'price' AS price
FROM orders o,
jsonb_array_elements(o.items) AS item;
-- 查询包含特定产品的订单
SELECT *
FROM orders
WHERE items @> '[{"product_id": 1}]';
5.3 示例3: 动态属性查询
-- 电商产品属性(不同产品有不同属性)
CREATE TABLE catalog_products (
id SERIAL PRIMARY KEY,
name VARCHAR(200),
category VARCHAR(50),
attributes JSONB
);
INSERT INTO catalog_products (name, category, attributes) VALUES
('MacBook Pro', 'laptop', '{
"brand": "Apple",
"screen": "14 inch",
"cpu": "M3 Pro",
"ram": "16GB",
"storage": "512GB SSD"
}'),
('iPhone 15', 'phone', '{
"brand": "Apple",
"screen": "6.1 inch",
"camera": "48MP",
"storage": "128GB"
}'),
('Sony WH-1000XM5', 'headphones', '{
"brand": "Sony",
"type": "Over-ear",
"wireless": true,
"noise_cancelling": true,
"battery": "30 hours"
}');
-- 查询有特定属性的产品
SELECT name, category
FROM catalog_products
WHERE attributes ? 'wireless' AND attributes ->> 'wireless' = 'true';
-- 按品牌分组统计
SELECT
attributes ->> 'brand' AS brand,
COUNT(*) AS product_count,
json_agg(json_build_object('name', name, 'category', category)) AS products
FROM catalog_products
GROUP BY attributes ->> 'brand';
-- 搜索包含关键字的产品
SELECT
name,
category,
attributes
FROM catalog_products
WHERE attributes::TEXT ILIKE '%Apple%';
6. 📈 索引优化
6.1 GIN索引
-- 创建GIN索引(推荐用于JSONB)
CREATE INDEX idx_products_attributes ON products USING GIN (attributes);
CREATE INDEX idx_products_tags ON products USING GIN (tags);
-- 路径索引
CREATE INDEX idx_products_brand ON products ((attributes ->> 'brand'));
CREATE INDEX idx_products_price ON products (((attributes ->> 'price')::NUMERIC));
-- 查询会使用索引
EXPLAIN ANALYZE
SELECT * FROM products WHERE attributes @> '{"brand": "Dell"}';
EXPLAIN ANALYZE
SELECT * FROM products WHERE attributes ->> 'brand' = 'Dell';
6.2 GIN索引操作符支持
-- GIN索引支持的操作符:
-- @>, <@, ?, ?|, ?&, @?(@@ for jsonpath)
-- 使用索引的查询
SELECT * FROM products WHERE attributes @> '{"brand": "Apple"}';
SELECT * FROM products WHERE attributes ? 'brand';
SELECT * FROM products WHERE attributes ?& ARRAY['brand', 'price'];
-- 不使用索引的查询(需要表达式索引)
SELECT * FROM products WHERE attributes ->> 'brand' = 'Apple';
7. 🎯 JSON路径查询(jsonpath)
PostgreSQL 12+支持SQL/JSON路径语言。
-- @? : 路径存在
SELECT * FROM products
WHERE attributes @? '$.brand ? (@ == "Apple")';
-- @@ : 路径匹配
SELECT * FROM products
WHERE attributes @@ '$.price > 500';
-- jsonb_path_query: 查询JSON路径
SELECT jsonb_path_query(
'{"name": "John", "age": 30, "skills": ["SQL", "Python"]}'::jsonb,
'$.skills[*]'
);
-- jsonb_path_query_array
SELECT jsonb_path_query_array(
attributes,
'$.specs.storage[*].size'
) AS storage_sizes
FROM products
WHERE name = 'Server';
-- jsonb_path_exists
SELECT name
FROM products
WHERE jsonb_path_exists(attributes, '$.price ? (@ > 500)');
8. 🔄 JSON与关系型数据转换
8.1 JSON转表
-- 创建用户设置表
CREATE TABLE user_settings (
user_id INTEGER PRIMARY KEY,
settings JSONB
);
INSERT INTO user_settings VALUES
(1, '{"theme": "dark", "language": "en", "notifications": {"email": true, "sms": false}}'),
(2, '{"theme": "light", "language": "zh", "notifications": {"email": false, "sms": true}}');
-- 展开为行
SELECT
user_id,
key AS setting_name,
value AS setting_value
FROM user_settings,
jsonb_each(settings);
-- 展开嵌套
SELECT
user_id,
'notifications.' || key AS setting_name,
value AS setting_value
FROM user_settings,
jsonb_each(settings -> 'notifications');
8.2 表转JSON
-- 单行转JSON
SELECT row_to_json(users.*) FROM users WHERE id = 1;
-- 多行聚合为JSON数组
SELECT jsonb_agg(to_jsonb(users.*)) AS all_users FROM users;
-- 自定义JSON结构
SELECT jsonb_build_object(
'user_id', u.id,
'user_name', u.username,
'orders', (
SELECT jsonb_agg(jsonb_build_object(
'order_id', o.id,
'amount', o.total_amount,
'date', o.created_at
))
FROM orders o
WHERE o.user_id = u.id
)
) AS user_with_orders
FROM users u
WHERE u.id = 1;
9. 🆚 PostgreSQL vs MySQL
| 功能 | PostgreSQL | MySQL |
|---|---|---|
| JSON类型 | JSON, JSONB | JSON |
| 二进制存储 | JSONB | 是(8.0+) |
| 索引 | GIN, GiST, 表达式索引 | 虚拟列索引 |
| 操作符 | ->, ->>, @>, ?, etc. |
->>, -> |
| 修改函数 | jsonb_set, jsonb_insert, ||, - |
JSON_SET, JSON_INSERT, JSON_REMOVE |
| 路径查询 | jsonpath | JSON path |
| 性能 | JSONB非常快 | 较好 |
10. 🎯 最佳实践
10.1 1. 优先使用JSONB
-- 推荐
CREATE TABLE products (
id SERIAL PRIMARY KEY,
attributes JSONB
);
-- 不推荐(除非需要保留格式)
CREATE TABLE products (
id SERIAL PRIMARY KEY,
attributes JSON
);
10.2 2. 为常用查询创建索引
-- GIN索引用于包含查询
CREATE INDEX idx_attributes ON products USING GIN (attributes);
-- 表达式索引用于精确匹配
CREATE INDEX idx_brand ON products ((attributes ->> 'brand'));
CREATE INDEX idx_price ON products (((attributes ->> 'price')::NUMERIC));
10.3 3. 验证JSON结构
-- 创建CHECK约束验证JSON结构
CREATE TABLE products (
id SERIAL PRIMARY KEY,
attributes JSONB,
CHECK (
attributes ? 'name' AND
attributes ? 'price' AND
jsonb_typeof(attributes -> 'price') = 'number'
)
);
-- 或使用触发器
CREATE OR REPLACE FUNCTION validate_product_attributes()
RETURNS TRIGGER AS $$
BEGIN
IF NOT (NEW.attributes ? 'name' AND NEW.attributes ? 'price') THEN
RAISE EXCEPTION 'Product must have name and price';
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER validate_attributes
BEFORE INSERT OR UPDATE ON products
FOR EACH ROW EXECUTE FUNCTION validate_product_attributes();
10.4 4. 适度使用JSON
-- 好的使用场景:
-- - 动态属性(每个产品属性不同)
-- - 嵌入式文档(审计日志,元数据)
-- - 灵活的配置
-- 不好的使用场景:
-- - 频繁查询和索引的核心业务字段
-- - 需要强类型和约束的数据
-- - 需要外键关联的数据
-- 混合使用
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200) NOT NULL, -- 核心字段用列
price NUMERIC(10,2) NOT NULL, -- 核心字段用列
category_id INTEGER REFERENCES categories(id),
attributes JSONB, -- 动态属性用JSON
metadata JSONB -- 元数据用JSON
);
11. 📝 实战示例
11.1 电商产品搜索
-- 创建产品全文搜索
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
category VARCHAR(100),
specs JSONB
);
-- 插入测试数据
INSERT INTO products (name, category, specs) VALUES
('Dell XPS 15', 'laptop', '{"cpu": "i7-12700H", "ram": "16GB", "storage": "512GB SSD", "screen": "15.6 inch", "price": 1299.99}'),
('MacBook Pro 14', 'laptop', '{"cpu": "M3 Pro", "ram": "18GB", "storage": "512GB SSD", "screen": "14 inch", "price": 1999.99}'),
('ThinkPad X1', 'laptop', '{"cpu": "i7-1260P", "ram": "16GB", "storage": "1TB SSD", "screen": "14 inch", "price": 1499.99}');
-- 多条件搜索
SELECT
name,
category,
specs ->> 'cpu' AS cpu,
specs ->> 'ram' AS ram,
(specs ->> 'price')::NUMERIC AS price
FROM products
WHERE category = 'laptop'
AND (specs ->> 'price')::NUMERIC BETWEEN 1000 AND 2000
AND specs @> '{"ram": "16GB"}'
ORDER BY (specs ->> 'price')::NUMERIC;
-- 创建视图简化查询
CREATE VIEW product_search AS
SELECT
id,
name,
category,
specs ->> 'cpu' AS cpu,
specs ->> 'ram' AS ram,
specs ->> 'storage' AS storage,
(specs ->> 'price')::NUMERIC AS price,
specs
FROM products;
SELECT * FROM product_search
WHERE price < 1500 AND ram = '16GB';
12. 📚 下一步
关键要点:
- 优先使用JSONB而非JSON
- 使用GIN索引优化JSONB查询
@>操作符用于包含查询- 适度使用JSON,核心字段仍用列存储
- jsonb_set/jsonb_insert用于修改JSON
xingliuhua