If ...
INSERT INTO TABLE_NAME
SELECT
STRING_AGG(COLUMN_NAME, ',')
FROM
TABLE_NAME
introduces an anti-pattern (i.e., it may lead to poor performance, incorrect results (verify this), and maintainability problems in T-SQL queries), then how about this:
INSERT INTO TABLE_NAME(COLUMN_NAME)
SELECT
N'{"KEY_VALUE": [' + TBN.STR_AGG + '"]}' JSON_FORMAT_VALUE
FROM
(SELECT
STRING_AGG('"' + COLUMN_NAME, + '", ') STR_AGG
FROM
TABLE_NAME) TBN
Here is my actual test example query:
DECLARE @users TABLE(
id INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
username NVARCHAR(100) NOT NULL,
email NVARCHAR(100) NOT NULL,
status VARCHAR(50)
);
DECLARE @features TABLE(
id INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
description VARCHAR(200) NOT NULL,
is_enabled BIT
);
DECLARE @feature_user TABLE(
user_id INT NOT NULL,
feature_id INT NOT NULL
);
INSERT INTO @users(
username,
email,
status
)
VALUES(
N'john_doe',
N'[email protected]',
'active'
),
(
N'mark_daniels',
N'[email protected]',
'inactive'
),
(
N'alice_jane',
N'[email protected]',
'active'
);
INSERT INTO @features(
name,
description,
is_enabled
)
VALUES(
'notifications',
'Send notifications to users',
'TRUE'
),
(
'csv export',
'Export data to CSV format',
'FALSE'
),
(
'redesign landing page',
'Revamp the landing page layout',
'TRUE'
);
INSERT INTO @feature_user
VALUES(
1,
1
),
(
1,
2
),
(
1,
3
),
(
2,
1
),
(
2,
2
),
(
3,
3
)
-- Produces comma-delimited data structure
SELECT
u.id AS user_id,
u.username,
u.email,
u.status,
STRING_AGG(f.name, ', ') AS feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status;
-- Produces denormalized data structure
SELECT
feature_data.user_id,
feature_data.username,
feature_data.email,
feature_data.status,
N'{"feature_data": [' + feature_data.feature_names + '"]}' feature_names
--ADD HERE ANY OTHER COLUMNS THAT WOULD OTHERWISE CAUSE DUPLICATE ROWS; USE THE SAME APPROACH AS feature_names
FROM(
SELECT
u.id AS user_id,
u.username,
u.email,
u.status,
STRING_AGG('"' + f.name, + '", ') feature_names
--ADD HERE THE RECORDS THAT YOU DON'T WANT TO BE DUPLICATED ROWS, WHERE YOU NEED TO USE THE SAME APPROACH WITH feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status) feature_data;
-- The query below checks whether the JSON data is valid
SELECT
ISJSON(N'{"feature_data": [' + feature_data.feature_names + '"]}') feature_names
--ADD HERE ANY OTHER COLUMNS THAT WOULD OTHERWISE CAUSE DUPLICATE ROWS; USE THE SAME APPROACH AS feature_names
FROM(
SELECT
STRING_AGG('"' + f.name, + '", ') feature_names
--ADD HERE THE RECORDS THAT YOU DON'T WANT TO BE DUPLICATED ROWS, WHERE YOU NEED TO USE THE SAME APPROACH WITH feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status) feature_data;
The SELECT queries produce the following output:
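One caveat the ISJSON check only catches after the fact: a value containing a double quote or backslash would break the hand-built JSON. A minimal sketch of the same aggregation with each value escaped first (STRING_ESCAPE is available from SQL Server 2016 onward); this is an illustrative variation, not part of the original queries:
-- Escape each name for JSON before concatenating it into the array
SELECT
    N'{"feature_data": ["' + STRING_AGG(STRING_ESCAPE(f.name, 'json'), '", "') + '"]}' AS feature_names
FROM @features f;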
Credit to the owner of the idea: How to avoid redundant rows when joining tables in SQL?
However, this scenario reflects my own database setup.
Explanation: the JSON data is decoded for later use; this requires a SQL Server database with a compatibility level of 130 or greater, and I am using compatibility level 140 (SQL Server 2017).
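As a quick sanity check, here is a minimal sketch for confirming (and, if needed, raising) the compatibility level; the ALTER statement is commented out and the database name is a placeholder:
-- Check the current compatibility level (130+ is needed for OPENJSON/STRING_SPLIT)
SELECT name, compatibility_level
FROM sys.databases
WHERE name = DB_NAME();
-- Raise it if necessary ('YourDatabase' is a placeholder name)
-- ALTER DATABASE YourDatabase SET COMPATIBILITY_LEVEL = 140;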
The application of this idea, in relation to the given example, is for concatenating data within a single attribute (e.g., a specific set of columns). Which of the given examples is the more performance-efficient one? That would help a lot when considering a constantly growing database.
Aside from that, can someone provide a set of queries that avoids the anti-pattern?
I want to dig deeper into the anti-pattern scenario so that I can really understand how it actually affects the queries.
UPDATE:
DECLARE @compile_table_str_agg TABLE( --denormalizing data through comma-delimited data compilation
user_id INT NOT NULL,
username NVARCHAR(100) NOT NULL,
email NVARCHAR(100) NOT NULL,
status VARCHAR(50),
feature_names VARCHAR(100) NOT NULL
);
DECLARE @compile_table_json TABLE( --denormalizing data through json-structure data compilation
user_id INT NOT NULL,
username NVARCHAR(100) NOT NULL,
email NVARCHAR(100) NOT NULL,
status VARCHAR(50),
feature_names VARCHAR(100) NOT NULL
);
INSERT INTO @compile_table_str_agg
SELECT
u.id AS user_id,
u.username,
u.email,
u.status,
STRING_AGG(f.name, ', ') AS feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status;
INSERT INTO @compile_table_json
SELECT
feature_data.user_id,
feature_data.username,
feature_data.email,
feature_data.status,
N'{"feature_data": [' + feature_data.feature_names + '"]}' feature_names
--ADD HERE ANY OTHER COLUMNS THAT WOULD OTHERWISE CAUSE DUPLICATE ROWS; USE THE SAME APPROACH AS feature_names
FROM(
SELECT
u.id AS user_id,
u.username,
u.email,
u.status,
STRING_AGG('"' + f.name, + '", ') feature_names
--ADD HERE THE RECORDS THAT YOU DON'T WANT TO BE DUPLICATED ROWS, WHERE YOU NEED TO USE THE SAME APPROACH WITH feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status) feature_data;
-- Retrieving the compiled STRING_AGG data
SELECT
user_id,
username,
email,
status,
ca.*
FROM
@compile_table_str_agg str_agg1
CROSS APPLY(
SELECT
com_delimited.*
FROM
STRING_SPLIT(
feature_names, ',') com_delimited
) ca;
-- Retrieving the compiled JSON structure data
SELECT
user_id,
username,
email,
status,
sa.ft_values
FROM
@compile_table_json json1
CROSS APPLY
OPENJSON(json1.feature_names) WITH(
feature_data NVARCHAR(MAX) '$.feature_data' AS JSON
) ca
CROSS APPLY OPENJSON(ca.feature_data) WITH (ft_values NVARCHAR(25) '$') sa;
The SELECT queries in this update produce the following output:
Conclusion: both approaches produce the same output, but which one offers better performance or maintainability as the database keeps growing?
JSON query reference:
DECLARE @json NVARCHAR(MAX);
SET @json = '{"info": {"address": [{"town": "Belgrade"}, {"town": "Paris"}, {"town":"Madrid"}]}}';
SET @json = JSON_MODIFY(@json, '$.info.address[0].town', 'Philippines');
SELECT modifiedJson = @json;
DECLARE @json2 NVARCHAR(MAX);
SET @json2 = N'[
{"id": 2, "info": {"name": "John", "surname": "Smith"}, "age": 25},
{"id": 5, "info": {"name": "Jane", "surname": "Smith"}, "dob": "2005-11-04T12:00:00"}
]';
SELECT *
FROM OPENJSON(@json2) WITH (
id INT 'strict $.id',
firstName NVARCHAR(50) '$.info.name',
lastName NVARCHAR(50) '$.info.surname',
age INT,
dateOfBirth DATETIME2 '$.dob'
);
DECLARE @json3 NVARCHAR(MAX);
SET @json3 = N'[
{"id": 3, "info": {"name": "John", "surname": "Smith"}, "age": 25},
{"id": 5, "info": {"name": "Jane", "surname": "Smith", "skills": ["SQL", "C#", "Azure"]}, "dob": "2005-11-04T12:00:00"},
{"id": 1, "info": {"name": "DevQt", "surname": "PH", "skills": ["Dart", "Java", "C#", "VB", "Javascript", "SQL"]}, "age": 26, "dob": "2005-11-04T12:00:00"}
]';
SELECT id,
firstName,
lastName,
age,
dateOfBirth,
skill
FROM OPENJSON(@json3) WITH (
id INT 'strict $.id',
firstName NVARCHAR(50) '$.info.name',
lastName NVARCHAR(50) '$.info.surname',
age INT,
dateOfBirth DATETIME2 '$.dob',
skills NVARCHAR(MAX) '$.info.skills' AS JSON
)
OUTER APPLY OPENJSON(skills) WITH (skill NVARCHAR(15) '$')
ORDER BY id;
DECLARE @jsonVariable NVARCHAR(MAX);
SET @jsonVariable = N'[
{
"Order": {
"Number":"SO43659",
"Date":"2011-05-31T00:00:00"
},
"AccountNumber":"AW29825",
"Item": {
"Price":2024.9940,
"Quantity":1
}
},
{
"Order": {
"Number":"SO43661",
"Date":"2011-06-01T00:00:00"
},
"AccountNumber":"AW73565",
"Item": {
"Price":2024.9940,
"Quantity":3
}
}
]';
-- INSERT INTO <sampleTable>
SELECT SalesOrderJsonData.*
FROM OPENJSON(@jsonVariable, N'$') WITH (
Number VARCHAR(200) N'$.Order.Number',
Date DATETIME N'$.Order.Date',
Customer VARCHAR(200) N'$.AccountNumber',
Quantity INT N'$.Item.Quantity'
) AS SalesOrderJsonData;
The JSON queries produce the following output:
Additional note: I have included the JSON-handling references so that others can understand my point of view on embedding JSON in a relational database in SQL Server.
Original reference: JSON data in SQL Server
Have you noticed that your queries are littered with group by constructs? That can be a performance hit, because you have to process all of the related records and then "remove" the duplicates. It is better not to store duplicates in the first place.
Also, in my opinion, your database has to do a lot of work just to get the data back into the state it should have been in all along. That really is a performance hit.
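A minimal sketch of the normalized alternative being hinted at here, reusing the table variables from the question: keep the junction table as the source of truth and join when you need the detail, with no aggregation or string handling at all:
-- One row per user/feature pair, straight from the normalized tables
SELECT
    u.id AS user_id,
    u.username,
    u.email,
    u.status,
    f.name AS feature_name
FROM @users u
JOIN @feature_user fu ON fu.user_id = u.id
JOIN @features f ON f.id = fu.feature_id;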
I think you may not have noticed that the first query in your update returns incorrect values: rows 2, 3, and 5 all appear to have leading spaces! (Consider this verified.)
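The leading spaces come from splitting on ',' while the values were aggregated with the two-character separator ', '. A sketch of one way to compensate, assuming the same @compile_table_str_agg from the update:
-- Trim the fragments produced by STRING_SPLIT (the separator was ', ', not ',')
SELECT
    user_id,
    username,
    email,
    status,
    LTRIM(com_delimited.value) AS feature_name
FROM @compile_table_str_agg str_agg1
CROSS APPLY STRING_SPLIT(feature_names, ',') com_delimited;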
If you need to query this data for a specific feature, you can forget about using any indexes; your database will have to do a table scan.
After all, how do you determine whether a given value appears in the aggregated field?
Given:
... will work, but ...
... will not!
(You cannot use like '%notifications%', because someone else might later add another feature that contains this word, e.g., "email notifications"!)
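To make the contrast concrete, a sketch reusing the table variables from the question (on table variables the indexing point is moot, but it holds for real tables): the LIKE test over the aggregated column cannot distinguish 'notifications' from a hypothetical 'email notifications' feature, while the normalized form can be filtered exactly:
-- Fragile: would also match a later-added 'email notifications' feature
SELECT *
FROM @compile_table_str_agg
WHERE feature_names LIKE '%notifications%';
-- Exact, and index-friendly on real (non-table-variable) tables
SELECT u.*
FROM @users u
WHERE EXISTS (
    SELECT 1
    FROM @feature_user fu
    JOIN @features f ON f.id = fu.feature_id
    WHERE fu.user_id = u.id
      AND f.name = 'notifications'
);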
As for overall performance, we cannot tell you. Test your scenario and see how it performs, but I suspect it will start to slow down noticeably as the data grows.
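One minimal way to run that test yourself, assuming the script from the update is still in scope, is to wrap the two retrieval queries with statistics output and compare elapsed times and reads in the Messages tab:
SET STATISTICS IO, TIME ON;
SELECT user_id, username, email, status, feature_names
FROM @compile_table_str_agg;
SELECT user_id, username, email, status, feature_names
FROM @compile_table_json;
SET STATISTICS IO, TIME OFF;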