SQL Server - 使用聚集索引时如何存储数据页

Question

VansFannel

Asked: 2018-03-09 05:45:26 +0800 CST

使用 OPENJSON 生成嵌套数组非常慢

772

我刚开始在 SQL Server 2016 SP1 中使用 OPENJSON。

我有这样一条语句：

-- One JSON object per aggregation: the parent's Serial plus a nested array of its children.
select c.Serial as Parent,
    -- Correlated subquery: rows for the outer aggregation (a), serialized as a JSON array.
    -- NOTE(review): "Aggregation ag" is listed in FROM but no predicate references it,
    -- so each matching child row is cross-joined with every row of Aggregation.
    (Select co.Serial, agc.Position
      from AggregationChildren agc, Aggregation ag, Code co
      where agc.AggregationId = a.AggregationId 
      and co.CodeId = agc.AggregationChildrenId for json path) as children
    from Aggregation a, Code c
    where c.CodeId = a.AggregationId for json path

要生成此 JSON：

{"Parent":"4244287599479491","Children":
[{"Serial":"8915753733724633","Position":"1"},
{"Serial":"1247782815710855","Position":"2"},
...]}

但它非常非常慢。

我的问题是Children数组，因为我不知道如何获取它。

有没有办法做得更快？

这些是表格：

-- Code: CodeId is an identity column and the clustered primary key; Serial is the value
-- emitted in the JSON output.
-- "[ ... ]" entries are presumably columns/constraints the author left out of the post.
CREATE TABLE [dbo].[Code] (
    [CodeId]            INT            IDENTITY (1, 1) NOT NULL,
    [Serial]            NVARCHAR (20)  NOT NULL,
    [ ... ],
    CONSTRAINT [PK_CODE] PRIMARY KEY CLUSTERED ([CodeId] ASC),
    [ ... ]
)

-- Aggregation: AggregationId is the clustered primary key and also a foreign key to
-- Code.CodeId, i.e. every aggregation row corresponds to a Code row with the same id.
-- "[ ... ]" is presumably content the author left out of the post.
CREATE TABLE [dbo].[Aggregation] (
    [AggregationId] INT           NOT NULL,
    [ ... ], 
    CONSTRAINT [PK_AGGREGATIONS] PRIMARY KEY CLUSTERED ([AggregationId] ASC),
    CONSTRAINT [FK_Aggregation_Code]
           FOREIGN KEY ([AggregationId])
            REFERENCES [dbo].[Code] ([CodeId])
)

-- AggregationChildren: links a child code (AggregationChildrenId -> Code.CodeId) to its
-- parent aggregation (AggregationId -> Aggregation.AggregationId) with a Position.
-- The primary key is AggregationChildrenId alone, so a code can appear as a child only once.
-- No index on AggregationId is declared in the DDL shown here.
CREATE TABLE [dbo].[AggregationChildren] (
    [AggregationChildrenId] INT NOT NULL,
    [AggregationId]         INT NOT NULL,
    [Position]              INT NOT NULL,
    CONSTRAINT [PK_AGGREGATION_CHILDS] PRIMARY KEY CLUSTERED ([AggregationChildrenId] ASC),
    CONSTRAINT [FK_AggregationChildren_Code]
           FOREIGN KEY ([AggregationChildrenId])
            REFERENCES [dbo].[Code] ([CodeId]),
    -- Deleting an aggregation removes its child rows.
    CONSTRAINT [FK_AggregationChildren_Aggregation]
           FOREIGN KEY ([AggregationId])
            REFERENCES [dbo].[Aggregation] ([AggregationId]) ON DELETE CASCADE
)

Serial 列的类型是 nvarchar(20)，因为它的值可以是字母和数字的任意组合，尽管我的示例中只出现了数字。

1 个回答

Voted

Hannah Vernon · Answer 1 · 2018-03-09T08:24:38+08:00

我很难解析您的查询，但我相信这会返回相同的结果，而且速度要快得多：

-- Parent/children JSON using explicit joins; the inner query is correlated on a.AggregationId.
SELECT Parent = c.Serial
    , Children = (
        -- NOTE(review): c.Serial resolves to the outer alias c (the parent row), not the
        -- child row cc; the answer text further down diagnoses this and changes it to cc.Serial.
        SELECT c.Serial 
            , cac.Position
        FROM dbo.Code cc
            INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
        WHERE cac.AggregationId = a.AggregationId
        FOR JSON PATH 
    )
FROM dbo.Code c
    INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

上述查询的计划如下所示：

您的查询计划如下所示：

如果我们添加以下索引，我们可以使第一个变体更快：

-- Index keyed on the column the child subquery filters by (AggregationId); the other
-- two AggregationChildren columns the query reads are carried as included columns.
CREATE NONCLUSTERED INDEX IX_AggregationChildren_IX0
    ON dbo.AggregationChildren
    (
        AggregationId
    )
    INCLUDE
    (
        AggregationChildrenId,
        Position
    );

但是，显然，您需要根据您的工作量对此进行评估。

我创建了一个最低限度可行的完整示例设置以用于测试：

USE tempdb;

-- Remove any previous copy of the test tables.
-- Order matters: referencing tables are dropped before the tables they reference.
DROP TABLE IF EXISTS dbo.AggregationChildren;
DROP TABLE IF EXISTS dbo.Aggregation;
DROP TABLE IF EXISTS dbo.Code;
GO

-- Test copy of Code. CodeId has no IDENTITY here, so the populate scripts supply ids explicitly.
CREATE TABLE dbo.Code (
    CodeId int NOT NULL
        CONSTRAINT PK_CODE 
        PRIMARY KEY 
        CLUSTERED
    , Serial nvarchar(20) NOT NULL
);


-- Test copy of Aggregation: AggregationId is the clustered primary key and a foreign key
-- to Code.CodeId. Both constraints are attached inline to the column definition.
CREATE TABLE dbo.Aggregation (
    AggregationId int NOT NULL
        CONSTRAINT PK_AGGREGATIONS 
        PRIMARY KEY 
        CLUSTERED
        CONSTRAINT FK_Aggregation_Code
        FOREIGN KEY (AggregationId)
        REFERENCES dbo.Code (CodeId)
)

-- Test copy of AggregationChildren: one row per child code, pointing at its parent aggregation.
CREATE TABLE dbo.AggregationChildren (
    -- Child code id: clustered primary key and foreign key to Code.CodeId.
    AggregationChildrenId int NOT NULL
        CONSTRAINT PK_AGGREGATION_CHILDS 
        PRIMARY KEY 
        CLUSTERED
        CONSTRAINT FK_AggregationChildren_Code
        FOREIGN KEY (AggregationChildrenId)
        REFERENCES dbo.Code (CodeId)
    -- Parent aggregation; deleting the parent removes its child rows.
    , AggregationId int NOT NULL
        CONSTRAINT FK_AggregationChildren_Aggregation
        FOREIGN KEY (AggregationId)
        REFERENCES dbo.Aggregation (AggregationId) 
        ON DELETE CASCADE
    , Position int NOT NULL
)

我重新格式化了约束条款，使其更适合我的大脑；本质上，上面的代码与您问题中的 DDL 相同。

这会用足够的数据填充三个表以进行有意义的比较：

-- Fill dbo.Code with CodeId 0..99999 by combining five decimal digits.
-- Serial is 8 random bytes read as a bigint (converted to nvarchar on insert).
;WITH digits (d) AS
(
    SELECT v.d
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (d)
)
INSERT INTO dbo.Code (CodeId, Serial)
SELECT
    ones.d
        + (tens.d * 10)
        + (hundreds.d * 100)
        + (thousands.d * 1000)
        + (ten_thousands.d * 10000),
    CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM digits AS ones
    CROSS JOIN digits AS tens
    CROSS JOIN digits AS hundreds
    CROSS JOIN digits AS thousands
    CROSS JOIN digits AS ten_thousands;


-- Fill dbo.Aggregation with AggregationId 0..999 by combining three decimal digits.
;WITH digits (d) AS
(
    SELECT v.d
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (d)
)
INSERT INTO dbo.Aggregation (AggregationId)
SELECT
    ones.d
        + (tens.d * 10)
        + (hundreds.d * 100)
FROM digits AS ones
    CROSS JOIN digits AS tens
    CROSS JOIN digits AS hundreds;



-- Fill dbo.AggregationChildren with one row per CodeId 0..99999.
-- The parent is the child id's last three digits (0..999); Position is its last digit.
;WITH digits (d) AS
(
    SELECT v.d
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (d)
)
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT
    -- child code id: all five digits
    ones.d
        + (tens.d * 10)
        + (hundreds.d * 100)
        + (thousands.d * 1000)
        + (ten_thousands.d * 10000),
    -- parent aggregation id: lowest three digits
    ones.d
        + (tens.d * 10)
        + (hundreds.d * 100),
    -- position: lowest digit
    ones.d
FROM digits AS ones
    CROSS JOIN digits AS tens
    CROSS JOIN digits AS hundreds
    CROSS JOIN digits AS thousands
    CROSS JOIN digits AS ten_thousands;

这些是每个表的行数：

╔════════╦═════════════╦═════════════════════╗
║ Code   ║ Aggregation ║ AggregationChildren ║
╠════════╬═════════════╬═════════════════════╣
║ 100000 ║ 1000        ║ 100000              ║
╚════════╩═════════════╩═════════════════════╝

我的查询版本：

-- The answer's query as first written (the version used for the comparison below).
SELECT Parent = c.Serial
    , Children = (
        -- NOTE(review): c.Serial is the outer (parent) row's Serial, not the child's;
        -- the answer text further down corrects this to cc.Serial.
        SELECT c.Serial 
            , cac.Position
        FROM dbo.Code cc
            INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
        WHERE cac.AggregationId = a.AggregationId
        FOR JSON PATH 
    )
FROM dbo.Code c
    INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
FOR JSON PATH;

为了比较两个查询的输出，我创建了两个用户定义的函数，如下所示：

-- Returns, as a single nvarchar(max) value, the JSON produced by the answer's query
-- as first written, so it can be captured in a variable and compared.
-- NOTE(review): the inner SELECT uses c.Serial (outer alias); the answer text further
-- down identifies this as the cause of the wrong Serial values.
CREATE FUNCTION dbo.fn_json_test_1()
RETURNS nvarchar(max)
AS
BEGIN
    RETURN (
        SELECT Parent = c.Serial
            , Children = (
                SELECT c.Serial 
                    , cac.Position
                FROM dbo.Code cc
                    INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
                WHERE cac.AggregationId = a.AggregationId
                FOR JSON PATH 
            )
        FROM dbo.Code c
            INNER JOIN dbo.Aggregation a ON c.CodeId = a.AggregationId
        FOR JSON PATH
    );
END;
GO


GO
-- Returns, as a single nvarchar(max) value, the JSON produced by the asker's query,
-- reproduced with its comma joins intact.
CREATE FUNCTION dbo.fn_json_test_2()
RETURNS nvarchar(max)
AS
BEGIN
    RETURN (
        SELECT c.Serial as Parent,
            -- "Aggregation ag" has no predicate tying it to the other tables in this subquery.
            (Select co.Serial, agc.Position
              from AggregationChildren agc, Aggregation ag, Code co
              where agc.AggregationId = a.AggregationId 
              and co.CodeId = agc.AggregationChildrenId for json path) as children
        from Aggregation a, Code c
        where c.CodeId = a.AggregationId for json path
    );
END;
GO

现在，我可以通过以下方式比较两个查询的输出：

-- Capture the JSON text produced by each function.
DECLARE @res1 nvarchar(max) = dbo.fn_json_test_1();
DECLARE @res2 nvarchar(max) = dbo.fn_json_test_2();

-- NULL-safe comparison: a bare "@res1 <> @res2" evaluates to UNKNOWN when either value
-- is NULL and would fall through to 'match' even if only one side is NULL.
-- NOTE(review): neither function's query has an ORDER BY, so a textual mismatch may be
-- an ordering difference rather than a content difference.
SELECT CASE
           WHEN @res1 = @res2 THEN 'match'
           WHEN @res1 IS NULL AND @res2 IS NULL THEN 'match'
           ELSE 'mismatch'
       END;

结果是：

结果不匹配。我的查询输出的子节点比您的查询少。我得从头检查一遍，并简化测试环境以找出差异所在。

简化的测试环境由 Code 表中的 10 行、（父）表 Aggregation 中的 2 行和（子）表 AggregationChildren 中的 8 行组成：

-- Ten Code rows (CodeId 0..9); Serial is 8 random bytes read as a bigint.
INSERT INTO dbo.Code (CodeId, Serial)
SELECT
    v.CodeId,
    CONVERT(bigint, CRYPT_GEN_RANDOM(8))
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (CodeId);


-- Two parent rows: AggregationId 0 and 1.
INSERT INTO dbo.Aggregation (AggregationId)
SELECT v.AggregationId
FROM (VALUES (0), (1)) AS v (AggregationId);



-- Eight child rows built from n = 0..7.
INSERT INTO dbo.AggregationChildren (AggregationChildrenId, AggregationId, Position)
SELECT
    v.n + 2,    -- child code id 2..9 (ids 0 and 1 are the two parents)
    v.n % 2,    -- even n -> parent 0, odd n -> parent 1
    v.n         -- position 0..7
FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7)) AS v (n);

行数：

-- Row count of each test table, returned side by side in a single row.
SELECT
    (SELECT COUNT(*) FROM dbo.Code) AS Code,
    (SELECT COUNT(*) FROM dbo.Aggregation) AS Aggregation,
    (SELECT COUNT(*) FROM dbo.AggregationChildren) AS AggregationChildren;

╔══════╦═════════════╦═════════════════════╗
║ Code ║ Aggregation ║ AggregationChildren ║
╠══════╬═════════════╬═════════════════════╣
║ 10   ║ 2           ║ 8                   ║
╚══════╩═════════════╩═════════════════════╝

预测的模式应该是两个父 json 数组，每个都有 4 个子数组。

我的结果：

[
  {
    "Parent": "-5601362097731340301",
    "Children": [
      {
        "Serial": "-5601362097731340301",
        "Position": 0
      },
      {
        "Serial": "-5601362097731340301",
        "Position": 2
      },
      {
        "Serial": "-5601362097731340301",
        "Position": 4
      },
      {
        "Serial": "-5601362097731340301",
        "Position": 6
      }
    ]
  },
  {
    "Parent": "-8896860091721838065",
    "Children": [
      {
        "Serial": "-8896860091721838065",
        "Position": 1
      },
      {
        "Serial": "-8896860091721838065",
        "Position": 3
      },
      {
        "Serial": "-8896860091721838065",
        "Position": 5
      },
      {
        "Serial": "-8896860091721838065",
        "Position": 7
      }
    ]
  }
]

您的查询：

[
  {
    "Parent": "-5601362097731340301",
    "children": [
      {
        "Serial": "5802227619253639548",
        "Position": 0
      },
      {
        "Serial": "5802227619253639548",
        "Position": 0
      },
      {
        "Serial": "4504664379821512162",
        "Position": 2
      },
      {
        "Serial": "4504664379821512162",
        "Position": 2
      },
      {
        "Serial": "6561435639659176802",
        "Position": 4
      },
      {
        "Serial": "6561435639659176802",
        "Position": 4
      },
      {
        "Serial": "-7417083263182709739",
        "Position": 6
      },
      {
        "Serial": "-7417083263182709739",
        "Position": 6
      }
    ]
  },
  {
    "Parent": "-8896860091721838065",
    "children": [
      {
        "Serial": "-7646118996434234523",
        "Position": 1
      },
      {
        "Serial": "-7646118996434234523",
        "Position": 1
      },
      {
        "Serial": "-6372739442099935942",
        "Position": 3
      },
      {
        "Serial": "-6372739442099935942",
        "Position": 3
      },
      {
        "Serial": "-882384147532911428",
        "Position": 5
      },
      {
        "Serial": "-882384147532911428",
        "Position": 5
      },
      {
        "Serial": "4293317573306886053",
        "Position": 7
      },
      {
        "Serial": "4293317573306886053",
        "Position": 7
      }
    ]
  }
]

您的查询返回了过多的子节点；我的查询返回了预期数量的子节点，Position 值也正确，但 Serial 值不正确。

我查询中的“错误”出现在内部查询中。不正确的查询是：

-- Inner (correlated) query as first written; a and c are aliases from the enclosing query.
-- c.Serial and ORDER BY c.Serial refer to the outer parent row, not the child row cc,
-- which is why every child in the output above carries its parent's Serial.
SELECT c.Serial 
    , cac.Position
FROM dbo.Code cc
    INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY c.Serial
FOR JSON PATH

正确的版本是：

-- Corrected inner query: Serial now comes from the child's Code row (cc).
-- a is still the alias of the enclosing query's Aggregation row.
SELECT cc.Serial --changed "c." to "cc."
    , cac.Position
FROM dbo.Code cc
    INNER JOIN dbo.AggregationChildren cac ON cac.AggregationChildrenId = cc.CodeId
WHERE cac.AggregationId = a.AggregationId
ORDER BY cc.CodeId --not a big deal, but different order for children in output
FOR JSON PATH

更正后的查询现在看起来像：

-- One JSON object per aggregation: the parent's Serial and a nested array of
-- its children (Serial, Position). Parents are ordered by Serial.
SELECT
    parent_code.Serial AS Parent,
    (
        -- Children of the current aggregation, ordered by the child's CodeId.
        SELECT
            child_code.Serial,
            child.Position
        FROM dbo.AggregationChildren AS child
        INNER JOIN dbo.Code AS child_code
            ON child_code.CodeId = child.AggregationChildrenId
        WHERE child.AggregationId = agg.AggregationId
        ORDER BY child_code.CodeId
        FOR JSON PATH
    ) AS Children
FROM dbo.Aggregation AS agg
INNER JOIN dbo.Code AS parent_code
    ON parent_code.CodeId = agg.AggregationId
ORDER BY parent_code.Serial
FOR JSON PATH;

并返回以下结果：

[
  {
    "Parent": "-195930341251513493",
    "Children": [
      {
        "Serial": "-6126601633786720400",
        "Position": 1
      },
      {
        "Serial": "5216562173012877678",
        "Position": 3
      },
      {
        "Serial": "-1992909345438478098",
        "Position": 5
      },
      {
        "Serial": "8329388691987940194",
        "Position": 7
      }
    ]
  },
  {
    "Parent": "8774608126018975726",
    "Children": [
      {
        "Serial": "-3380643917643646211",
        "Position": 0
      },
      {
        "Serial": "-2042609074595538493",
        "Position": 2
      },
      {
        "Serial": "7345460002653774160",
        "Position": 4
      },
      {
        "Serial": "-2126530822210070443",
        "Position": 6
      }
    ]
  }
]

使用 OPENJSON 生成嵌套数组非常慢

连接到 PostgreSQL 服务器：致命：主机没有 pg_hba.conf 条目

如何让sqlplus的输出出现在一行中？

选择具有最大日期或最晚日期的日期

如何列出 PostgreSQL 中的所有模式？

列出指定表的所有列

如何在不修改我自己的 tnsnames.ora 的情况下使用 sqlplus 连接到位于另一台主机上的 Oracle 数据库

你如何mysqldump特定的表？

使用 psql 列出数据库权限

如何从 PostgreSQL 中的选择查询中将值插入表中？

如何使用 psql 列出所有数据库和表？

使用 OPENJSON 生成嵌套数组非常慢

1 个回答

相关问题