我试图回答以下 stackoverflow 问题:
在发布了一个有点想当然的答案之后,我决定言出必行,实际测试一下我建议的方案,以免让提问者白忙一场。结果发现,这比我想象的要困难得多(我相信这对任何人来说都不意外)。
以下是我尝试和思考的内容:
首先,我在派生表中尝试了带有 ORDER BY 的 TOP 1 UPDATE,并使用了 ROWLOCK, READPAST 提示。这会产生死锁,而且还会乱序处理队列项。处理顺序必须尽可能接近 FIFO,除非出现错误、需要多次尝试处理同一行。然后,我尝试使用 READPAST、UPDLOCK、HOLDLOCK 和 ROWLOCK 的各种组合,把所需的下一个 QueueID 选入一个变量,并为该会话独占保留这一行以供更新。我尝试的所有变体都遇到了与之前相同的问题,而且某些带有 READPAST 的组合还会报错:
您只能在 READ COMMITTED 或 REPEATABLE READ 隔离级别中指定 READPAST 锁。
这很令人困惑,因为它是READ COMMITTED 的。我以前遇到过这种情况,这很令人沮丧。
自从我开始写这个问题以来,Remus Rusanu 发布了一个新的答案。我阅读了他链接的文章,发现他使用的是破坏性读取(destructive read),因为他在回答中说“在网络调用期间保持锁定实际上是不可能的”。读了他的文章中关于热点、以及任何更新或删除都需要锁定页面的内容之后,我担心即使我能找出正确的锁来实现我想要的效果,它也无法扩展,并且可能承受不了大量并发。
现在我不知道该怎么办了。在处理行的同时一直持有锁,真的无法实现吗(即使不要求支持高 TPS 或海量并发)?我遗漏了什么?
希望比我聪明的人和比我更有经验的人能提供帮助,下面是我正在使用的测试脚本。它已切换回 TOP 1 UPDATE 方法,但我将另一种方法留在了注释中,以防您也想探索它。
将下面每个脚本分别粘贴到单独的会话中,先运行会话 1,然后迅速运行其他所有会话。大约 50 秒后测试结束。查看每个会话输出的消息,了解它做了哪些工作(或者它是如何失败的)。第一个会话会显示一个行集,其中每秒记录一次快照,详细列出当时存在的锁和正在处理的队列项。这个测试有时能正常工作,有时则完全不行。
会话 1
/* Session 1: Setup and control - Run this session first, then immediately run all other sessions */

-- Work queue. Rows are seeded below with StatusID = 1; the worker scripts'
-- commented-out alternative labels 1 as "ready" and 2 as "in process".
IF Object_ID('dbo.Queue', 'U') IS NULL
    CREATE TABLE dbo.Queue (
        QueueID int identity(1,1) NOT NULL,
        StatusID int NOT NULL,
        QueuedDate datetime CONSTRAINT DF_Queue_QueuedDate DEFAULT (GetDate()),
        CONSTRAINT PK_Queue PRIMARY KEY CLUSTERED (QueuedDate, QueueID)
    );

-- One row per (snapshot time, queue item) recorded by the monitoring batch.
IF Object_ID('dbo.QueueHistory', 'U') IS NULL
    CREATE TABLE dbo.QueueHistory (
        HistoryDate datetime NOT NULL,
        QueueID int NOT NULL
    );

-- One row per (snapshot time, lock) recorded by the monitoring batch.
IF Object_ID('dbo.LockHistory', 'U') IS NULL
    CREATE TABLE dbo.LockHistory (
        HistoryDate datetime NOT NULL,
        ResourceType varchar(100),
        RequestMode varchar(100),
        RequestStatus varchar(100),
        ResourceDescription varchar(200),
        ResourceAssociatedEntityID varchar(200)
    );

-- Single-row table holding the agreed start time that every session waits for.
IF Object_ID('dbo.StartTime', 'U') IS NULL
    CREATE TABLE dbo.StartTime (
        StartTime datetime NOT NULL
    );

SET NOCOUNT ON;

-- (Re)seed the queue whenever it holds fewer than 10000 rows.
IF (SELECT Count(*) FROM dbo.Queue) < 10000 BEGIN
    TRUNCATE TABLE dbo.Queue;

    -- Row generator: A = 4 rows, B = 4^3 = 64 rows, C = 64 * 64 * 4 = 16384 numbered rows.
    -- The extra "A X" factor is required: 64 * 64 alone is only 4096 rows, which can
    -- never satisfy the 10000-row target enforced by the WHERE clause below.
    WITH A (N) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1),
    B (N) AS (SELECT 1 FROM A Z CROSS JOIN A I CROSS JOIN A P),
    C (N) AS (
        SELECT Row_Number() OVER (ORDER BY (SELECT 1))
        FROM B O CROSS JOIN B W CROSS JOIN A X
    )
    INSERT dbo.Queue (StatusID, QueuedDate)
    -- Queue dates start five minutes in the past and are spaced 3 ms apart.
    SELECT 1, DateAdd(millisecond, C.N * 3, GetDate() - '00:05:00')
    FROM C
    WHERE C.N <= 10000;
END;

-- Publish the start time that all sessions synchronize on.
TRUNCATE TABLE dbo.StartTime;
INSERT dbo.StartTime (StartTime)
SELECT GetDate() + '00:00:15'; -- or however long it takes you to go run the other sessions
GO
/* Session 1, second batch: once the shared start time arrives, take one snapshot per
   second (33 in total) of the in-process queue items and of the locks held by other
   sessions, then report the snapshots side by side. */

-- Clear both history tables so the report only contains this run.
TRUNCATE TABLE dbo.QueueHistory;
TRUNCATE TABLE dbo.LockHistory;
SET NOCOUNT ON;

DECLARE
    @Time varchar(8),
    @Now datetime,
    @i int;

-- Style 114 is hh:mi:ss:mmm; varchar(8) keeps just hh:mi:ss for WAITFOR TIME.
SELECT @Time = Convert(varchar(8), StartTime, 114)
FROM dbo.StartTime;

WAITFOR TIME @Time;

SET @i = 1;
WHILE @i <= 33 BEGIN
    SET @Now = GetDate();

    -- Queue items whose status is currently anything other than 1.
    -- NOLOCK is deliberate: it lets this read see uncommitted status changes
    -- made by worker sessions inside their open transactions.
    INSERT dbo.QueueHistory (HistoryDate, QueueID)
    SELECT
        @Now,
        Q.QueueID
    FROM
        dbo.Queue Q WITH (NOLOCK)
    WHERE
        Q.StatusID <> 1;

    -- Locks requested by OTHER sessions in this database. Joining to
    -- sys.dm_tran_current_transaction would only return this monitoring session's
    -- own locks, so filter on database and session instead. DATABASE-level locks
    -- are skipped to keep the snapshot focused on object/page/row locks.
    INSERT dbo.LockHistory (
        HistoryDate,
        ResourceType,
        RequestMode,
        RequestStatus,
        ResourceDescription,
        ResourceAssociatedEntityID
    )
    SELECT
        @Now,
        L.resource_type,
        L.request_mode,
        L.request_status,
        L.resource_description,
        L.resource_associated_entity_id
    FROM
        sys.dm_tran_locks L
    WHERE
        L.resource_database_id = DB_ID()
        AND L.request_session_id <> @@SPID
        AND L.resource_type <> 'DATABASE';

    WAITFOR DELAY '00:00:01';
    SET @i = @i + 1;
END;

-- Report: each lock row alongside the in-process QueueIDs of the same snapshot.
-- Only the first 8 QueueIDs per snapshot are shown (pivot columns [1]..[8]).
WITH Cols AS (
    SELECT
        HistoryDate,
        QueueID,
        Row_Number() OVER (PARTITION BY HistoryDate ORDER BY QueueID) Col
    FROM dbo.QueueHistory
), P AS (
    SELECT *
    FROM
        Cols
        PIVOT (Max(QueueID) FOR Col IN ([1], [2], [3], [4], [5], [6], [7], [8])) P
)
SELECT
    L.HistoryDate,
    L.ResourceType,
    L.RequestMode,
    L.RequestStatus,
    L.ResourceDescription,
    L.ResourceAssociatedEntityID,
    P.[1], P.[2], P.[3], P.[4], P.[5], P.[6], P.[7], P.[8]
FROM
    dbo.LockHistory L
    FULL JOIN P
        ON L.HistoryDate = P.HistoryDate
ORDER BY
    Coalesce(L.HistoryDate, P.HistoryDate);
/* Clean up afterward
DROP TABLE dbo.StartTime;
DROP TABLE dbo.LockHistory;
DROP TABLE dbo.QueueHistory;
DROP TABLE dbo.Queue;
*/
会话 2
/* Session 2: Simulate an application instance holding a row locked for a long period, and eventually abandoning it. */
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET NOCOUNT ON;
SET XACT_ABORT ON;

DECLARE
    @QueueID int,
    @Time varchar(8);

-- Start one second after the shared start time.
SELECT @Time = Convert(varchar(8), StartTime + '0:00:01', 114)
FROM dbo.StartTime;

WAITFOR TIME @Time;

BEGIN TRAN;

-- Alternative approach (select the next QueueID into a variable, then update it),
-- kept for experimentation:
--SET @QueueID = (
--   SELECT TOP 1 QueueID
--   FROM dbo.Queue WITH (READPAST, UPDLOCK)
--   WHERE StatusID = 1 -- ready
--   ORDER BY QueuedDate, QueueID
--);
--UPDATE dbo.Queue
--SET StatusID = 2 -- in process
----OUTPUT Inserted.*
--WHERE QueueID = @QueueID;

SET @QueueID = NULL;

-- Claim the first ready row, skipping rows locked by other sessions (READPAST).
-- The status is set to 2 (in process); setting it back to 1 would leave the row
-- indistinguishable from a ready one, and Session 1's "StatusID <> 1" snapshot
-- would never see it.
UPDATE Q
SET Q.StatusID = 2, @QueueID = Q.QueueID
FROM (
    SELECT TOP 1 *
    FROM dbo.Queue WITH (ROWLOCK, READPAST)
    WHERE StatusID = 1
    ORDER BY QueuedDate, QueueID
) Q;

PRINT @QueueID;

WAITFOR DELAY '00:00:20'; -- Release it partway through the test
ROLLBACK TRAN; -- Simulate client disconnecting
会话 3
/* Session 3: Run a near-continuous series of "failed" queue processing. */
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET XACT_ABORT ON;
SET NOCOUNT ON;

DECLARE
    @QueueID int,
    @EndDate datetime,
    @NextDate datetime,
    @Time varchar(8);

-- Run from the shared start time until 33 seconds after it.
SELECT
    @EndDate = StartTime + '0:00:33',
    @Time = Convert(varchar(8), StartTime, 114)
FROM dbo.StartTime;

WAITFOR TIME @Time;

WHILE GetDate() < @EndDate BEGIN
    BEGIN TRAN;

    -- Alternative approach (select the next QueueID into a variable, then update it),
    -- kept for experimentation:
    --SET @QueueID = (
    --   SELECT TOP 1 QueueID
    --   FROM dbo.Queue WITH (READPAST, UPDLOCK)
    --   WHERE StatusID = 1 -- ready
    --   ORDER BY QueuedDate, QueueID
    --);
    --UPDATE dbo.Queue
    --SET StatusID = 2 -- in process
    ----OUTPUT Inserted.*
    --WHERE QueueID = @QueueID;

    SET @QueueID = NULL;

    -- Claim the first ready row, skipping rows locked by other sessions (READPAST).
    -- The status is set to 2 (in process); setting it back to 1 would leave the row
    -- indistinguishable from a ready one, and Session 1's "StatusID <> 1" snapshot
    -- would never see it.
    UPDATE Q
    SET Q.StatusID = 2, @QueueID = Q.QueueID
    FROM (
        SELECT TOP 1 *
        FROM dbo.Queue WITH (ROWLOCK, READPAST)
        WHERE StatusID = 1
        ORDER BY QueuedDate, QueueID
    ) Q;

    PRINT @QueueID;

    -- Hold the row for roughly 15 ms by spinning; SET NOCOUNT ON is just a
    -- harmless statement used as the loop body.
    SET @NextDate = GetDate() + '00:00:00.015';
    WHILE GetDate() < @NextDate SET NOCOUNT ON;

    -- "Fail": undo the claim so the row becomes ready again.
    ROLLBACK TRAN;
END
会话 4 及以上 - 任意数量
/* Session 4: "Process" the queue normally, one every second for 30 seconds. */
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET XACT_ABORT ON;
SET NOCOUNT ON;

DECLARE @Time varchar(8);

SELECT @Time = Convert(varchar(8), StartTime, 114)
FROM dbo.StartTime;

WAITFOR TIME @Time;

DECLARE @i int,
    @QueueID int,
    @QueuedDate datetime;

SET @i = 1;
WHILE @i <= 30 BEGIN
    BEGIN TRAN;

    -- Alternative approach (select the next QueueID into a variable, then update it),
    -- kept for experimentation:
    --SET @QueueID = (
    --   SELECT TOP 1 QueueID
    --   FROM dbo.Queue WITH (READPAST, UPDLOCK)
    --   WHERE StatusID = 1 -- ready
    --   ORDER BY QueuedDate, QueueID
    --);
    --UPDATE dbo.Queue
    --SET StatusID = 2 -- in process
    --WHERE QueueID = @QueueID;

    SET @QueueID = NULL;
    SET @QueuedDate = NULL;

    -- Claim the first ready row, skipping rows locked by other sessions (READPAST).
    -- The status is set to 2 (in process) so Session 1's "StatusID <> 1" snapshot
    -- can see it. Both key columns are captured so the DELETE below can address
    -- the row by its full clustered key.
    UPDATE Q
    SET Q.StatusID = 2,
        @QueueID = Q.QueueID,
        @QueuedDate = Q.QueuedDate
    FROM (
        SELECT TOP 1 *
        FROM dbo.Queue WITH (ROWLOCK, READPAST)
        WHERE StatusID = 1
        ORDER BY QueuedDate, QueueID
    ) Q;

    PRINT @QueueID;

    WAITFOR DELAY '00:00:01'; -- simulated processing time
    SET @i = @i + 1;

    -- Remove the processed item. PK_Queue is clustered on (QueuedDate, QueueID);
    -- filtering on QueueID alone cannot seek on that key and forces a scan that
    -- can block on rows other sessions have locked, so match on both columns.
    IF @QueueID IS NOT NULL
        DELETE dbo.Queue
        WHERE QueuedDate = @QueuedDate
            AND QueueID = @QueueID;

    COMMIT TRAN;
END
您需要 3 个锁定提示:READPAST、UPDLOCK 和 ROWLOCK
我之前回答过这个问题:https://stackoverflow.com/questions/939831/sql-server-process-queue-race-condition/940001#940001
正如 Remus 所说,使用 Service Broker 更好,但这些提示确实有效
您关于隔离级别的错误通常意味着涉及复制或 NOLOCK。
SQL Server 非常适合存储关系数据。至于作业队列,它不是那么好。请参阅为 MySQL 编写的这篇文章,但它也可以在这里应用。https://blog.engineyard.com/2011/5-subtle-ways-youre-using-mysql-as-a-queue-and-why-itll-bite-you