我正在运行 SQL Server 2019 企业版。每个月,我们都会运行一个存储过程,加载数百万条记录,这些记录的服务日期最早可追溯到五年前。我为 4 个数据文件分配了 400GB 的空间,并为日志文件分配了 100GB 的空间。作业经常失败,因为日志文件被活动事务填满。数据库处于简单恢复模式,所以我认为日志应该在每个事务结束时被清空。开发人员修改了作业,使其循环执行,每次只加载一年的记录。
-- Build #UnpvtDx: one row per (claim detail line, diagnosis column) for the
-- service year @year, by unpivoting Diagnosis1CD..Diagnosis13CD.
-- CodeLine holds the source column name, DxCode the value from that column.
-- @year (INT) is supplied by the enclosing procedure.
-- NOTE(review): this statement reads PRINCE.Claim.ClaimDetail, while the
-- INSERT that consumes #UnpvtDx reads PROD.Claim.ClaimDetail -- confirm which
-- database is intended.
DROP TABLE IF EXISTS #UnpvtDx;

SELECT
    ClaimHeader_ID,
    ClaimDetail_ID,
    ClaimServiceLine,
    Unpvt.CodeLine,
    Unpvt.DxCode
INTO #UnpvtDx
FROM PRINCE.Claim.ClaimDetail det WITH (NOLOCK)
UNPIVOT
(
    DxCode FOR CodeLine IN
    (
        Diagnosis1CD, Diagnosis2CD, Diagnosis3CD, Diagnosis4CD, Diagnosis5CD,
        Diagnosis6CD, Diagnosis7CD, Diagnosis8CD, Diagnosis9CD,
        Diagnosis10CD, Diagnosis11CD, Diagnosis12CD, Diagnosis13CD
    )
) AS Unpvt -- author's timing note: 53 secs
-- Half-open date range instead of YEAR(ServiceFromDT) = @year: the column is
-- no longer wrapped in a function, so the predicate is SARGable.
-- Assumes ServiceFromDT is a date/datetime column -- TODO confirm.
WHERE ServiceFromDT >= DATEFROMPARTS(@year, 1, 1)
  AND ServiceFromDT <  DATEFROMPARTS(@year + 1, 1, 1);
-- Build #UnpvtPointer: one row per (claim detail line, pointer column) for the
-- service year @year, by unpivoting DiagPointer1..DiagPointer4.
-- CodeLine holds the source column name, Pointer the value from that column.
-- @year (INT) is supplied by the enclosing procedure.
-- NOTE(review): this statement reads PRINCE.Claim.ClaimDetail, while the
-- INSERT that consumes #UnpvtPointer reads PROD.Claim.ClaimDetail -- confirm
-- which database is intended.
DROP TABLE IF EXISTS #UnpvtPointer;

SELECT
    ClaimHeader_ID,
    ClaimDetail_ID,
    ClaimServiceLine,
    Unpvt.CodeLine,
    Unpvt.Pointer
INTO #UnpvtPointer
FROM PRINCE.Claim.ClaimDetail det WITH (NOLOCK)
UNPIVOT
(
    Pointer FOR CodeLine IN
    (
        DiagPointer1, DiagPointer2, DiagPointer3, DiagPointer4
    )
) AS Unpvt -- author's timing note: 40 secs
-- Half-open date range instead of YEAR(ServiceFromDT) = @year: the column is
-- no longer wrapped in a function, so the predicate is SARGable.
-- Assumes ServiceFromDT is a date/datetime column -- TODO confirm.
WHERE ServiceFromDT >= DATEFROMPARTS(@year, 1, 1)
  AND ServiceFromDT <  DATEFROMPARTS(@year + 1, 1, 1);
-- Load PROD.Claim.ClaimDiag for service year @year: each claim detail line is
-- combined with its unpivoted diagnosis codes (#UnpvtDx), its unpivoted
-- diagnosis pointers (#UnpvtPointer) and the active ICD10CM description.
-- @year (INT) is supplied by the enclosing procedure.
INSERT INTO PROD.Claim.ClaimDiag
(
    ClaimHeader_ID, ClaimDetail_ID, SourceID, EDWLoadDTS, PartnerCD,
    PartnerNM, ClaimID, ClaimServiceLine, ClaimStatus, CCOMemberID,
    MemberID, PlaceOfServiceCD, ServiceFromDT, ServiceToDT, ClaimForm,
    TypeOfBillCD, DiagnosisCD, DiagnosisDESC, DiagPointer
)
SELECT DISTINCT
    det.ClaimHeader_ID,
    det.ClaimDetail_ID,
    det.SourceID,
    det.EDWLoadDTS,
    det.PartnerCD,
    det.PartnerNM,
    det.ClaimID,
    det.ClaimServiceLine,
    det.ClaimStatus,
    det.CCOMemberID,
    det.MemberID,
    det.PlaceOfServiceCD,
    det.ServiceFromDT,
    det.ServiceToDT,
    det.ClaimForm,
    det.TypeOfBillCD,
    DiagnosisCD   = dx.DxCode,
    DiagnosisDESC = diag.DiagnosisDESC,
    DiagPointer   = point.Pointer
FROM PROD.Claim.ClaimDetail det WITH (NOLOCK)
-- No ch columns are selected; the join only requires a matching header row.
INNER JOIN PROD.Claim.ClaimHeader ch WITH (NOLOCK)
    ON ch.ClaimHeader_ID = det.ClaimHeader_ID
INNER JOIN #UnpvtDx dx
    ON  dx.ClaimDetail_ID   = det.ClaimDetail_ID
    AND dx.ClaimHeader_ID   = det.ClaimHeader_ID
    AND dx.ClaimServiceLine = det.ClaimServiceLine
-- Joined on the detail keys only (not CodeLine), so every diagnosis row is
-- paired with every pointer row of the same detail line.
LEFT JOIN #UnpvtPointer point
    ON  point.ClaimDetail_ID   = det.ClaimDetail_ID
    AND point.ClaimHeader_ID   = det.ClaimHeader_ID
    AND point.ClaimServiceLine = det.ClaimServiceLine
-- Filters stay in ON so diagnoses without an active ICD10CM match are kept
-- with a NULL description.
LEFT JOIN Reference.Reference.Diagnosis diag WITH (NOLOCK)
    ON  dx.DxCode       = diag.DiagnosisCD
    AND diag.ICDVersion = 'ICD10CM'
    AND diag.ActiveFLG  = 1
-- Half-open date range instead of YEAR(det.ServiceFromDT) = @year: the column
-- is no longer wrapped in a function, so the predicate is SARGable.
-- Assumes ServiceFromDT is a date/datetime column -- TODO confirm.
WHERE det.ServiceFromDT >= DATEFROMPARTS(@year, 1, 1)
  AND det.ServiceFromDT <  DATEFROMPARTS(@year + 1, 1, 1);
使用以下命令从 SQL 代理作业执行存储过程:
-- Driver batch: call Claim.sp_UpdateClaimDiag once per calendar year found in
-- ClaimHeader.MinServiceFromDT, oldest year first.
DECLARE @year INT;

-- LOCAL keeps the cursor name scoped to this batch; FAST_FORWARD declares it
-- forward-only and read-only, which is all this loop needs.
DECLARE cur CURSOR LOCAL FAST_FORWARD FOR
    SELECT yr = YEAR(hdr.MinServiceFromDT)
    FROM PROD.Claim.ClaimHeader hdr WITH (NOLOCK)
    -- Skip NULL dates: a NULL @year can never match the procedure's year
    -- filter, so that call would only scan the tables and log zero rows.
    WHERE hdr.MinServiceFromDT IS NOT NULL
    GROUP BY YEAR(hdr.MinServiceFromDT)
    ORDER BY YEAR(hdr.MinServiceFromDT);

OPEN cur;

FETCH NEXT FROM cur INTO @year;

WHILE @@FETCH_STATUS = 0
BEGIN
    EXEC Claim.sp_UpdateClaimDiag @year;

    FETCH NEXT FROM cur INTO @year;
END;

CLOSE cur;
DEALLOCATE cur;
每次循环结束是否被视为一个事务的结束,因此日志文件应该在处理完每一年的记录后被清空?还是说日志文件会持续填充,直到作业遍历完所有循环并加载完所有年份的记录?
我还将日志文件增加到了 150GB,但这已经用尽了可用的磁盘空间(剩余空间不能低于 10% 的缓冲)。
下面贴出完整的存储过程代码。
USE [Prod]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [Claim].[sp_UpdateClaimDiag] @year INT
AS
----------------------------------------------------------------------------------------
-- Claim.sp_UpdateClaimDiag
--
-- Purpose : Loads Prod.Claim.ClaimDiag for one service year. The diagnosis-code
--           and diagnosis-pointer columns of Prod.Claim.ClaimDetail are unpivoted
--           into temp tables, joined back to the detail rows, and inserted. A row
--           with the inserted record count is then written to Prod.dbo.UpdateLog.
--
-- Params  : @year INT - calendar year of ClaimDetail.ServiceFromDT to load.
--
-- Returns : 0 on success. On failure the error is re-raised with severity 16 and
--           the original error number is returned.
--
-- Notes   : @transactional is hard-coded to 0, so no explicit transaction is opened
--           here; unless the caller already has one open, each statement commits
--           on its own.
--           Year filters are written as half-open date ranges rather than
--           YEAR(col) = @year so that the predicates are SARGable. This assumes
--           ServiceFromDT is a date/datetime column -- TODO confirm.
--           NOTE(review): the NOLOCK hints are kept as in the original; they allow
--           dirty reads -- confirm that this is acceptable for this load.
----------------------------------------------------------------------------------------
BEGIN
    SET ANSI_DEFAULTS, ARITHABORT, NOCOUNT ON;
    SET IMPLICIT_TRANSACTIONS OFF;  -- ANSI_DEFAULTS turns this ON; switch it back off
    SET TRANSACTION ISOLATION LEVEL READ COMMITTED;

    -- variable declaration
    DECLARE
          @transactional BIT
        , @trancount     INT
        , @procname      SYSNAME
        , @procName1     SYSNAME        -- "<schema>.<proc> <year>", used in log and error text
        , @error         INT
        , @message       VARCHAR(4000)
        , @xstate        INT
        , @RecordCount   INT;

    SELECT
          @procname = OBJECT_SCHEMA_NAME(@@PROCID, DB_ID()) + '.' + OBJECT_NAME(@@PROCID, DB_ID())
        -- 0 = no:  will not execute batches inside a transaction; a partial success of procedure is possible
        -- 1 = yes: batches in procedure will be bound together by a transaction, partial success is impossible
        , @transactional = 0;

    -- Remember the caller's transaction depth only when running transactionally.
    IF @transactional = 1
        SET @trancount = @@TRANCOUNT;

    BEGIN TRY
        -- Optionally begin a transaction, or set a savepoint inside the caller's one.
        IF @transactional = 1 AND @trancount = 0
            BEGIN TRANSACTION;
        ELSE IF @transactional = 1
            SAVE TRANSACTION p1;

        ----------------------------------------------------------------------------------------
        --- Unpivot columns to rows into temp tables
        ----------------------------------------------------------------------------------------
        --- Diagnosis codes (ICD9CM & ICD10CM): one row per detail line and diagnosis column
        DROP TABLE IF EXISTS #UnpvtDx;

        SELECT
              ClaimHeader_ID
            , ClaimDetail_ID
            , ClaimServiceLine
            , Unpvt.CodeLine
            , Unpvt.DxCode
        INTO #UnpvtDx
        FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
        UNPIVOT
        (
            DxCode FOR CodeLine IN
            (
                Diagnosis1CD, Diagnosis2CD, Diagnosis3CD,
                Diagnosis4CD, Diagnosis5CD, Diagnosis6CD,
                Diagnosis7CD, Diagnosis8CD, Diagnosis9CD,
                Diagnosis10CD, Diagnosis11CD, Diagnosis12CD,
                Diagnosis13CD
            )
        ) AS Unpvt -- author's timing note: 53 secs
        WHERE ServiceFromDT >= DATEFROMPARTS(@year, 1, 1)
          AND ServiceFromDT <  DATEFROMPARTS(@year + 1, 1, 1);

        --- Diagnosis pointers: one row per detail line and pointer column
        DROP TABLE IF EXISTS #UnpvtPointer;

        SELECT
              ClaimHeader_ID
            , ClaimDetail_ID
            , ClaimServiceLine
            , Unpvt.CodeLine
            , Unpvt.Pointer
        INTO #UnpvtPointer
        FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
        UNPIVOT
        (
            Pointer FOR CodeLine IN
            (
                DiagPointer1, DiagPointer2,
                DiagPointer3, DiagPointer4
            )
        ) AS Unpvt -- author's timing note: 40 secs
        WHERE ServiceFromDT >= DATEFROMPARTS(@year, 1, 1)
          AND ServiceFromDT <  DATEFROMPARTS(@year + 1, 1, 1);

        ----------------------------------------------------------------------------------------
        --- Insert the year's records built from the temp tables
        ----------------------------------------------------------------------------------------
        INSERT INTO Prod.Claim.ClaimDiag
        (
              ClaimHeader_ID
            , ClaimDetail_ID
            , SourceID
            , EDWLoadDTS
            , PartnerCD
            , PartnerNM
            , ClaimID
            , ClaimServiceLine
            , ClaimStatus
            , CCOMemberID
            , MemberID
            , PlaceOfServiceCD
            -- Was spelled "ServceFromDT"; corrected to match the source column name.
            -- NOTE(review): confirm against the actual ClaimDiag column name.
            , ServiceFromDT
            , ServiceToDT
            , ClaimForm
            , TypeOfBillCD
            , DiagnosisCD
            , DiagnosisDESC
            , DiagPointer
        )
        SELECT DISTINCT
              det.ClaimHeader_ID
            , det.ClaimDetail_ID
            , det.SourceID
            , det.EDWLoadDTS
            , det.PartnerCD
            , det.PartnerNM
            , det.ClaimID
            , det.ClaimServiceLine
            , det.ClaimStatus
            , det.CCOMemberID
            , det.MemberID
            , det.PlaceOfServiceCD
            , det.ServiceFromDT
            , det.ServiceToDT
            , det.ClaimForm
            , det.TypeOfBillCD
            , DiagnosisCD   = dx.DxCode
            , DiagnosisDESC = diag.DiagnosisDESC
            , DiagPointer   = point.Pointer
        FROM Prod.Claim.ClaimDetail det WITH (NOLOCK)
        -- No ch columns are selected; the join only requires a matching header row.
        INNER JOIN Prod.Claim.ClaimHeader ch WITH (NOLOCK)
            ON ch.ClaimHeader_ID = det.ClaimHeader_ID
        INNER JOIN #UnpvtDx dx
            ON  dx.ClaimDetail_ID   = det.ClaimDetail_ID
            AND dx.ClaimHeader_ID   = det.ClaimHeader_ID
            AND dx.ClaimServiceLine = det.ClaimServiceLine
        -- Joined on the detail keys only (not CodeLine), so every diagnosis row is
        -- paired with every pointer row of the same detail line.
        LEFT JOIN #UnpvtPointer point
            ON  point.ClaimDetail_ID   = det.ClaimDetail_ID
            AND point.ClaimHeader_ID   = det.ClaimHeader_ID
            AND point.ClaimServiceLine = det.ClaimServiceLine
        -- Filters stay in ON so diagnoses without an active ICD10CM match are kept
        -- with a NULL description.
        LEFT JOIN Reference.Reference.Diagnosis diag WITH (NOLOCK)
            ON  dx.DxCode       = diag.DiagnosisCD
            AND diag.ICDVersion = 'ICD10CM'
            AND diag.ActiveFLG  = 1
        WHERE det.ServiceFromDT >= DATEFROMPARTS(@year, 1, 1)
          AND det.ServiceFromDT <  DATEFROMPARTS(@year + 1, 1, 1);

        -- Must be read immediately after the INSERT, before any other statement
        -- resets @@ROWCOUNT.
        SET @RecordCount = @@ROWCOUNT;

        ----------------------------------------------------------------------------------------
        -- insert into updatelog table
        ----------------------------------------------------------------------------------------
        SET @procName1 = @procname + ' ' + CAST(@year AS varchar(4));

        INSERT INTO Prod.dbo.UpdateLog (EventTimestamp, EventDescription, ProcName, TableName)
        SELECT
              GETDATE()
            , 'Inserted ' + CAST(@RecordCount AS varchar(100)) + ' records'
            , @procName1
            , 'Claim.ClaimDiag';

        ----------------------------------------------------------------------------------------
        DROP TABLE IF EXISTS #UnpvtDx;
        DROP TABLE IF EXISTS #UnpvtPointer;
        ----------------------------------------------------------------------------------------

        -- Commit only a transaction that this procedure itself opened.
        IF @transactional = 1 AND @trancount = 0
            COMMIT;
    END TRY
    ----------------------------------------------------------------------------------------
    -- error handling with catch
    ----------------------------------------------------------------------------------------
    BEGIN CATCH
        SELECT @error = ERROR_NUMBER(), @message = ERROR_MESSAGE(), @xstate = XACT_STATE();

        -- Roll back according to who owns the transaction and whether it is still committable.
        IF @transactional = 1 AND @xstate = -1 ROLLBACK;
        IF @transactional = 1 AND @xstate = 1 AND @trancount = 0 ROLLBACK;
        IF @transactional = 1 AND @xstate = 1 AND @trancount > 0 ROLLBACK TRANSACTION p1;

        -- Clean up the temp tables this procedure creates (the original dropped
        -- #claims, which is never created here).
        DROP TABLE IF EXISTS #UnpvtDx;
        DROP TABLE IF EXISTS #UnpvtPointer;

        SET @procName1 = @procname + ' ' + CAST(@year AS varchar(4));

        RAISERROR ('%s, Error %d, %s', 16, 1, @procName1, @error, @message);
        RETURN @error;
    END CATCH

    RETURN 0;
END
GO
使用 `SIMPLE RECOVERY`(简单恢复)模式时,事务日志会在每个检查点被截断,而不一定是在您的事务结束时。检查点大约每 60 秒发生一次。查看代码的以下部分,您实际上根本没有显式地开始事务:您最初 `set @transactional = 0`,而只有在 `@transactional = 1` 时才会开始一个事务。此外,这里的 `IF` 语句只是给 `@trancount` 赋了一个值;由于 `IF` 语句之后没有 `BEGIN/END` 块,`BEGIN TRY` 块仍然会照常开始。不过,这不是您的问题所在,它只是意味着您每次执行的都是隐式(自动提交)事务,而不是创建显式事务。我怀疑即使一次只处理一年的数据,您生成的事务日志量也超出了驱动器所能承受的范围。这张表中一年的数据占用了多少空间?
可以考虑将批量大小从一年减少到一个月,看看是否有帮助。顺便,请不要再用 YEAR() 函数包裹日期列。这会使这些列上的条件变成非 SARGable,这意味着 SQL Server 在检索数据时无法有效利用任何现有索引。这可能会导致您的事务运行时间比实际需要的更长,从而使其他正在进行的事务在此事务运行期间一直保留在日志中。
考虑以下对存储过程的更改。我将其简化了一点,以突出要更改的重要部分。您需要将其与您拥有的内容合并并进行测试。这使您在调用过程时更容易控制批量大小。
另外,我删除了 `NOLOCK` 提示。请在此处阅读更多内容,了解为什么使用它是个坏主意。您可能是因为 WHERE 子句非 SARGable,才觉得需要 `NOLOCK` 提示。