我正在寻找表分区设计帮助,以便在 UAT 和 PROD 中实现。
当前流程是从 SP 将数据从暂存表插入到主表,并使用业务逻辑更新表并删除超过 90 天的数据。主表是 120 GB 表,包含 1 亿数据,插入、更新和删除需要更多时间,大约 1 小时才能完成该过程。我在这里发布了 --> ETL 数据加载 DML SP 缓慢建议
此过程每天运行 4 次。我正在尝试使用表分区测试并自动化此过程。问题是我每天加载数据 4 次,有时我们还会根据要求运行额外的批处理。我无法使用每日或每月分区,因为我无法在数据存在的情况下切换。我相信我需要使用基于最大当前日期的每次加载分区。有人可以帮忙吗?
除了主键作为聚集索引之外,我们没有其他索引。这是我测试过的测试和示例。
表创建:
--select top 10 * from [C1810429].[STYTOTAL_RAW]
use master
go
alter database DB_Partition set single_user with rollback immediate
drop database DB_Partition
create database DB_Partition
go
use master
go
go
use DB_Partition
go
create schema [C1810429]
go
--*/
use DB_Partition
go
---1
--drop table [C1810429].[STYTOTAL_RAW]
CREATE TABLE [C1810429].[STYTOTAL_RAW](
[STYTOTALID] [int] IDENTITY(1,1) NOT NULL,
[STYLE] [decimal](5, 0) NOT NULL,
[InsertedDateTime] [datetime2](7) NOT NULL,
[UpdatedDateTime] [datetime2](7) NULL,
[IsCurrent] [bit] NULL,
[DELDATTIM] [datetime2](7) NULL,
[DELFLAG] [char](1) NULL,
--/*
CONSTRAINT [PK_C1810429_STYTOTAL_RAW] PRIMARY KEY CLUSTERED
(
[STYTOTALID] ASC,
[STYLE] ASC,
[InsertedDateTime] ASC
)WITH (STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
--*/
)
--*/
--drop table [C1810429].[DLSTYTOTAL]
CREATE TABLE [C1810429].[DLSTYTOTAL](
[STYLE] [decimal](5, 0) NULL,
[DELDATTIM] [datetime2](7) NULL,
[DELFLAG] [char](1) NULL
)
插入样本:
use DB_Partition
SET NOCOUNT ON
DECLARE @DateTime DATETIME2(7) = '2024-05-01 00:00:00.000'
WHILE @DateTime <= '2024-10-18 23:59:59.999'
BEGIN
INSERT INTO [C1810429].[STYTOTAL_RAW] ([STYLE],[InsertedDateTime])
SELECT ABS(CHECKSUM(NEWID())%8)+1,@DateTime
SET @DateTime = DATEADD(Minute, 1, @DateTime)
END
GO
下一插入/批次示例:
/*
use DB_Partition
-- populate further test data
SET NOCOUNT ON
DECLARE @DateTime DATETIME2(7) = '20240701'
WHILE @DateTime <= '20241016'
BEGIN
insert into [C1810429].[DLSTYTOTAL] ([STYLE])
SELECT ABS(CHECKSUM(NEWID())%8)+1
SET @DateTime = DATEADD(Minute, 1, @DateTime)
END
GO
*/
INSERT INTO [C1810429].[STYTOTAL_RAW] ([STYLE],[DELDATTIM],[DELFLAG],[InsertedDateTime],[IsCurrent])
SELECT STG.[STYLE],STG.[DELDATTIM],STG.[DELFLAG],DATEADD(MICROSECOND, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)), SYSDATETIME()) as [InsertedDateTime],1 as [IsCurrent]
FROM [C1810429].[DLSTYTOTAL] STG Left Join [C1810429].[STYTOTAL_RAW] TGT
on STG.STYLE = TGT.STYLE AND STG.DELDATTIM = TGT.DELDATTIM
Where TGT.DELDATTIM is null
更新和删除:
--update
UPDATE [C1810429].[STYTOTAL_RAW]
SET ISCURRENT = 0,UpdatedDateTime = SYSDATETIME()
WHERE IsCurrent = 1
--delete
delete FROM [C1810429].[STYTOTAL_RAW]
WHERE InsertedDateTime < DATEADD(DAY, -90, SYSDATETIME()) and IsCurrent = 0
感谢您的帮助。
我已经测试了这个月度分区:
use DB_Partition
go
--drop PARTITION FUNCTION PF_myDateRange
CREATE PARTITION FUNCTION PF_myDateRange ( [datetime2](7))
AS RANGE RIGHT FOR VALUES
(
'20240601',
'20240701',
'20240801',
'20240901',
'20241001',
'20241101'
)
GO
-- not sure I need to time as well in the function
CREATE PARTITION FUNCTION PF_myDateRange ( [datetime2](7))
AS RANGE RIGHT FOR VALUES
(
'2024-06-01 23:59:59.997',
'2024-07-01 23:59:59.997',
'2024-08-01 23:59:59.997',
'2024-09-01 23:59:59.997',
'2024-10-01 23:59:59.997',
'2024-11-01 23:59:59.997'
)
GO
CREATE PARTITION SCHEME PS_myPartitionScheme AS PARTITION PF_myDateRange ALL TO ([PRIMARY]);
-- drop and create index to make existing table into partition data alignment
CREATE UNIQUE CLUSTERED INDEX [PK_C1810429_STYTOTAL_RAW] ON [C1810429].[STYTOTAL_RAW]
(
[STYTOTALID] ASC,
[STYLE] ASC,
[InsertedDateTime] ASC
)
WITH (DROP_EXISTING=ON, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
ON PS_myPartitionScheme([InsertedDateTime]);
已编辑:我尝试使用最大时间和+1 秒等进行自动化,但是当我加载 2 和 3 等时,数据在切换时会转到不同的分区边界。
---1
--drop table [C1810429].[STYTOTAL_RAW_Intermediate_Staging]
CREATE TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging(
[STYTOTALID] [int] IDENTITY(1,1) NOT NULL,
[STYLE] [decimal](5, 0) NOT NULL,
[InsertedDateTime] [datetime2](7) NOT NULL,
[UpdatedDateTime] [datetime2](7) NULL,
[IsCurrent] [bit] NULL,
[DELDATTIM] [datetime2](7) NULL,
[DELFLAG] [char](1) NULL,
--/*
CONSTRAINT [PK_C1810429_STYTOTAL_RAW_Intermediate_Staging] PRIMARY KEY CLUSTERED
(
[STYTOTALID] ASC,
[STYLE] ASC,
[InsertedDateTime] ASC
)WITH (STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
) on [Primary]--on PS_myPartitionScheme([InsertedDateTime]);
----
use DB_Partition
--truncate table C1810429.STYTOTAL_RAW_Intermediate_Staging
select * from C1810429.STYTOTAL_RAW_Intermediate_Staging
--00,06,12,18,23:59.59
-- load 1
select max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW]
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 00:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 01:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 11:59:59.999')
-- load 2
select max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW]
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 12:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 13:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 17:59:59.999')
-- load 3
select max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW]
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 18:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 19:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 23:59:59.999')
--create new partitions for new data load use the primary FG
--get the staging table max date and add one minute and create new boundary
select max([InsertedDateTime]) from [C1810429].STYTOTAL_RAW_Intermediate_Staging
Declare @New_PartitionBoundaryDate_Plus datetime2 (7)
select @New_PartitionBoundaryDate_Plus = max (DATEADD(minute,+1,[InsertedDateTime])) from [C1810429].STYTOTAL_RAW_Intermediate_Staging
select @New_PartitionBoundaryDate_Plus as [Plus]
--load 1 max([InsertedDateTime])
7 25920 2024-10-19 00:00:00.000
8 0 NULL
--load 2 max([InsertedDateTime])+ 1 minute
ALTER PARTITION SCHEME PS_myPartitionScheme NEXT USED [Primary];
ALTER PARTITION FUNCTION PF_myDateRange () SPLIT RANGE('2024-10-19 11:59:59.9970000');
---------
--select * from [C1810429].[STYTOTAL_RAW_Intermediate_Staging]
select min([InsertedDateTime]) ,max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW_Intermediate_Staging]
-- pass the min and max value of staging table
--load 1 >= min([InsertedDateTime]) <=max([InsertedDateTime])
--load 2 >= min([InsertedDateTime]) <=max([InsertedDateTime])
8 3 2024-10-19 12:00:59.997 -- Here the data is failling under
9 0 2024-10-19 18:00:59.997
10 0 NULL
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
WITH CHECK ADD CONSTRAINT ck_Min_InsertedDateTime
CHECK ([InsertedDateTime] IS NOT NULL AND [InsertedDateTime] >= '2024-10-19 00:00:00.0000000' )
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
WITH CHECK ADD CONSTRAINT ck_Max_InsertedDateTime
CHECK ([InsertedDateTime] IS NOT NULL AND [InsertedDateTime] <= '2024-10-19 11:59:59.9970000' )
-- get the data for bounday and enter the partition no or date
-- Create partition for each load by getting max datetime of [InsertedDateTime] in table [C1810429].[STYTOTAL_RAW]
--2024-10-19 12:00:00.001
Declare @New_PartitionBoundaryDate_SWITCH datetime2 (7)
select @New_PartitionBoundaryDate_SWITCH = max (DATEADD(minute,0,[InsertedDateTime])) from [C1810429].STYTOTAL_RAW_Intermediate_Staging
select @New_PartitionBoundaryDate_SWITCH as [SWITCH] --Get the swith parttion
--load 1 is not working since the last data is in that boundary value
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging SWITCH TO [C1810429].[STYTOTAL_RAW] PARTITION $PARTITION.PF_myDateRange('2024-10-19 11:59:59.997');
--load 1 taken last not null boundary partition no
--load 1 taken last not null boundary partition no
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging SWITCH TO [C1810429].[STYTOTAL_RAW] PARTITION 8 -- 2024-10-19 12:00:00.001
declare @ck_Min_InsertedDateTime_drop varchar (max)
set @ck_Min_InsertedDateTime_drop = 'ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
drop CONSTRAINT ck_Min_InsertedDateTime;'
--select @ck_Min_InsertedDateTime_drop
Exec (@ck_Min_InsertedDateTime_drop)
declare @ck_Max_InsertedDateTime_drop varchar (max)
set @ck_Max_InsertedDateTime_drop = 'ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
drop CONSTRAINT ck_Max_InsertedDateTime;'
--select @@ck_Max_InsertedDateTime_drop
Exec (@ck_Max_InsertedDateTime_drop)
有以下几点可能会导致你失败:
我重新整理了你的代码:
这是代码,测试一下