Estou procurando ajuda para projeto de particionamento de tabelas para implementar no UAT e depois no PROD.
O processo atual é do SP inserindo dados da tabela de preparação para a tabela principal e atualizando a tabela com lógica de negócios e excluindo dados com mais de 90 dias. A tabela principal é uma tabela de 120 GB com 100 milhões de dados, a inserção, atualização e exclusão leva mais tempo, cerca de 1 hora para concluir o processo. Aqui eu postei --> Sugestão de lentidão do DML SP para carregamento de dados ETL
Este processo está sendo executado diariamente 4 vezes. Estou tentando testar e automatizar este processo com particionamento de tabela. O problema é que os dados são carregados diariamente 4 vezes e às vezes também executamos lotes extras com base na necessidade. Não consegui usar o particionamento diário ou mensal, pois não consigo alternar com os dados existentes. Acredito que preciso usar o particionamento por carga com base na data máxima atual. Alguém pode ajudar?
Não temos nenhum índice diferente da chave primária como índice clusterizado. Aqui está o teste e a amostra que testei.
Criação de tabela:
--select top 10 * from [C1810429].[STYTOTAL_RAW]
use master
go
alter database DB_Partition set single_user with rollback immediate
drop database DB_Partition
create database DB_Partition
go
use master
go
go
use DB_Partition
go
create schema [C1810429]
go
--*/
use DB_Partition
go
---1
--drop table [C1810429].[STYTOTAL_RAW]
CREATE TABLE [C1810429].[STYTOTAL_RAW](
[STYTOTALID] [int] IDENTITY(1,1) NOT NULL,
[STYLE] [decimal](5, 0) NOT NULL,
[InsertedDateTime] [datetime2](7) NOT NULL,
[UpdatedDateTime] [datetime2](7) NULL,
[IsCurrent] [bit] NULL,
[DELDATTIM] [datetime2](7) NULL,
[DELFLAG] [char](1) NULL,
--/*
CONSTRAINT [PK_C1810429_STYTOTAL_RAW] PRIMARY KEY CLUSTERED
(
[STYTOTALID] ASC,
[STYLE] ASC,
[InsertedDateTime] ASC
)WITH (STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
--*/
)
--*/
--drop table [C1810429].[DLSTYTOTAL]
CREATE TABLE [C1810429].[DLSTYTOTAL](
[STYLE] [decimal](5, 0) NULL,
[DELDATTIM] [datetime2](7) NULL,
[DELFLAG] [char](1) NULL
)
Inserir amostra:
use DB_Partition
SET NOCOUNT ON
DECLARE @DateTime DATETIME2(7) = '2024-05-01 00:00:00.000'
WHILE @DateTime <= '2024-10-18 23:59:59.999'
BEGIN
INSERT INTO [C1810429].[STYTOTAL_RAW] ([STYLE],[InsertedDateTime])
SELECT ABS(CHECKSUM(NEWID())%8)+1,@DateTime
SET @DateTime = DATEADD(Minute, 1, @DateTime)
END
GO
Próxima Inserção / Amostra de lote:
/*
use DB_Partition
-- populate further test data
SET NOCOUNT ON
DECLARE @DateTime DATETIME2(7) = '20240701'
WHILE @DateTime <= '20241016'
BEGIN
insert into [C1810429].[DLSTYTOTAL] ([STYLE])
SELECT ABS(CHECKSUM(NEWID())%8)+1
SET @DateTime = DATEADD(Minute, 1, @DateTime)
END
GO
*/
INSERT INTO [C1810429].[STYTOTAL_RAW] ([STYLE],[DELDATTIM],[DELFLAG],[InsertedDateTime],[IsCurrent])
SELECT STG.[STYLE],STG.[DELDATTIM],STG.[DELFLAG],DATEADD(MICROSECOND, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)), SYSDATETIME()) as [InsertedDateTime],1 as [IsCurrent]
FROM [C1810429].[DLSTYTOTAL] STG Left Join [C1810429].[STYTOTAL_RAW] TGT
on STG.STYLE = TGT.STYLE AND STG.DELDATTIM = TGT.DELDATTIM
Where TGT.DELDATTIM is null
Atualizar e Excluir:
--update
UPDATE [C1810429].[STYTOTAL_RAW]
SET ISCURRENT = 0,UpdatedDateTime = SYSDATETIME()
WHERE IsCurrent = 1
--delete
delete FROM [C1810429].[STYTOTAL_RAW]
WHERE InsertedDateTime < DATEADD(DAY, -90, SYSDATETIME()) and IsCurrent = 0
Obrigado pela ajuda.
Testei esta partição mensal:
use DB_Partition
go
--drop PARTITION FUNCTION PF_myDateRange
CREATE PARTITION FUNCTION PF_myDateRange ( [datetime2](7))
AS RANGE RIGHT FOR VALUES
(
'20240601',
'20240701',
'20240801',
'20240901',
'20241001',
'20241101'
)
GO
-- not sure I need to time as well in the function
CREATE PARTITION FUNCTION PF_myDateRange ( [datetime2](7))
AS RANGE RIGHT FOR VALUES
(
'2024-06-01 23:59:59.997',
'2024-07-01 23:59:59.997',
'2024-08-01 23:59:59.997',
'2024-09-01 23:59:59.997',
'2024-10-01 23:59:59.997',
'2024-11-01 23:59:59.997'
)
GO
CREATE PARTITION SCHEME PS_myPartitionScheme AS PARTITION PF_myDateRange ALL TO ([PRIMARY]);
-- drop and create index to make existing table into partition data alignment
CREATE UNIQUE CLUSTERED INDEX [PK_C1810429_STYTOTAL_RAW] ON [C1810429].[STYTOTAL_RAW]
(
[STYTOTALID] ASC,
[STYLE] ASC,
[InsertedDateTime] ASC
)
WITH (DROP_EXISTING=ON, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
ON PS_myPartitionScheme([InsertedDateTime]);
Editado: Tentei automatizar usando tempo máximo e +1 segundo etc., mas os dados, ao alternar, vão para limites de partição diferentes quando carrego 2 e 3 etc.
---1
--drop table [C1810429].[STYTOTAL_RAW_Intermediate_Staging]
CREATE TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging(
[STYTOTALID] [int] IDENTITY(1,1) NOT NULL,
[STYLE] [decimal](5, 0) NOT NULL,
[InsertedDateTime] [datetime2](7) NOT NULL,
[UpdatedDateTime] [datetime2](7) NULL,
[IsCurrent] [bit] NULL,
[DELDATTIM] [datetime2](7) NULL,
[DELFLAG] [char](1) NULL,
--/*
CONSTRAINT [PK_C1810429_STYTOTAL_RAW_Intermediate_Staging] PRIMARY KEY CLUSTERED
(
[STYTOTALID] ASC,
[STYLE] ASC,
[InsertedDateTime] ASC
)WITH (STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
) on [Primary]--on PS_myPartitionScheme([InsertedDateTime]);
----
use DB_Partition
--truncate table C1810429.STYTOTAL_RAW_Intermediate_Staging
select * from C1810429.STYTOTAL_RAW_Intermediate_Staging
--00,06,12,18,23:59.59
-- load 1
select max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW]
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 00:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 01:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 11:59:59.999')
-- load 2
select max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW]
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 12:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 13:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 17:59:59.999')
-- load 3
select max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW]
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 18:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 19:00:00.000')
insert into C1810429.STYTOTAL_RAW_Intermediate_Staging (STYLE,InsertedDateTime) values (1,'2024-10-19 23:59:59.999')
--create new partitions for new data load use the primary FG
--get the staging table max date and add one minute and create new boundary
select max([InsertedDateTime]) from [C1810429].STYTOTAL_RAW_Intermediate_Staging
Declare @New_PartitionBoundaryDate_Plus datetime2 (7)
select @New_PartitionBoundaryDate_Plus = max (DATEADD(minute,+1,[InsertedDateTime])) from [C1810429].STYTOTAL_RAW_Intermediate_Staging
select @New_PartitionBoundaryDate_Plus as [Plus]
--load 1 max([InsertedDateTime])
7 25920 2024-10-19 00:00:00.000
8 0 NULL
--load 2 max([InsertedDateTime])+ 1 minute
ALTER PARTITION SCHEME PS_myPartitionScheme NEXT USED [Primary];
ALTER PARTITION FUNCTION PF_myDateRange () SPLIT RANGE('2024-10-19 11:59:59.9970000');
---------
--select * from [C1810429].[STYTOTAL_RAW_Intermediate_Staging]
select min([InsertedDateTime]) ,max([InsertedDateTime]) from [C1810429].[STYTOTAL_RAW_Intermediate_Staging]
-- pass the min and max value of staging table
--load 1 >= min([InsertedDateTime]) <=max([InsertedDateTime])
--load 2 >= min([InsertedDateTime]) <=max([InsertedDateTime])
8 3 2024-10-19 12:00:59.997 -- Here the data is failling under
9 0 2024-10-19 18:00:59.997
10 0 NULL
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
WITH CHECK ADD CONSTRAINT ck_Min_InsertedDateTime
CHECK ([InsertedDateTime] IS NOT NULL AND [InsertedDateTime] >= '2024-10-19 00:00:00.0000000' )
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
WITH CHECK ADD CONSTRAINT ck_Max_InsertedDateTime
CHECK ([InsertedDateTime] IS NOT NULL AND [InsertedDateTime] <= '2024-10-19 11:59:59.9970000' )
-- get the data for bounday and enter the partition no or date
-- Create partition for each load by getting max datetime of [InsertedDateTime] in table [C1810429].[STYTOTAL_RAW]
--2024-10-19 12:00:00.001
Declare @New_PartitionBoundaryDate_SWITCH datetime2 (7)
select @New_PartitionBoundaryDate_SWITCH = max (DATEADD(minute,0,[InsertedDateTime])) from [C1810429].STYTOTAL_RAW_Intermediate_Staging
select @New_PartitionBoundaryDate_SWITCH as [SWITCH] --Get the swith parttion
--load 1 is not working since the last data is in that boundary value
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging SWITCH TO [C1810429].[STYTOTAL_RAW] PARTITION $PARTITION.PF_myDateRange('2024-10-19 11:59:59.997');
--load 1 taken last not null boundary partition no
--load 1 taken last not null boundary partition no
ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging SWITCH TO [C1810429].[STYTOTAL_RAW] PARTITION 8 -- 2024-10-19 12:00:00.001
declare @ck_Min_InsertedDateTime_drop varchar (max)
set @ck_Min_InsertedDateTime_drop = 'ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
drop CONSTRAINT ck_Min_InsertedDateTime;'
--select @ck_Min_InsertedDateTime_drop
Exec (@ck_Min_InsertedDateTime_drop)
declare @ck_Max_InsertedDateTime_drop varchar (max)
set @ck_Max_InsertedDateTime_drop = 'ALTER TABLE [C1810429].STYTOTAL_RAW_Intermediate_Staging
drop CONSTRAINT ck_Max_InsertedDateTime;'
--select @@ck_Max_InsertedDateTime_drop
Exec (@ck_Max_InsertedDateTime_drop)
Existem alguns pontos que podem causar sua falha:
Reorganizei seu código:
Aqui está o código, teste-o