Aqui está o SQL Fiddle para minha pergunta: https://sqlfiddle.com/sql-server/online-compiler?id=ab1634d7-fec7-4918-ac1c-3f4fcac8dc92
Eu tenho os seguintes dados de amostra:
DROP TABLE IF EXISTS #Price
CREATE TABLE #Price (DataId INT IDENTITY(1,1), NameOfWidget VARCHAR(50), Price MONEY, PriceScheduleId INT,
StartEffectiveWhen DATE, EndEffectiveWhen DATE)
INSERT INTO #Price (NameOfWidget, Price, PriceScheduleId, StartEffectiveWhen, EndEffectiveWhen)
VALUES
('CoolWidget', 3.51, 1, '2015-1-1', '2021-12-31'),
('CoolWidget', 2.00, 2, '2017-1-1', '2022-12-31'),
('CoolWidget', 4.23, 1, '2021-1-1', '2100-12-31'),
('CoolWidget', 2.00, 2, '2021-1-1', '2100-12-31'),
('OtherWidget', 13.24, 1, '2014-1-1', '2100-12-31')
Agora preciso colocar esses dados no seguinte formato:
NameOfWidget StartEffectiveWhen EndEffectiveWhen
CoolWidget 2015-01-01 2016-12-31
CoolWidget 2017-01-01 2021-12-31
CoolWidget 2021-01-01 2022-12-31
CoolWidget 2023-01-01 2100-12-31
OtherWidget 2015-01-01 2100-12-31
Isso segue a seguinte lógica, agrupado por NameOfWidget
:
- Encontra o mais baixo
StartEffectiveWhen
. - Encontra o próximo valor mais baixo
StartEffectiveWhen
ouEndEffectiveWhen
. Essa data se torna a próximaEndEffectiveWhen
. Mas se fosse umEndEffectiveWhen
, então subtraímos um dia dele. - Em seguida, repete as etapas acima, exceto que exclui os dados já utilizados.
O objetivo é ter uma linha para cada "janela" do período de tempo.
O código abaixo faz o que preciso, mas usa um loop para fazer isso.
Como sempre, meus dados reais são muito mais complexos. Ele também possui 56 milhões de linhas. (O código abaixo leva 3 horas para ser executado em meus dados reais)
Estou esperando uma maneira de fazer o que tenho abaixo sem precisar fazer um loop.
Meu código (lento, baseado em loop)
DROP TABLE IF EXISTS #EffectiveRange
CREATE TABLE #EffectiveRange (EffectiveDateId INT IDENTITY(1,1), StartEffectiveWhen DATE, EndEffectiveWhen DATE, EndWhenOfRowsThatMatchStartDate DATE, SecondStartWhen DATE, NameOfWidget VARCHAR(50), CalculationRound INT)
DECLARE @CalculationRound INT = 1
-- This is < 15 in my real code
WHILE (@CalculationRound < 5)
BEGIN
-- Find the first/next range in source price table.
INSERT INTO #EffectiveRange(StartEffectiveWhen, EndWhenOfRowsThatMatchStartDate, SecondStartWhen, NameOfWidget, CalculationRound)
SELECT MIN(price.StartEffectiveWhen) StartWhen, NULL, NULL, price.NameOfWidget, @CalculationRound
FROM #Price price
WHERE price.StartEffectiveWhen >
(SELECT MAX(maxValue.StartWhen)
FROM
(SELECT MAX(rangesSub.StartEffectiveWhen) AS StartWhen
FROM #EffectiveRange AS rangesSub
WHERE rangesSub.NameOfWidget = price.NameOfWidget
UNION ALL
SELECT CAST('1/1/1900' AS DATE) AS StartWhen) AS maxValue)
GROUP BY price.NameOfWidget
-- Find the end date for the rows that match the start date we just found.
UPDATE #EffectiveRange SET
EndWhenOfRowsThatMatchStartDate = calc.EndWhenOfRowsThatMatchStartDateCalc
FROM
(
SELECT MIN(price.EndEffectiveWhen) AS EndWhenOfRowsThatMatchStartDateCalc, price.NameOfWidget
FROM #Price price
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = price.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
WHERE price.StartEffectiveWhen = ranges.StartEffectiveWhen
GROUP BY price.NameOfWidget
) AS calc
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = calc.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
-- Find the next largest start date for the calculation round.
UPDATE #EffectiveRange SET
SecondStartWhen = calc.SecondStartWhen
FROM
(
SELECT MIN(price.StartEffectiveWhen) SecondStartWhen, price.NameOfWidget
FROM #Price price
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = price.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
WHERE price.StartEffectiveWhen > ranges.StartEffectiveWhen
GROUP BY price.NameOfWidget
) AS calc
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = calc.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
-- Send the EndWhen to be the lesser of EndWhenOfRowsThatMatchStartDate and secondStartDate.
-- This will define our window of effectiveness for this round of the test. (once we have all of the windows (aka each time a change was made),
-- we will caclulate the price for each window.
UPDATE #EffectiveRange SET
EndEffectiveWhen = IIF((EndWhenOfRowsThatMatchStartDate < SecondStartWhen) OR SecondStartWhen IS NULL, EndWhenOfRowsThatMatchStartDate, DATEADD(DAY, -1, SecondStartWhen))
WHERE CalculationRound = @CalculationRound
SET @CalculationRound = @CalculationRound + 1
END
-- Show the final result
SELECT ranges.NameOfWidget, ranges.StartEffectiveWhen, ranges.EndEffectiveWhen
FROM #EffectiveRange ranges
ORDER BY ranges.NameOfWidget, ranges.StartEffectiveWhen
DROP TABLE IF EXISTS #EffectiveRange
DROP TABLE IF EXISTS #Price
Atualizar
Este SQL Fiddle mostra o que acabei fazendo:
https://sqlfiddle.com/sql-server/online-compiler?id=b0d81632-b14d-4374-a80e-0835750f48bc
@Akina me fez pensar sobre meu problema na direção certa. (Obrigado @Akina!)
Por precaução, aqui está a consulta que acabei usando:
DROP TABLE IF EXISTS #Price
CREATE TABLE #Price (DataId INT IDENTITY(1,1), NameOfWidget VARCHAR(50), Price MONEY, PriceScheduleId INT, StartEffectiveWhen DATE, EndEffectiveWhen DATE)
INSERT INTO #Price (NameOfWidget, Price, PriceScheduleId, StartEffectiveWhen, EndEffectiveWhen)
VALUES
('CoolWidget', 3.51, 1, '2015-1-1', '2021-12-31'),
('CoolWidget', 2.00, 2, '2017-1-1', '2022-12-31'),
('CoolWidget', 4.23, 1, '2021-1-1', '2100-12-31'),
('CoolWidget', 2.00, 2, '2021-1-1', '2100-12-31'),
('OtherWidget', 13.24, 1, '2014-1-1', '2018-5-4'),
('OtherWidget', 13.24, 1, '2018-5-6', '2019-12-31'),
('OtherWidget', 13.24, 1, '2020-1-1', '2100-12-31')
;WITH OrderedDates AS
(
SELECT priceStart.NameOfWidget, priceStart.StartEffectiveWhen AS DateWhen, 1 AS IsStartDate, 0 AS IsEndDate
FROM #Price priceStart
UNION
SELECT priceStart.NameOfWidget, priceStart.EndEffectiveWhen AS DateWhen, 0 AS IsStartDate, 1 AS IsEndDate
FROM #Price priceStart
)
SELECT OrderedDates.NameOfWidget,
CASE
WHEN LAG(OrderedDates.DateWhen) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen) IS NULL THEN '1900-1-1'
WHEN LAG(OrderedDates.IsStartDate ) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen) = 1
THEN LAG(OrderedDates.DateWhen) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen)
ELSE DATEADD(DAY, 1, LAG(OrderedDates.DateWhen) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen))
END AS StartEffectiveWhen,
CASE
WHEN OrderedDates.IsEndDate = 1 THEN OrderedDates.DateWhen
ELSE DATEADD(DAY, -1, OrderedDates.DateWhen)
END AS EndEffectiveWhen
FROM OrderedDates
ORDER BY OrderedDates.NameOfWidget
violino
PS. Os dados de origem são fixos e
('CoolWidget', 2.00, 2, '2021-1-1', '2100-12-31')
alterados para('CoolWidget', 2.00, 2, '2023-1-1', '2100-12-31')
.