这是我的问题的 SQL Fiddle: https://sqlfiddle.com/sql-server/online-compiler? id=ab1634d7-fec7-4918-ac1c-3f4fcac8dc92
我有以下示例数据:
DROP TABLE IF EXISTS #Price
CREATE TABLE #Price (DataId INT IDENTITY(1,1), NameOfWidget VARCHAR(50), Price MONEY, PriceScheduleId INT,
StartEffectiveWhen DATE, EndEffectiveWhen DATE)
INSERT INTO #Price (NameOfWidget, Price, PriceScheduleId, StartEffectiveWhen, EndEffectiveWhen)
VALUES
('CoolWidget', 3.51, 1, '2015-1-1', '2021-12-31'),
('CoolWidget', 2.00, 2, '2017-1-1', '2022-12-31'),
('CoolWidget', 4.23, 1, '2021-1-1', '2100-12-31'),
('CoolWidget', 2.00, 2, '2021-1-1', '2100-12-31'),
('OtherWidget', 13.24, 1, '2014-1-1', '2100-12-31')
我现在需要将此数据转换为以下格式:
NameOfWidget StartEffectiveWhen EndEffectiveWhen
CoolWidget 2015-01-01 2016-12-31
CoolWidget 2017-01-01 2021-12-31
CoolWidget 2021-01-01 2022-12-31
CoolWidget 2023-01-01 2100-12-31
OtherWidget 2015-01-01 2100-12-31
这遵循以下逻辑,按 分组NameOfWidget
:
- 找到最低的
StartEffectiveWhen
. - 查找下一个最低值
StartEffectiveWhen
或EndEffectiveWhen
。该日期成为下一个日期EndEffectiveWhen
。但如果它是EndEffectiveWhen
,那么我们从中减去一天。 - 然后,它会重复上述步骤,但排除已使用的数据。
目标是每个时间范围“窗口”都有一行。
下面的代码满足我的需要,但它使用循环来完成。
和往常一样,我的真实数据要复杂得多。它还具有 5600 万行。(下面的代码在我的真实数据上运行需要3个小时)
我希望有一种方法可以完成下面的操作而无需循环。
我的(缓慢的、基于循环的)代码
DROP TABLE IF EXISTS #EffectiveRange
CREATE TABLE #EffectiveRange (EffectiveDateId INT IDENTITY(1,1), StartEffectiveWhen DATE, EndEffectiveWhen DATE, EndWhenOfRowsThatMatchStartDate DATE, SecondStartWhen DATE, NameOfWidget VARCHAR(50), CalculationRound INT)
DECLARE @CalculationRound INT = 1
-- This is < 15 in my real code
WHILE (@CalculationRound < 5)
BEGIN
-- Find the first/next range in source price table.
INSERT INTO #EffectiveRange(StartEffectiveWhen, EndWhenOfRowsThatMatchStartDate, SecondStartWhen, NameOfWidget, CalculationRound)
SELECT MIN(price.StartEffectiveWhen) StartWhen, NULL, NULL, price.NameOfWidget, @CalculationRound
FROM #Price price
WHERE price.StartEffectiveWhen >
(SELECT MAX(maxValue.StartWhen)
FROM
(SELECT MAX(rangesSub.StartEffectiveWhen) AS StartWhen
FROM #EffectiveRange AS rangesSub
WHERE rangesSub.NameOfWidget = price.NameOfWidget
UNION ALL
SELECT CAST('1/1/1900' AS DATE) AS StartWhen) AS maxValue)
GROUP BY price.NameOfWidget
-- Find the end date for the rows that match the start date we just found.
UPDATE #EffectiveRange SET
EndWhenOfRowsThatMatchStartDate = calc.EndWhenOfRowsThatMatchStartDateCalc
FROM
(
SELECT MIN(price.EndEffectiveWhen) AS EndWhenOfRowsThatMatchStartDateCalc, price.NameOfWidget
FROM #Price price
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = price.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
WHERE price.StartEffectiveWhen = ranges.StartEffectiveWhen
GROUP BY price.NameOfWidget
) AS calc
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = calc.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
-- Find the next largest start date for the calculation round.
UPDATE #EffectiveRange SET
SecondStartWhen = calc.SecondStartWhen
FROM
(
SELECT MIN(price.StartEffectiveWhen) SecondStartWhen, price.NameOfWidget
FROM #Price price
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = price.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
WHERE price.StartEffectiveWhen > ranges.StartEffectiveWhen
GROUP BY price.NameOfWidget
) AS calc
JOIN #EffectiveRange ranges
ON ranges.NameOfWidget = calc.NameOfWidget
AND ranges.CalculationRound = @CalculationRound
-- Send the EndWhen to be the lesser of EndWhenOfRowsThatMatchStartDate and secondStartDate.
-- This will define our window of effectiveness for this round of the test. (once we have all of the windows (aka each time a change was made),
-- we will caclulate the price for each window.
UPDATE #EffectiveRange SET
EndEffectiveWhen = IIF((EndWhenOfRowsThatMatchStartDate < SecondStartWhen) OR SecondStartWhen IS NULL, EndWhenOfRowsThatMatchStartDate, DATEADD(DAY, -1, SecondStartWhen))
WHERE CalculationRound = @CalculationRound
SET @CalculationRound = @CalculationRound + 1
END
-- Show the final result
SELECT ranges.NameOfWidget, ranges.StartEffectiveWhen, ranges.EndEffectiveWhen
FROM #EffectiveRange ranges
ORDER BY ranges.NameOfWidget, ranges.StartEffectiveWhen
DROP TABLE IF EXISTS #EffectiveRange
DROP TABLE IF EXISTS #Price
更新
这个 SQL Fiddle 显示了我最终做了什么:
https://sqlfiddle.com/sql-server/online-compiler?id=b0d81632-b14d-4374-a80e-0835750f48bc
@Akina 让我朝着正确的方向思考我的问题。(谢谢@Akina!)
以防万一,这是我最终使用的查询:
DROP TABLE IF EXISTS #Price
CREATE TABLE #Price (DataId INT IDENTITY(1,1), NameOfWidget VARCHAR(50), Price MONEY, PriceScheduleId INT, StartEffectiveWhen DATE, EndEffectiveWhen DATE)
INSERT INTO #Price (NameOfWidget, Price, PriceScheduleId, StartEffectiveWhen, EndEffectiveWhen)
VALUES
('CoolWidget', 3.51, 1, '2015-1-1', '2021-12-31'),
('CoolWidget', 2.00, 2, '2017-1-1', '2022-12-31'),
('CoolWidget', 4.23, 1, '2021-1-1', '2100-12-31'),
('CoolWidget', 2.00, 2, '2021-1-1', '2100-12-31'),
('OtherWidget', 13.24, 1, '2014-1-1', '2018-5-4'),
('OtherWidget', 13.24, 1, '2018-5-6', '2019-12-31'),
('OtherWidget', 13.24, 1, '2020-1-1', '2100-12-31')
;WITH OrderedDates AS
(
SELECT priceStart.NameOfWidget, priceStart.StartEffectiveWhen AS DateWhen, 1 AS IsStartDate, 0 AS IsEndDate
FROM #Price priceStart
UNION
SELECT priceStart.NameOfWidget, priceStart.EndEffectiveWhen AS DateWhen, 0 AS IsStartDate, 1 AS IsEndDate
FROM #Price priceStart
)
SELECT OrderedDates.NameOfWidget,
CASE
WHEN LAG(OrderedDates.DateWhen) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen) IS NULL THEN '1900-1-1'
WHEN LAG(OrderedDates.IsStartDate ) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen) = 1
THEN LAG(OrderedDates.DateWhen) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen)
ELSE DATEADD(DAY, 1, LAG(OrderedDates.DateWhen) OVER (PARTITION BY OrderedDates.NameOfWidget ORDER BY OrderedDates.DateWhen))
END AS StartEffectiveWhen,
CASE
WHEN OrderedDates.IsEndDate = 1 THEN OrderedDates.DateWhen
ELSE DATEADD(DAY, -1, OrderedDates.DateWhen)
END AS EndEffectiveWhen
FROM OrderedDates
ORDER BY OrderedDates.NameOfWidget
小提琴
附言。源数据已固定,
('CoolWidget', 2.00, 2, '2021-1-1', '2100-12-31')
更改为('CoolWidget', 2.00, 2, '2023-1-1', '2100-12-31')
.