描述:
我尝试在 MSSQL 2012 Express 的空表中插入 100 万行。这是我的脚本:
-- set statistics time off
drop table t1
create table t1 (id int, a text, b text)
go
-- #1 - 1,000,000 - 30s -> 45s
with ID(number) as
(
select 1 as number
union all
select number + 1
from ID
where number < 1000000 + 1
)
insert into t1
select number, 'a_' + cast (number as varchar), 'b_' + cast (number/2 as varchar)
from ID
option(maxrecursion 0)
-- #2 - 1 million rows => ~140,000 rows = 120s (have to cancel query)
declare @count int
set @count = 0
while @count < 1000000
begin
set @count = @count + 1
insert into t1
values(@count, 'a_' + cast (@count as varchar), 'b_' + cast (@count/2 as varchar))
end
-- #3 - ~1,300,000 rows - 18s -> 20s
with temp as
(
SELECT ROW_NUMBER() OVER(ORDER BY a.object_id) as tcount
from sys.all_columns a, sys.all_columns b
where a.object_id = b.object_id
)
insert into t1
select tcount, 'a_' + cast (tcount as varchar), 'b_' + cast (tcount/2 as varchar)
from temp
go
declare @count int
set @count = 0
while @count < 3
begin
with temp as (select max(id) + 1 as max_id from t1)
insert into t1
select max_id, 'a_' + cast (max_id as varchar), 'b_' + cast (max_id/2 as varchar)
from t1, temp
set @count = @count + 1
end
-- #4 -- 1,000,000 = 3s -> 4s (have to drop t1 first)
with a(k) as
(
select 1 as k
union all
select k + 1 from a where k < 99 + 1
) ,
t2 as (
select row_number() over(order by x.k) as k
from a x , a y , a z
)
select k as id , 'a_' + cast (k as varchar) as a, 'b_' + cast (k/2 as varchar) as b into t1
from t2
问题:
经过研究,我找到了4个解决方案。有没有更好的解决方案(不使用文件中的复制数据)?
Itzik Ben-Gan 使用以下方法 这可能是他发现的最快的方法,而且他很聪明 :-)
dnoeth答案的变体:
这避免了存储 a 和 b 的值;它们的值将根据需要在运行时计算。这可能有点作弊,但它确实有优势:
ROW_NUMBER
返回bigint(未压缩的 8 个字节)。或者,将所有列存储在表中:
请注意在id列上转换为整数,以及在varchar类型上使用特定长度。看:
要踢的坏习惯:Aaron Bertrand声明没有(长度)的VARCHAR
方法一:@dnoeth 上面,插入时间:1077ms - 1180ms(10次测试)
方法二:我尝试用这种方法插入,插入时间 989ms -> 1132ms
很简单。
方法3:来自Paul White的想法,450ms
dnoeth 答案的另一种变体:
因为 6^8 (1 679 616) 比 2^32 (4 294 967 296) 更接近 1000000,所以使用 6 作为基础更有效