我有一台配备 PERC H310 Mini (LSI 2008 MegaRAID) 控制器的 Dell R720,刚刚装上了 4 块新的 Seagate Nytro XS1600LE70045 1.6TB SSD,并将它们配置为独立磁盘,用于 Linux 软件 RAID。(请暂时忽略软件 RAID 与硬件 RAID 之争,其中的原因与本问题无关。)
我可以很好地读取和写入所有驱动器,没有错误。我可以成功地使用 Ext4 对其进行分区和格式化,但在 ext4 格式化过程中出现一个内核错误:
[15962.216694] sd 0:0:3:0: [sdd] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_OK
[15962.216698] sd 0:0:3:0: [sdd] tag#0 CDB: Unmap/Read sub-channel 42 00 00 00 00 00 00 00 18 00
[15962.216699] print_req_error: I/O error, dev sdd, sector 27469824
smartctl -a 不会报告驱动器本身的任何错误。
当我组装一个 RAID 设备并尝试对其进行格式化时,会引发数千个内核错误。这些错误似乎像超时一样发生,在内核日志中大约每秒出现 1 条,并且会拖慢与驱动器的所有其他通信,包括 smartctl 命令。
使用 strace,普通分区上的 BLKDISCARD 似乎失败,然后 mkfs.ext4 不再尝试。但是,当它在 md 设备上运行 BLKDISCARD 时,BLKDISCARD 返回 0,然后它发出更多 BLKDISCARD 命令,同时内核驱动程序使它们全部失败。我的内核是Linux 4.19.23,x86_64
除了必须等待数十分钟才能让所有 I/O 错误输出完(mkfs.ext4 最终会成功)这一烦人的问题之外,我想知道这是否意味着我的硬件或软件链中的某个环节无法正确发出 BLKDISCARD 命令;如果是,到底是哪个环节?在这种情况下,SSD 的性能可能不会那么好。
有谁知道运行 Linux 4.19.23、配备 PERC H310 Mini 和 SAS Nytro 驱动器(在本例中为 12Gb/s 的驱动器,连接到 6Gb/s 的控制器)的 Dell R720 是否存在无法发出 BLKDISCARD(TRIM)命令的问题?
更新
PERC H310 的固件版本为 20.13.3-0001。
TRIM 是 SATA 命令,SAS 驱动器不太可能支持。SAS 的等效命令是"UNMAP",可以通过命令行实用程序 sg_unmap 调用。sg_unmap 命令在我的系统上也失败,并出现相同的 I/O 错误。
还使用内核 4.19.307 进行了测试,但没有变化。
sg_unmap
$ sg_unmap --lba=1 --num=1 -f /dev/sdb
unmap: transport: Host_status=0x07 [DID_ERROR]
sg_unmap failed: Transport error
$ dmesg | tail
[ 1284.253975] sd 0:0:1:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_OK
[ 1284.253979] sd 0:0:1:0: [sdb] tag#0 CDB: Unmap/Read sub-channel 42 00 00 00 00 00 00 00 18 00
[ 1284.253981] print_req_error: I/O error, dev sdb, sector 0
lshw
*-pci:3
description: PCI bridge
product: Xeon E5/Core i7 IIO PCI Express Root Port 2c
vendor: Intel Corporation
physical id: 2.2
bus info: pci@0000:00:02.2
version: 07
width: 32 bits
clock: 33MHz
capabilities: pci msi pciexpress pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:24 ioport:f000(size=4096) memory:dd000000-ddffffff
*-storage
description: RAID bus controller
product: MegaRAID SAS 2008 [Falcon]
vendor: LSI Logic / Symbios Logic
physical id: 0
bus info: pci@0000:03:00.0
logical name: scsi0
version: 03
width: 64 bits
clock: 33MHz
capabilities: storage pm pciexpress vpd msi msix bus_master cap_list rom
configuration: driver=megaraid_sas latency=0
resources: irq:26 ioport:fc00(size=256) memory:ddffc000-ddffffff memory:ddf80000-ddfbffff memory:dd000000-dd01ffff
...
*-disk:1
description: SCSI Disk
physical id: 0.1.0
bus info: scsi@0:0.1.0
logical name: /dev/sdb
size: 1490GiB (1600GB)
capabilities: gpt-1.00 partitioned partitioned:gpt
configuration: guid=f46bd41c-6e46-4c0c-b19f-ea2d3fe79f86
系统调用跟踪:
strace mkfs.ext4
在单个分区上:
ioctl(3, BLKALIGNOFF, 0) = 0
ioctl(3, BLKIOMIN, 4096) = 0
ioctl(3, BLKIOOPT, 0) = 0
ioctl(3, BLKPBSZGET, 4096) = 0
ioctl(3, BLKSSZGET, 512) = 0
close(3) = 0
access("/sys/fs/ext4/features/lazy_itable_init", R_OK) = 0
open("/dev/sdd4", O_RDONLY|O_EXCL) = 3
stat("/dev/sdd4", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 52), ...}) = 0
ioctl(3, BLKDISCARDZEROES, 0) = 0
pread(3, "01\245\5\0\200\211\26\231y \1\256\6.\26%1\245\5\0\0\0\0\2\0\0\0\2\0\0\0"..., 1024, 1024) = 1024
close(3) = 0
access("/var/lib/e2fsprogs", W_OK) = -1 ENOENT (No such file or directory)
open("/dev/sdd4", O_RDWR|O_EXCL) = 3
stat("/dev/sdd4", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 52), ...}) = 0
ioctl(3, BLKDISCARDZEROES, 0) = 0
ioctl(3, BLKROGET, 0) = 0
mmap(NULL, 376832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f06c4527000
ioctl(3, BLKDISCARD, {0, 7ffc9a2a78a0}) = -1 EIO (Input/output error)
pwrite(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 3072, 1024) = 3072
stat("/dev/random", {st_mode=S_IFCHR|0666, st_rdev=makedev(1, 9), ...}) = 0
open("/dev/urandom", O_RDONLY|O_CLOEXEC) = 4
fcntl(4, F_GETFD) = 0x1 (flags FD_CLOEXEC)
fcntl(4, F_SETFD, FD_CLOEXEC) = 0
getuid() = 0
getppid() = 19277
read(4, "(=\233R-\333\371\3*}\357\205\226\31&t", 16) = 16
close(4) = 0
gettid() = 19281
stat("/dev/random", {st_mode=S_IFCHR|0666, st_rdev=makedev(1, 9), ...}) = 0
open("/dev/urandom", O_RDONLY|O_CLOEXEC) = 4
fcntl(4, F_GETFD) = 0x1 (flags FD_CLOEXEC)
fcntl(4, F_SETFD, FD_CLOEXEC) = 0
getuid() = 0
getppid() = 19277
read(4, "\264\27\315\3155X\376\220\355\347\353\366\26\237\214!", 16) = 16
close(4) = 0
gettid() = 19281
write(1, "Creating filesystem with 3781099"..., 65Creating filesystem with 378109952 4k blocks and 94712112 inodes
strace mkfs.ext4
在 md 设备上:
ioctl(3, BLKDISCARDZEROES, 0) = 0
pread(3, "\0\0\24\0\0\360O\0003\377\3\0\257\2N\0\365\377\23\0\0\0\0\0\2\0\0\0\2\0\0\0"..., 1024, 1024) = 1024
close(3) = 0
access("/var/lib/e2fsprogs", W_OK) = -1 ENOENT (No such file or directory)
open("/dev/md125", O_RDWR|O_EXCL) = 3
stat("/dev/md125", {st_mode=S_IFBLK|0660, st_rdev=makedev(9, 125), ...}) = 0
ioctl(3, BLKDISCARDZEROES, 0) = 0
ioctl(3, BLKROGET, 0) = 0
uname({sys="Linux", node="cildat1", ...}) = 0
ioctl(3, BLKDISCARD, {0, 7ffd8c6d0930}) = 0
write(1, "Discarding device blocks: ", 26Discarding device blocks: ) = 26
write(1, " 4096/5238784", 15 4096/5238784) = 15
write(1, "\10\10\10\10\10\10\10\10\10\10\10\1) = 150\10", 15
ioctl(3, BLKDISCARD, {1000000, 7ffd8c6d0930}