(更新:今天,tar 在没有段错误的情况下工作,系统上没有已知的更改。“rm”命令现在是段错误。啊!也许这是由 ESXi 引起的?或者,也许是硬件。)
我有一个在 VMWare ESXi 上运行的 Centos 5.3 系统。它已经运行了一段时间,没有太大问题。然而,最近,我注意到 tar 命令和 rpm 命令在我运行它们时会出现分段错误。
所以,我尝试使用 strace(发现有人在网上提出了这个建议),下面是我使用 strace 时得到的结果。我想知道如何修复我的系统以及如何防止将来发生这种情况。
提前谢谢大家!
输出:
[root@wagon init.d]# strace -f rpm execve("/bin/rpm", ["rpm"], [/* 20 vars */]) = 0 brk(0) = 0x9e98000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (没有这样的文件或目录) 打开(“/etc/ld.so.cache”,O_RDONLY)= 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=51424, ...}) = 0 mmap2(NULL, 51424, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f79000 关闭(3)= 0 打开(“/usr/lib/librpm-4.4.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\ 257\0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=378528, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f78000 mmap2(NULL, 588416, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xd43000 mmap2(0xd9d000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x59) = 0xd9d000 mmap2(0xda1000, 203392, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xda1000 关闭(3)= 0 打开(“/usr/lib/librpmdb-4.4.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0P\307\ 1\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=1134540, ...}) = 0 mmap2(NULL, 1139296, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x627000 mmap2(0x739000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x111) = 0x739000 mmap2(0x73d000, 608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x73d000 关闭(3)= 0 打开(“/lib/libselinux.so.1”,O_RDONLY)= 3 读取(3,“\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\0005\ 0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=91892, ...}) = 0 mmap2(NULL, 97112, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xc25000 mmap2(0xc3b000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15) = 0xc3b000 关闭(3)= 0 打开(“/usr/lib/librpmio-4.4.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0`\276 \0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=396276, ...}) = 0 mmap2(NULL, 540192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb64000 mmap2(0xbc3000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5e) = 0xbc3000 mmap2(0xbc6000, 138784, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xbc6000 关闭(3)= 0 打开(“/usr/lib/libpopt.so.0”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\260\ 20\0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=29008, ...}) = 0 mmap2(NULL, 31856, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7c5000 mmap2(0x7cc000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6) = 0x7cc000 关闭(3)= 0 打开(“/usr/lib/libsqlite3.so.0”,O_RDONLY)= 3 读(3,“\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\340= \327\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=385180, ...}) = 0 mmap2(0xd6a000, 387072, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x110000 mmap2(0x16d000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5c) = 0x16d000 关闭(3)= 0 打开(“/usr/lib/libelf.so.1”,O_RDONLY)= 3 读(3,“\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0000\35\ 0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=87508, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f77000 mmap2(NULL, 90296, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x9cf000 mmap2(0x9e4000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14) = 0x9e4000 关闭(3)= 0 打开(“/lib/libm.so.6”,O_RDONLY)= 3 读(3,“\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\0204\ 0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=206380, ...}) = 0 mmap2(NULL, 155760, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x16f000 mmap2(0x194000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x24) = 0x194000 关闭(3)= 0 打开(“/usr/lib/libz.so.1”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\300\ 245\303\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=75028, ...}) = 0 mmap2(0xc39000, 76400, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x196000 mmap2(0x1a8000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x11) = 0x1a8000 关闭(3)= 0 打开(“/usr/lib/libnss3.so”,O_RDONLY)= 3 读取(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\0\ 0054\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=1306252, ...}) = 0 mmap2(0x5714000, 1310584, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x5714000 mmap2(0x584e000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x139) = 0x584e000 关闭(3)= 0 打开(“/usr/lib/libplds4.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0000\312\ 205\0054\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=11460, ...}) = 0 mmap2(0x585c000, 8672, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x585c000 mmap2(0x585e000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2) = 0x585e000 关闭(3)= 0 打开(“/usr/lib/libplc4.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360m\ 205\0054\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=15556, ...}) = 0 mmap2(0x5856000, 12744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x5856000 mmap2(0x5859000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3) = 0x5859000 关闭(3)= 0 打开(“/usr/lib/libnspr4.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0P\227\ 206\0054\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=228028, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f76000 mmap2(0x5861000, 232928, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x5861000 mmap2(0x5897000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x36) = 0x5897000 mmap2(0x5898000, 7648, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x5898000 关闭(3)= 0 打开(“/lib/libdl.so.2”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0P\n\ 0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=14644, ...}) = 0 mmap2(NULL, 12408, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2a7000 mmap2(0x2a9000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1) = 0x2a9000 关闭(3)= 0 打开(“/lib/librt.so.1”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200\ 30\0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=42048, ...}) = 0 mmap2(NULL, 33324, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7d0000 mmap2(0x7d7000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6) = 0x7d7000 关闭(3)= 0 打开(“/lib/libpthread.so.0”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0000H\0\ 0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=123596, ...}) = 0 mmap2(NULL, 90592, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x1a9000 mmap2(0x1bc000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12) = 0x1bc000 mmap2(0x1be000, 4576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x1be000 关闭(3)= 0 打开(“/usr/lib/libbz2.so.1”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\240\ 17\364\0034\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=71852, ...}) = 0 mmap2(0x3f40000, 69128, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3f40000 mmap2(0x3f50000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x10) = 0x3f50000 关闭(3)= 0 打开(“/lib/libc.so.6”,O_RDONLY)= 3 读(3,“\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320_\ 1\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=1603392, ...}) = 0 mmap2(NULL, 1324452, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2ab000 mmap2(0x3e9000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13e) = 0x3e9000 mmap2(0x3ec000, 9636, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3ec000 关闭(3)= 0 打开(“/lib/libsepol.so.1”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\340\ 256\306\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=242880, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f75000 mmap2(0xc68000, 286624, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xc68000 mmap2(0xca3000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3a) = 0xca3000 mmap2(0xca4000, 40864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xca4000 关闭(3)= 0 打开(“/lib/libgcc_s.so.1”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0`\26 \0\0004\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=44992, ...}) = 0 mmap2(NULL, 48036, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x435000 mmap2(0x440000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xa) = 0x440000 关闭(3)= 0 打开(“/usr/lib/libnssutil3.so”,O_RDONLY)= 3 读(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\20\ 350o\0054\0\0\0"..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=96764, ...}) = 0 mmap2(0x56fa000, 98028, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x56fa000 mmap2(0x570f000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14) = 0x570f000 关闭(3)= 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f74000 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f73000 set_thread_area({entry_number:-1 -> 6, base_addr:0xb7f736d0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0 mprotect(0x3e9000, 8192, PROT_READ) = 0 mprotect(0x1bc000, 4096, PROT_READ) = 0 mprotect(0x7d7000, 4096, PROT_READ) = 0 mprotect(0x2a9000, 4096, PROT_READ) = 0 mprotect(0x194000, 4096, PROT_READ) = 0 mprotect(0xff9000, 4096, PROT_READ) = 0 munmap(0xb7f79000, 51424) = 0 set_tid_address(0xb7f73718) = 5604 set_robust_list(0xb7f73720, 0xc) = 0 futex(0xbf9763c4, FUTEX_WAKE_PRIVATE, 1) = 0 rt_sigaction(SIGRTMIN, {0x1ad3d0, [], SA_SIGINFO}, NULL, 8) = 0 rt_sigaction(SIGRT_1, {0x1ad2e0, [], SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=10240*1024, rlim_max=RLIM_INFINITY}) = 0 uname({sys="Linux", node="wagon.localdomain", ...}) = 0 访问(“/etc/selinux/”,F_OK)= 0 brk(0) = 0x9e98000 brk(0x9eb9000) = 0x9eb9000 打开(“/etc/selinux/config”,O_RDONLY|O_LARGEFILE)= 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=511, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f85000 read(3, "#这个文件控制状态o"..., 4096) = 511 读取(3,“”,4096)= 0 关闭(3)= 0 munmap(0xb7f85000, 4096) = 0 打开(“/proc/mounts”,O_RDONLY|O_LARGEFILE)= 3 fstat64(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f85000 读取(3,“rootfs / rootfs rw 0 0\n/dev/root”...,4096)= 587 读取(3,“”,4096)= 0 关闭(3)= 0 munmap(0xb7f85000, 4096) = 0 --- SIGSEGV(分段错误)@ 0 (0) --- +++ 被 SIGSEGV 杀死 +++ [root@wagon init.d]#
即使没有太多调查,根据您的描述,一些程序开始出现段错误,然后它们开始正常工作,而其他程序开始出现故障,您的内存模块或预链接损坏。
首先,停止所有虚拟机,重新启动主机并运行内存测试。您必须从 ESXi 外部执行此操作。如果您发现任何内存缺陷,那就是您的问题。更换硬件。
如果没有发现内存缺陷,请检查您是否正在为您的架构运行来自 CentOS 的最新可用内核。
再次启动主机和服务器,服务器处于单用户模式(将“single”传递给 grub 内核参数)并运行:
预链接完成后,您应该重新启动服务器。您也可以
telinit u && init 3
恢复启动,但最好重新启动以确保所有二进制文件都将使用新的内存映射重新加载。这是一个猜测,但是,请尝试关闭 se-linix 并查看它是否有效。
它不应该是段错误,但是,可能有一个文件由于 se-linux 而无法打开,这是 tar 从未想过会发生的。
否则,您必须安装 tar 的源 rpm,并通过调试构建它,然后在调试器中运行它以查看原因。
好吧,没有我想象的那么复杂。服务器遭到入侵,天才黑客搞砸了 root 工具包安装。所以,影响是二进制segfaulting。另一个影响是来自服务器的意外网络流量。感谢所有回复的人!
好吧,听起来很有趣。
首先,您是否有两个相同的最新框(其中一个不是段错误的)?
如果是这样,请检查存在段错误的二进制文件的 md5sum,确保它们相同。
接下来,在失败的二进制文件上运行 ldd,然后在库上运行 md5sum 以查看它们是否不同。
现在,假设库是相同的,还有其他问题,可能是机器环境或配置,但让我们看看 strace 是否有任何启发。
引用您的 strace,接近尾声,您有:
下一行是段错误消息。
在我的 CentOS 5.3 机器(rpm 版本 4.4.2.3)上,我的 rpm 立即执行以下操作:
因此,我敢说问题是在读取 /proc/mounts 或打开 /usr/lib/locale/locale-archive 后关闭内存。由于 munmap() 返回 0,我会在语言环境树上咆哮。
顺便说一句,如果您的配置不包括语言环境,我执行的下一个块是:
祝你好运,让我们知道你发现了什么。