Estou trabalhando em uma tarefa de programação de sistema operacional para entender pipes. texto A tarefa envolve o uso de expressões regulares para extrair informações da saída do strace e tempos de execução de chamadas de sistema estatísticos.
No entanto, estou enfrentando problemas com o cálculo incorreto dos tempos. Suspeito que a função parse_strace_line não esteja analisando corretamente as linhas de saída do strace, especialmente a parte do carimbo de data/hora (por exemplo, <0,000123>), resultando em valores de tempo incorretos ou ausentes nas estatísticas. A função pode não estar lidando corretamente com o formato da linha, com casos extremos como linhas incompletas, ou com o padrão regex, que pode não capturar com precisão o componente de tempo da saída do strace.
Meu ambiente é o Ubuntu rodando no WSL2.
Aqui está o código:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <regex.h>
#define MAX_SYSCALLS 1024 // Maximum distinct syscalls we can track
#define TOP_N 5 // Number of top results to display
/* Syscall tracking structure
* name: syscall name (e.g., "open", "read")
* time: accumulated time spent in this syscall (seconds)
*/
typedef struct {
char name[64];
double time;
} syscall_stat;
/* Main statistics container
* stats: array of individual syscall metrics
* count: number of unique syscalls tracked
* total_time: sum of all syscall times for percentage calculation
*/
typedef struct {
syscall_stat stats[MAX_SYSCALLS];
int count;
double total_time;
} syscall_stats;
/* Parse a line of strace output with timing information (-T option)
* line: input string from strace output
* syscall_name: output parameter for extracted syscall name
* time: output parameter for extracted duration
* Returns: 1 if successful, 0 if line doesn't contain timing info
*
* Strace line format examples:
* openat(AT_FDCWD, "/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000014>
* read(0, "hello\n", 1024) = 6 <0.000010>
*/
int parse_strace_line(char* line, char* syscall_name, double* time) {
// Filter lines without timing information
if (strstr(line, " = ") == NULL || strstr(line, "<") == NULL) {
return 0;
}
// Regular expression pattern breakdown:
// ^(\w+) Capture group 1: syscall name at line start
// \(.*?\) Non-greedy match of arguments between parentheses
// \s+=\s+.*? Match return value portion
// <(\d+\.?\d*)> Capture group 2: time value between <>
regex_t regex;
regcomp(®ex, "^(\\w+)\\(.*?\\)\\s+=\\s+.*?<(\\d+\\.?\\d*)>$", REG_EXTENDED);
regmatch_t matches[3]; // Array for regex capture groups
if (regexec(®ex, line, 3, matches, 0) != 0) {
regfree(®ex);
return 0;
}
// Extract syscall name from first capture group
int name_len = matches[1].rm_eo - matches[1].rm_so;
strncpy(syscall_name, line + matches[1].rm_so, name_len);
syscall_name[name_len] = '\0';
// Convert time string to double
char* time_str = line + matches[2].rm_so;
*time = atof(time_str);
regfree(®ex);
return 1;
}
/* Update statistics with new syscall data
* stats: pointer to statistics container
* name: syscall name to add/update
* time: duration to accumulate
*
* If the syscall already exists, we sum the time.
* If new, we add it to the list (until MAX_SYSCALLS).
*/
void add_syscall(syscall_stats *stats, const char *name, double time) {
// Safety check against array overflow
if (stats->count >= MAX_SYSCALLS) {
fprintf(stderr, "Warning: Reached maximum tracked syscalls (%d)\n", MAX_SYSCALLS);
return;
}
// Check existing entries for duplicate syscall
for (int i = 0; i < stats->count; i++) {
if (strcmp(stats->stats[i].name, name) == 0) {
stats->stats[i].time += time;
stats->total_time += time;
return;
}
}
// Add new entry if space available
if (stats->count < MAX_SYSCALLS) {
strncpy(stats->stats[stats->count].name, name, sizeof(stats->stats[0].name));
stats->stats[stats->count].time = time;
stats->total_time += time;
stats->count++;
}
}
/* Display top N syscalls by accumulated time
* stats: statistics container with all data
* n: number of top results to show
*
* Uses bubble sort for simplicity with small datasets.
* Prints percentages relative to total tracked time.
* Flushes output with nulls for clean pipe termination.
*/
void print_top_syscalls(syscall_stats *stats, int n) {
if (stats->count == 0 || stats->total_time == 0) {
fprintf(stderr, "Error: No valid syscall data to display\n");
return;
}
// Simple O(n²) sort acceptable for MAX_SYSCALLS=1024
for (int i = 0; i < stats->count - 1; i++) {
for (int j = i+1; j < stats->count; j++) {
// Swap if later entry has higher time
if (stats->stats[j].time > stats->stats[i].time) {
syscall_stat temp = stats->stats[i];
stats->stats[i] = stats->stats[j];
stats->stats[j] = temp;
}
}
}
// Determine number of results to display
n = stats->count < n ? stats->count : n;
for (int i = 0; i < n; i++) {
double ratio = (stats->stats[i].time / stats->total_time) * 100;
printf("%s (%d%%)\n", stats->stats[i].name, (int)ratio);
}
/* Null-terminate output for clean pipe communication
* Some programs may read fixed buffer sizes - this ensures
* we flush the pipe completely and avoid hanging readers */
for (int i = 0; i < 80; i++) putchar('\0');
fflush(stdout);
}
/* Main execution flow:
* 1. Validate command line arguments
* 2. Set up parent/child process communication
* 3. Child: execute strace with command
* 4. Parent: process and analyze strace output
*/
int main(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, "Usage: %s COMMAND [ARGS...]\n", argv[0]);
fprintf(stderr, "Example: %s ls -l\n", argv[0]);
return 1;
}
// Create pipe for inter-process communication
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("Failed to create pipe");
return 1;
}
pid_t pid = fork();
if (pid == -1) {
perror("Process fork failed");
return 1;
}
if (pid == 0) { // Child process - execute strace
// Redirect stdout to pipe write end
close(pipefd[0]);
dup2(pipefd[1], STDOUT_FILENO);
close(pipefd[1]);
// Build strace argument array:
// strace -T [user command] [args...]
char* strace_argv[argc + 2];
strace_argv[0] = "/usr/bin/strace";
strace_argv[1] = "-T"; // Enable timing reports
for (int i = 1; i < argc; i++) {
strace_argv[i+1] = argv[i];
}
strace_argv[argc+1] = NULL; // NULL-terminate array
// Execute strace with clean environment
execve("/usr/bin/strace", strace_argv, (char *[]){ "PATH=/bin:/usr/bin", NULL });
perror("Failed to execute strace");
return 1;
} else { // Parent process - analyze output
close(pipefd[1]); // Close unused write end
syscall_stats stats = {0}; // Initialize all fields to zero
char buffer[1024];
char* line_start = buffer;
ssize_t bytes_read;
// Read output in chunks until EOF
while ((bytes_read = read(pipefd[0],
line_start,
sizeof(buffer) - (line_start - buffer))) > 0) {
// Null-terminate the received data
buffer[bytes_read + (line_start - buffer)] = '\0';
line_start = buffer;
// Process complete lines (delimited by newlines)
char* line_end;
while ((line_end = strchr(line_start, '\n')) != NULL) {
*line_end = '\0'; // Temporarily terminate at newline
char syscall_name[64];
double time;
if (parse_strace_line(line_start, syscall_name, &time)) {
add_syscall(&stats, syscall_name, time);
}
line_start = line_end + 1; // Move to next line
}
// Handle partial line remaining in buffer
if (line_start != buffer) {
size_t remaining = buffer + sizeof(buffer) - line_start;
// Move partial line to buffer start for next read
memmove(buffer, line_start, remaining);
line_start = buffer + remaining;
}
}
// Cleanup and display results
close(pipefd[0]);
wait(NULL); // Wait for strace to terminate completely
print_top_syscalls(&stats, TOP_N);
}
return 0;
}
Esta é a linha de comando para teste:
root@DimLights:/mnt/f/WSL/Data/OperatingSystems/2025/os2025/sperf# gcc -g -o sperf sperf.c -lm -lrt
root@DimLights:/mnt/f/WSL/Data/OperatingSystems/2025/os2025/sperf# ./sperf echo "Hello WSL"
execve("/bin/echo", ["echo", "Hello WSL"], 0x7ffe6258ee20 /* 1 var */) = 0 <0.000363>
brk(NULL) = 0x55d4dee1c000 <0.000104>
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f483b684000 <0.000137>
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000115>
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000133>
fstat(3, {st_mode=S_IFREG|0644, st_size=36191, ...}) = 0 <0.000115>
mmap(NULL, 36191, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f483b67b000 <0.000097>
close(3) = 0 <0.000079>
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000108>
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\243\2\0\0\0\0\0"..., 832) = 832 <0.000094>
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 <0.000101>
fstat(3, {st_mode=S_IFREG|0755, st_size=2125328, ...}) = 0 <0.000096>
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 <0.000088>
mmap(NULL, 2170256, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f483b469000 <0.000107>
mmap(0x7f483b491000, 1605632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f483b491000 <0.000117>
mmap(0x7f483b619000, 323584, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b0000) = 0x7f483b619000 <0.000099>
mmap(0x7f483b668000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1fe000) = 0x7f483b668000 <0.000102>
mmap(0x7f483b66e000, 52624, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f483b66e000 <0.000103>
close(3) = 0 <0.000104>
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f483b466000 <0.000085>
arch_prctl(ARCH_SET_FS, 0x7f483b466740) = 0 <0.000049>
set_tid_address(0x7f483b466a10) = 1543 <0.000319>
set_robust_list(0x7f483b466a20, 24) = 0 <0.000098>
rseq(0x7f483b467060, 0x20, 0, 0x53053053) = 0 <0.000123>
mprotect(0x7f483b668000, 16384, PROT_READ) = 0 <0.000096>
mprotect(0x55d4de99a000, 4096, PROT_READ) = 0 <0.000093>
mprotect(0x7f483b6bc000, 8192, PROT_READ) = 0 <0.000103>
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0 <0.000095>
munmap(0x7f483b67b000, 36191) = 0 <0.000118>
getrandom("\xc0\x8d\xa3\xbc\x88\x59\xbb\xd3", 8, GRND_NONBLOCK) = 8 <0.000098>
brk(NULL) = 0x55d4dee1c000 <0.000113>
brk(0x55d4dee3d000) = 0x55d4dee3d000 <0.000097>
fstat(1, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 <0.000092>
write(1, "Hello WSL\n", 10Received from pipe: Hello WSL
) = 10 <0.000216>
close(1) = 0 <0.000165>
close(2) = 0 <0.000092>
exit_group(0) = ?
+++ exited with 0 +++
Total count is zero
Aqui está o que eu tentei e o que eu esperava: usei a depuração do GDB e notei que o loop while ((line = strchr(line_start, '\n')) != NULL) foi executado apenas uma vez ao testar o comando. Não consigo entender por que a saída mostra +++ exited with 0 +++ e Total count is zero. Também pedi ajuda à IA e modifiquei partes da função parse_strace_line e o padrão de expressão regular, mas com pouco sucesso.
O que realmente aconteceu: As estatísticas permanecem vazias (a contagem total é zero), indicando que a análise ou extração de dados falhou.
O que eu esperava: O código deveria analisar corretamente os registros de data e hora das chamadas do sistema a partir da saída do strace, acumular seus tempos e calcular a proporção do tempo de cada chamada em relação ao total.
Como estudante, eu agradeceria muito se você pudesse apontar os erros no meu código e explicar como ler corretamente os tempos de chamada do sistema e convertê-los na proporção.