示例数据如下:
library(dplyr)
library(lubridate)
library(tidyr)
set.seed(123)
dummy_data <- tibble(
focalbear_event = rep(c("event1", "event2"), each = 10),
timestamp_focal.bhv.change = c(
seq(ymd_hms("2023-09-22 10:00:00"), by = "1 min", length.out = 10), # Event 1 timestamps
seq(ymd_hms("2023-09-22 11:00:00"), by = "2 min", length.out = 10) # Event 2 with missing intervals
),
behavior = sample(c("Fishing", "Alertness", "Resting"), 20, replace = TRUE),
land_human_num = sample(0:5, 20, replace = TRUE),
num_boats = sample(0:3, 20, replace = TRUE)
)
dummy_data <- dummy_data %>%
slice(-c(5, 16)) %>% # Removing some rows to create gaps
bind_rows(tibble(focalbear_event = "event1", timestamp_focal.bhv.change = ymd_hms("2023-09-22 10:03:00"), behavior = "Fishing", land_human_num = 2, num_boats = 1)) # Adding a duplicate
我希望每个案例都从时间戳中提取 2 分钟的时间间隔,如下所示:
# Remove duplicate timestamps within the same minute for each group, keeping only the first occurrence
dummy_data <- dummy_data %>%
group_by(focalbear_event) %>%
arrange(timestamp_focal.bhv.change) %>%
distinct(floor_timestamp = floor_date(timestamp_focal.bhv.change, unit = "2 minutes"), .keep_all = TRUE) %>%
ungroup() %>%
select(-floor_timestamp)
# Function to expand timestamps for each group
expand_timestamps <- function(df) {
df %>%
group_by(focalbear_event) %>%
complete(timestamp_focal.bhv.change = seq(min(timestamp_focal.bhv.change), max(timestamp_focal.bhv.change), by = "2 mins")) %>%
fill(everything(), .direction = "down") %>%
ungroup()
}
dummy_data <- expand_timestamps(dummy_data)
但在某些情况下,时间间隔是 1 分钟,而不是 2 分钟。知道为什么吗?
根据代码中的逻辑,您可以使用: