将复制活动的序列号添加到 Blob

Question

kalex

Asked: 2024-09-27 03:23:41 +0800 CST

重新格式化数据以包含 2 分钟的时间间隔

772

示例数据如下：

library(dplyr)
library(lubridate)
library(tidyr)

# Reproducible toy data: two events with ten observations each. event1 is
# sampled every minute, event2 every two minutes.
set.seed(123)
dummy_data <- tibble(
  focalbear_event = rep(c("event1", "event2"), each = 10),
  timestamp_focal.bhv.change = c(
    seq(from = ymd_hms("2023-09-22 10:00:00"), by = "1 min", length.out = 10),
    seq(from = ymd_hms("2023-09-22 11:00:00"), by = "2 min", length.out = 10)
  ),
  # The three sample() calls run in this order, which fixes the random draws.
  behavior = sample(c("Fishing", "Alertness", "Resting"), 20, replace = TRUE),
  land_human_num = sample(0:5, 20, replace = TRUE),
  num_boats = sample(0:3, 20, replace = TRUE)
) %>%
  # Drop rows 5 and 16 so that each event contains a gap.
  slice(-c(5, 16)) %>%
  # Append a second event1 observation at 10:03:00, duplicating a timestamp
  # that is already present.
  bind_rows(
    tibble(
      focalbear_event = "event1",
      timestamp_focal.bhv.change = ymd_hms("2023-09-22 10:03:00"),
      behavior = "Fishing",
      land_human_num = 2,
      num_boats = 1
    )
  )

我希望每个事件（focalbear_event）的时间戳都按 2 分钟的间隔排列，我的做法如下：

# Keep one row per event and 2-minute bin. Rows are sorted by timestamp first,
# so distinct() retains the earliest observation of each bin. The helper column
# is dropped again afterwards, so the surviving rows keep their original
# (unfloored) timestamps.
dummy_data <- dummy_data %>%
  arrange(timestamp_focal.bhv.change) %>%
  group_by(focalbear_event) %>%
  mutate(
    floor_timestamp = floor_date(timestamp_focal.bhv.change, unit = "2 minutes")
  ) %>%
  distinct(floor_timestamp, .keep_all = TRUE) %>%
  ungroup() %>%
  select(-floor_timestamp)

# Expand each event's timestamps onto a 2-minute grid.
#
# For every focalbear_event group, a row is added for each point of the
# sequence running from the group's earliest to its latest timestamp in
# 2-minute steps that is not already present. Missing values are then filled
# downward within the group from the preceding row.
#
# complete() only adds rows; rows whose timestamp does not lie on the grid are
# kept as they are.
#
# df: data frame with columns focalbear_event and timestamp_focal.bhv.change.
# Returns the expanded, ungrouped data frame.
expand_timestamps <- function(df) {
  by_event <- group_by(df, focalbear_event)

  on_grid <- complete(
    by_event,
    timestamp_focal.bhv.change = seq(
      from = min(timestamp_focal.bhv.change),
      to = max(timestamp_focal.bhv.change),
      by = "2 mins"
    )
  )

  filled <- fill(on_grid, everything(), .direction = "down")
  ungroup(filled)
}


dummy_data <- expand_timestamps(dummy_data)

但在某些情况下，时间间隔是 1 分钟，而不是 2 分钟。知道为什么吗？

1 个回答

Voted

L Tyrone · Answer 1 · 2024-09-27T05:32:55+08:00

根据代码中的逻辑，您可以使用：

library(dplyr)
library(lubridate)
library(tidyr)

# Example data as a literal tibble: 19 rows with the columns focalbear_event,
# timestamp_focal.bhv.change (POSIXct, stored as seconds since the epoch with
# tzone "UTC"), behavior, land_human_num and num_boats.
# NOTE(review): this looks like dput() output of the question's dummy_data
# before any de-duplication -- confirm.
dummy_data <- structure(list(focalbear_event = c("event1", "event1", "event1", 
"event1", "event1", "event1", "event1", "event1", "event1", "event2", 
"event2", "event2", "event2", "event2", "event2", "event2", "event2", 
"event2", "event1"), timestamp_focal.bhv.change = structure(c(1695376800, 
1695376860, 1695376920, 1695376980, 1695377100, 1695377160, 1695377220, 
1695377280, 1695377340, 1695380400, 1695380520, 1695380640, 1695380760, 
1695380880, 1695381120, 1695381240, 1695381360, 1695381480, 1695376980
), tzone = "UTC", class = c("POSIXct", "POSIXt")), behavior = c("Resting", 
"Resting", "Resting", "Alertness", "Alertness", "Alertness", 
"Alertness", "Resting", "Fishing", "Alertness", "Alertness", 
"Fishing", "Alertness", "Resting", "Resting", "Resting", "Fishing", 
"Fishing", "Fishing"), land_human_num = c(0, 4, 2, 1, 0, 5, 2, 
3, 5, 0, 2, 4, 3, 1, 0, 0, 1, 2, 2), num_boats = c(3, 0, 2, 0, 
3, 1, 0, 1, 0, 0, 3, 3, 2, 0, 0, 0, 2, 0, 1)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -19L))


# Per event: drop repeated timestamps (keeping the first row after sorting),
# pad the series to one row per minute, carry the previous row's values into
# the padded rows, and finally keep only the rows that fall on an even minute.
dummy_data |>
  arrange(timestamp_focal.bhv.change) |>
  group_by(focalbear_event) |>
  distinct(timestamp_focal.bhv.change, .keep_all = TRUE) |>
  complete(
    timestamp_focal.bhv.change = seq(
      from = min(timestamp_focal.bhv.change),
      to = max(timestamp_focal.bhv.change),
      by = "1 mins"
    )
  ) |>
  fill(everything(), .direction = "down") |>
  filter(minute(timestamp_focal.bhv.change) %% 2 == 0) |>
  ungroup()

# # A tibble: 15 × 5
#    focalbear_event timestamp_focal.bhv.change behavior  land_human_num num_boats
#    <chr>           <dttm>                     <chr>              <dbl>     <dbl>
#  1 event1          2023-09-22 10:00:00        Resting                0         3
#  2 event1          2023-09-22 10:02:00        Resting                2         2
#  3 event1          2023-09-22 10:04:00        Alertness              1         0
#  4 event1          2023-09-22 10:06:00        Alertness              5         1
#  5 event1          2023-09-22 10:08:00        Resting                3         1
#  6 event2          2023-09-22 11:00:00        Alertness              0         0
#  7 event2          2023-09-22 11:02:00        Alertness              2         3
#  8 event2          2023-09-22 11:04:00        Fishing                4         3
#  9 event2          2023-09-22 11:06:00        Alertness              3         2
# 10 event2          2023-09-22 11:08:00        Resting                1         0
# 11 event2          2023-09-22 11:10:00        Resting                1         0
# 12 event2          2023-09-22 11:12:00        Resting                0         0
# 13 event2          2023-09-22 11:14:00        Resting                0         0
# 14 event2          2023-09-22 11:16:00        Fishing                1         2
# 15 event2          2023-09-22 11:18:00        Fishing                2         0

重新格式化数据以包含 2 分钟的时间间隔

为什么要通过 where 子句中绑定的通用特征来约束单位类型（如 `where () : Trait<…>`）？

`(表达式，左值) = 右值` 在 C 或 C++ 中是有效的赋值吗？为什么有些编译器会接受/拒绝它？

何时应使用 std::inplace_vector 而不是 std::vector？

在 C++ 中，一个不执行任何操作的空程序需要 204KB 的堆，但在 C 中则不需要

如果 T 既不可构造、不可复制、也不可移动，那么我可以拥有 std::optional<T> 吗？

为什么我可以定义一个 constinit 的 std::string 实例？如果对象需要动态初始化，constinit 不是被禁止的吗？

如何分配以后放置的新“如同新”

PowerBI 目前与 BigQuery 不兼容：Simba 驱动程序与 Windows 更新有关

AdMob：MobileAds.initialize() - 对于某些设备，“java.lang.Integer 无法转换为 java.lang.String”

我正在尝试仅使用海龟随机和数学模块来制作吃豆人游戏

重新格式化数据以包含 2 分钟的时间间隔

1 个回答

相关问题