将复制活动的序列号添加到 Blob

Question

Chris Ruehlemann

Asked: 2024-05-04 01:45:47 +0800 CST

插入新行指示行之间的时间间隙

772

我正在处理演讲稿：

  Utterance                       Starttime_ms Endtime_ms
  <chr>                                  <dbl>      <dbl>
1 on this                                  210        780
2 okay                                    3403       3728
3 cool thanks everyone um                 4221       5880
4 so yes in terms of our projects         5910      11960
5 let's have a look so the               11980      13740
6 LGBTQ plus                             13813      16110

并希望在每个 Utterance 行之后插入一个新行，用来表示相邻两个 Utterance 之间的时间间隙。所需的输出看起来有点像这样：

  Utterance                       Starttime_ms Endtime_ms
  <chr>                                  <dbl>      <dbl>
1 on this                                  210        780
  NA                                       780       3403
2 okay                                    3403       3728
  NA                                      3728       4221
3 cool thanks everyone um                 4221       5880
  NA                                      5880       5910
4 so yes in terms of our projects         5910      11960
  NA                                     11960      11980
5 let's have a look so the               11980      13740
  NA                                     13740      13813
6 LGBTQ plus                             13813      16110

我知道如何用 data.table 做到这一点：

library(data.table)
# setDT() converts `df` to a data.table by reference so the join below works.
setDT(df)
# Sorted unique interval boundaries, with 0 prepended as the first boundary.
unq <- c(0, sort(unique(c(df$Starttime_ms, df$Endtime_ms))))
# Join on consecutive boundary pairs: a pair that matches an existing row keeps
# that row's Utterance; a pair without a match comes back with Utterance = NA.
df <- df[
  .(head(unq, -1L), tail(unq, -1L)),
  on = c("Starttime_ms", "Endtime_ms")
]

但我正在寻找dplyr解决方案。

数据：

# Example transcript: one row per utterance with its start and end time
# (the column names suggest milliseconds). Built with structure() so the object
# carries the tibble classes without calling a tibble constructor.
df <- structure(
  list(
    Utterance = c(
      "on this", "okay", "cool thanks everyone um",
      "so yes in terms of our projects",
      "let's have a look so the", "LGBTQ plus"
    ),
    Starttime_ms = c(210, 3403, 4221, 5910, 11980, 13813),
    Endtime_ms = c(780, 3728, 5880, 11960, 13740, 16110)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

5 个回答

Voted

LMc · Answer 1 · 2024-05-04T02:28:17+08:00

library(dplyr)

# Build one "gap" row per utterance and interleave the gaps with the original rows.
df |>
  # Blank the text, then let mutate() unpack the unnamed data frame into columns:
  # a gap starts where the previous utterance ended and ends where this one starts.
  mutate(Utterance = NA, 
         local(data.frame(Starttime_ms = lag(Endtime_ms), Endtime_ms = Starttime_ms))) |>
  # The first utterance has no predecessor (lag() gives NA), so it yields no gap.
  filter(!is.na(Starttime_ms)) |>
  # Add the original utterances back and sort everything by start time.
  bind_rows(df) |>
  arrange(Starttime_ms)

我在这里使用 local() 创建一个局部执行环境，因为如果像下面这样写，Starttime_ms 和 Endtime_ms 会相互覆盖：

 # Counter-example (do not use): mutate() evaluates its arguments in order, so
 # `Endtime_ms = Starttime_ms` sees the already-lagged Starttime_ms, not the
 # original column.
 mutate(Utterance = NA, 
         Starttime_ms = lag(Endtime_ms), 
         Endtime_ms = Starttime_ms)

我没有输出单个值，而是返回一个数据框，这利用了 mutate() 的省略号参数 ... 可以接收数据框或 tibble、从而在输出中一次创建多个列这一事实。

输出

   Utterance                       Starttime_ms Endtime_ms
   <chr>                                  <dbl>      <dbl>
 1 on this                                  210        780
 2 NA                                       780       3403
 3 okay                                    3403       3728
 4 NA                                      3728       4221
 5 cool thanks everyone um                 4221       5880
 6 NA                                      5880       5910
 7 so yes in terms of our projects         5910      11960
 8 NA                                     11960      11980
 9 let's have a look so the               11980      13740
10 NA                                     13740      13813
11 LGBTQ plus                             13813      16110

Adriano Mello · Answer 2 · 2024-05-04T02:33:36+08:00

不是特别优雅，但这里有一个 dplyr 方案：

library(dplyr)
library(tidyr)

# Stack the two time columns into one long column (start, end, start, end, ...)
# so that every consecutive pair of values delimits an utterance or a gap.
# Operates on `df`, the data frame defined in the question. The `name` column
# created by pivot_longer() is kept in the result.
df %>%
  pivot_longer(ends_with("ms"), values_to = "Starttime_ms") %>%
  mutate(
    # The next boundary closes the interval opened by the current one.
    Endtime_ms = dplyr::lead(Starttime_ms, default = NA),
    # Even rows are the gaps between utterances: blank out their text.
    Utterance = if_else(row_number() %% 2 == 0, NA_character_, Utterance)
  ) %>%
  # The last boundary has no successor, so drop the final row.
  slice_head(n = -1)

输出：

# A tibble: 11 × 4
   Utterance                       name         Starttime_ms Endtime_ms
   <chr>                           <chr>               <dbl>      <dbl>
 1 on this                         Starttime_ms          210        780
 2 NA                              Endtime_ms            780       3403
 3 okay                            Starttime_ms         3403       3728
 4 NA                              Endtime_ms           3728       4221
 5 cool thanks everyone um         Starttime_ms         4221       5880
 6 NA                              Endtime_ms           5880       5910
 7 so yes in terms of our projects Starttime_ms         5910      11960
 8 NA                              Endtime_ms          11960      11980
 9 let's have a look so the        Starttime_ms        11980      13740
10 NA                              Endtime_ms          13740      13813
11 LGBTQ plus                      Starttime_ms        13813      16110

Andre Wildberg · Answer 3 · 2024-05-04T02:44:00+08:00

一种使用 uncount 的方法

library(dplyr)
library(tidyr)

# Duplicate every row, then turn each second copy into the gap that follows the
# utterance. The copies are told apart by row parity rather than by grouping on
# the Utterance text, so transcripts with repeated utterances (e.g. "okay"
# appearing twice) are not merged into a single group.
df %>%
  uncount(2) %>%
  mutate(
    # A gap (even row) starts where its utterance ends.
    Starttime_ms = if_else(row_number() %% 2 == 0, Endtime_ms, Starttime_ms),
    Utterance = if_else(row_number() %% 2 == 0, NA_character_, Utterance),
    # Every interval ends where the next one starts (uses the updated starts).
    Endtime_ms = lead(Starttime_ms)
  ) %>%
  # The trailing gap after the last utterance has no end time; drop it.
  filter(!is.na(Endtime_ms))
# A tibble: 11 × 3
   Utterance                       Starttime_ms Endtime_ms
   <chr>                                  <dbl>      <dbl>
 1 on this                                  210        780
 2 NA                                       780       3403
 3 okay                                    3403       3728
 4 NA                                      3728       4221
 5 cool thanks everyone um                 4221       5880
 6 NA                                      5880       5910
 7 so yes in terms of our projects         5910      11960
 8 NA                                     11960      11980
 9 let's have a look so the               11980      13740
10 NA                                     13740      13813
11 LGBTQ plus                             13813      16110

ThomasIsCoding · Answer 4 · 2024-05-04T04:27:33+08:00

Best Answer

ThomasIsCoding

2024-05-04T04:27:33+08:002024-05-04T04:27:33+08:00

你可以尝试下面的代码

# Reshape both time columns into one long column of boundaries, pair each
# boundary with the next one, and blank the text on every second (gap) row.
df %>%
  pivot_longer(!Utterance, values_to = "Starttime_ms") %>%
  mutate(Endtime_ms = lead(Starttime_ms)) %>%
  # Only the final boundary has no successor; this removes that row.
  drop_na() %>%
  select(!name) %>%
  # Must run after drop_na(): otherwise the blanked gap rows would be dropped.
  mutate(
    Utterance = if_else(row_number() %% 2 == 0, NA_character_, Utterance)
  )

结果为

# A tibble: 11 × 3
   Utterance                       Starttime_ms Endtime_ms
   <chr>                                  <dbl>      <dbl>
 1 on this                                  210        780
 2 NA                                       780       3403
 3 okay                                    3403       3728
 4 NA                                      3728       4221
 5 cool thanks everyone um                 4221       5880
 6 NA                                      5880       5910
 7 so yes in terms of our projects         5910      11960
 8 NA                                     11960      11980
 9 let's have a look so the               11980      13740
10 NA                                     13740      13813
11 LGBTQ plus                             13813      16110

3

TarJae · Answer 5 · 2024-05-04T06:58:27+08:00

这是一个简单的dplyr解决方案。请注意，我使用的大部分元素之前同事也提到过！

library(dplyr)

df %>%
  # Duplicate every row: odd copies stay utterances, even copies become gaps.
  # seq_len() keeps this safe for a zero-row input, unlike 1:n().
  slice(rep(seq_len(n()), each = 2)) %>%
  mutate(
    # A gap (even row) starts where its utterance ends.
    Starttime_ms = ifelse(row_number() %% 2 == 0, Endtime_ms, Starttime_ms),
    # Every interval ends where the next one starts (uses the updated starts).
    Endtime_ms = lead(Starttime_ms)
  ) %>%
  # The trailing gap after the last utterance has no end time; drop it.
  slice(-n()) %>%
  # Blank the text on the even (gap) rows only; odd rows keep their utterance.
  mutate(Utterance = ifelse(row_number() %% 2 == 0, NA_character_, Utterance))

一些基准：

library(dplyr)
library(tidyr)
library(microbenchmark)

# Benchmark wrapper for the pivot_longer()/drop_na() approach.
# Reads the global `df` and returns the interleaved utterance/gap table.
thomas <- function() {
  boundaries <- pivot_longer(df, !Utterance, values_to = "Starttime_ms")
  intervals <- boundaries %>%
    mutate(Endtime_ms = lead(Starttime_ms)) %>%
    drop_na() %>%
    select(!name)
  # Blank the text on even (gap) rows; done after drop_na() so they survive.
  mutate(
    intervals,
    Utterance = if_else(row_number() %% 2 == 0, NA_character_, Utterance)
  )
}

# Benchmark wrapper for the pivot_longer()/slice_head() approach.
# Reads the global `df`; the result keeps pivot_longer()'s `name` column.
adriano <- function() {
  boundaries <- pivot_longer(df, ends_with("ms"), values_to = "Starttime_ms")
  paired <- mutate(
    boundaries,
    # The next boundary closes the interval; the last row gets NA.
    Endtime_ms = dplyr::lead(Starttime_ms),
    # Even rows are gaps: blank out their text.
    Utterance = if_else(row_number() %% 2 == 0, NA_character_, Utterance)
  )
  # Drop the final row, whose interval has no end.
  slice_head(paired, n = -1)
}

# Benchmark wrapper for the lag()/bind_rows() approach.
# Reads the global `df` and returns the utterances interleaved with gap rows.
lmc <- function() {
  # One candidate gap per utterance: from the previous end to the current start.
  gaps <- df |>
    mutate(
      Utterance = NA,
      local(data.frame(Starttime_ms = lag(Endtime_ms), Endtime_ms = Starttime_ms))
    ) |>
    # The first utterance has no predecessor, so its candidate gap is removed.
    filter(!is.na(Starttime_ms))
  # Gaps first, then the original rows, ordered by start time.
  arrange(bind_rows(gaps, df), Starttime_ms)
}

# Benchmark wrapper for the uncount() approach.
# Reads the global `df` and returns the utterances interleaved with gap rows.
# The duplicated copies are told apart by row parity rather than by grouping on
# the Utterance text, so repeated utterances are not merged into one group.
andre <- function() {
  df %>%
    uncount(2) %>%
    mutate(
      # A gap (even row) starts where its utterance ends.
      Starttime_ms = if_else(row_number() %% 2 == 0, Endtime_ms, Starttime_ms),
      Utterance = if_else(row_number() %% 2 == 0, NA_character_, Utterance),
      # Every interval ends where the next one starts (uses the updated starts).
      Endtime_ms = lead(Starttime_ms)
    ) %>%
    # The trailing gap after the last utterance has no end time; drop it.
    filter(!is.na(Endtime_ms))
}

# Benchmark wrapper for the slice(rep()) approach.
# Reads the global `df` and returns the utterances interleaved with gap rows.
tarjae <- function() {
  df %>%
    # Duplicate every row: odd copies stay utterances, even copies become gaps.
    # seq_len() keeps this safe for a zero-row input, unlike 1:n().
    slice(rep(seq_len(n()), each = 2)) %>%
    mutate(
      # A gap (even row) starts where its utterance ends.
      Starttime_ms = ifelse(row_number() %% 2 == 0, Endtime_ms, Starttime_ms),
      # Every interval ends where the next one starts (uses the updated starts).
      Endtime_ms = lead(Starttime_ms)
    ) %>%
    # The trailing gap after the last utterance has no end time; drop it.
    slice(-n()) %>%
    # Blank the text on the even (gap) rows only; odd rows keep their utterance.
    mutate(Utterance = ifelse(row_number() %% 2 == 0, NA_character_, Utterance))
}

# benchmark

# Time each approach 100 times on the global `df`.
# Each entry calls the wrapper function of the same name defined above.
mbm <- microbenchmark(
  thomas = thomas(),
  adriano = adriano(),
  lmc = lmc(),
  andre = andre(),
  tarjae = tarjae(),
  times = 100
)
# NOTE(review): the autoplot() generic comes from ggplot2, which this script
# does not attach — confirm ggplot2 is loaded before running this line.
autoplot(mbm)
print(mbm)

插入新行指示行之间的时间间隙

Vue 3：创建时出错“预期标识符但发现‘导入’”[重复]

为什么这个简单而小的 Java 代码在所有 Graal JVM 上的运行速度都快 30 倍，但在任何 Oracle JVM 上却不行？

具有指定基础类型但没有枚举器的“枚举类”的用途是什么？

如何修复未手动导入的模块的 MODULE_NOT_FOUND 错误？

`(表达式，左值) = 右值` 在 C 或 C++ 中是有效的赋值吗？为什么有些编译器会接受/拒绝它？

何时应使用 std::inplace_vector 而不是 std::vector？

在 C++ 中，一个不执行任何操作的空程序需要 204KB 的堆，但在 C 中则不需要

PowerBI 目前与 BigQuery 不兼容：Simba 驱动程序与 Windows 更新有关

AdMob：MobileAds.initialize() - 对于某些设备，“java.lang.Integer 无法转换为 java.lang.String”

我正在尝试仅使用海龟随机和数学模块来制作吃豆人游戏

插入新行指示行之间的时间间隙

5 个回答

相关问题