'Insert rows for missing time measurements of a negative event

I have data from an experiment where Subjects rated an Event called f:

df <- structure(list(Subject = c("A", "A", "A", "B", "B", "B"),
                     Timestamp = c("00:00:00.146 - 00:00:00.889", 
                                   "00:00:01.568 - 00:00:02.183", "00:00:03.642 - 00:00:04.522", 
                                   "00:00:00.000 - 00:00:00.660", "00:00:01.247 - 00:00:02.229", 
                                   "00:00:03.697 - 00:00:04.926"),
                     Starttime_ms = c(146, 1568, 3642, 0, 1247, 3697), 
                     Endtime_ms = c(889, 2183, 4522, 660, 2229, 4926), 
                     Duration = c(743, 615, 880, 660, 982, 1229), 
                     Event = c("f", "f", "f", "f", "f", "f")), row.names = c(NA, 6L), class = "data.frame")

What is missing are the respective measurements for those in-between timespans where the Subjects did not rate the event. That's an event too, just a negative one; let's call it nf. How can I insert rows for those missing timespans to obtain the complete record including both positive and negative events f and nf shown below?

Expected:

  Subject                   Timestamp Starttime_ms Endtime_ms Duration Event
1       A 00:00:00.000 - 00:00:00.146          0          146      146    nf
2       A 00:00:00.146 - 00:00:00.889          146        889      743     f
3       A 00:00:00.889 - 00:00:01.568          889       1568      679    nf
4       A 00:00:01.568 - 00:00:02.183         1568       2183      615     f
5       A 00:00:02.183 - 00:00:03.642         2183       3642     1459    nf
6       A 00:00:03.642 - 00:00:04.522         3642       4522      880     f
7       B 00:00:00.000 - 00:00:00.660            0        660      660     f
8       B 00:00:00.660 - 00:00:01.247          660       1247      587    nf
9       B 00:00:01.247 - 00:00:02.229         1247       2229      982     f
10      B 00:00:02.229 - 00:00:03.697         2229       3697     1468    nf
11      B 00:00:03.697 - 00:00:04.926         3697       4926     1229     f

I've experimented with data.tablebut since I'm not fluent with this, this attempt only brought partial success:

unq <- c(0, sort(unique(setDT(df)[, c(Starttime_ms, Endtime_ms)])))
df[.(unq[-length(unq)], unq[-1]), on=c("Starttime_ms", "Endtime_ms")]

While I'm open to any solution my preference is for a dplyr one.

r dplyr data.table

Solution 1:^[1]

This is a tidyverse solution.

library(tidyverse)
library(lubridate)

df %>%
  separate(Timestamp, c("start", "end"), sep = " - ", remove = FALSE) %>%
  group_by(Subject) %>%
  mutate(Starttime_ms = lag(end, default = "00:00:00.000"),
         Endtime_ms = start,
         Event = "nf",
         Timestamp = paste(Starttime_ms, Endtime_ms, sep = " - "),
         across(ends_with("_ms"), ~ hms(.x) * 1000),
         Duration = Endtime_ms - Starttime_ms,
         across(where(is.period), as.numeric), .keep = "unused") %>%
  bind_rows(df) %>%
  arrange(Starttime_ms, .by_group = TRUE) %>%
  filter(Duration > 0) %>%
  ungroup()

Output

# A tibble: 11 × 6
   Subject Timestamp                   Starttime_ms Endtime_ms Duration Event
   <chr>   <chr>                              <dbl>      <dbl>    <dbl> <chr>
 1 A       00:00:00.000 - 00:00:00.146            0        146      146 nf   
 2 A       00:00:00.146 - 00:00:00.889          146        889      743 f    
 3 A       00:00:00.889 - 00:00:01.568          889       1568      679 nf   
 4 A       00:00:01.568 - 00:00:02.183         1568       2183      615 f    
 5 A       00:00:02.183 - 00:00:03.642         2183       3642     1459 nf   
 6 A       00:00:03.642 - 00:00:04.522         3642       4522      880 f    
 7 B       00:00:00.000 - 00:00:00.660            0        660      660 f    
 8 B       00:00:00.660 - 00:00:01.247          660       1247      587 nf   
 9 B       00:00:01.247 - 00:00:02.229         1247       2229      982 f    
10 B       00:00:02.229 - 00:00:03.697         2229       3697     1468 nf   
11 B       00:00:03.697 - 00:00:04.926         3697       4926     1229 f

Solution 2:^[2]

This one is the exactly desired output:

library(dplyr)
library(tidyr)
library(lubridate)

df %>% 
  separate_rows(Timestamp, sep = " - ") %>% 
  mutate(Timestamp1 = lag(Timestamp, default = "00:00:00.000"), .before=Timestamp) %>% 
  mutate(Starttime_ms = as.numeric(hms(Timestamp1))*1000,
         Endtime_ms = as.numeric(hms(Timestamp))*1000,
         Duration = Endtime_ms-Starttime_ms) %>% 
  mutate(Timestamp = paste(Timestamp1, Timestamp, sep = " - ")) %>% 
  left_join(df, by="Timestamp") %>% 
  mutate(Event.y = replace_na(Event.y, "nf")) %>% 
  select(1, 3:6, Event.y) %>% 
  filter(Endtime_ms.x !=0) %>% 
  rename_with(., ~str_replace(., '\\.\\w', ''))

  Subject Timestamp                   Starttime_ms Endtime_ms Duration Event
   <chr>   <chr>                              <dbl>      <dbl>    <dbl> <chr>
 1 A       00:00:00.000 - 00:00:00.146            0        146      146 nf   
 2 A       00:00:00.146 - 00:00:00.889          146        889      743 f    
 3 A       00:00:00.889 - 00:00:01.568          889       1568      679 nf   
 4 A       00:00:01.568 - 00:00:02.183         1568       2183      615 f    
 5 A       00:00:02.183 - 00:00:03.642         2183       3642     1459 nf   
 6 A       00:00:03.642 - 00:00:04.522         3642       4522      880 f    
 7 B       00:00:00.000 - 00:00:00.660            0        660      660 f    
 8 B       00:00:00.660 - 00:00:01.247          660       1247      587 nf   
 9 B       00:00:01.247 - 00:00:02.229         1247       2229      982 f    
10 B       00:00:02.229 - 00:00:03.697         2229       3697     1468 nf   
11 B       00:00:03.697 - 00:00:04.926         3697       4926     1229 f

Solution 3:^[3]

You were nearly there. The issue is to complete the time sequence separately for each group. This is what would do:

library(data.table)
# create sequence of complete periods for each Subject
unq <- setDT(df)[, {
  tmp <- sort(unique(c(0, Starttime_ms, Endtime_ms)))
  list(Starttime_ms = head(tmp, -1L),
       Endtime_ms = tail(tmp, -1L))
}, by = Subject]
# join and complete missing values
fmt <- function(ms) format(as.POSIXct(ms / 1000, tz = "UTC", origin = "2000-01-01"), "%H:%M:%OS3")
df[unq, on = .(Subject, Starttime_ms, Endtime_ms)][
  is.na(Event), c("Timestamp", "Duration", "Event") := .(
    paste(fmt(Starttime_ms), fmt(Endtime_ms), sep = " - "),
    Endtime_ms - Starttime_ms, 
    "nf")][]

    Subject                   Timestamp Starttime_ms Endtime_ms Duration Event
 1:       A 00:00:00.000 - 00:00:00.146            0        146      146    nf
 2:       A 00:00:00.146 - 00:00:00.889          146        889      743     f
 3:       A 00:00:00.889 - 00:00:01.567          889       1568      679    nf
 4:       A 00:00:01.568 - 00:00:02.183         1568       2183      615     f
 5:       A 00:00:02.182 - 00:00:03.641         2183       3642     1459    nf
 6:       A 00:00:03.642 - 00:00:04.522         3642       4522      880     f
 7:       B 00:00:00.000 - 00:00:00.660            0        660      660     f
 8:       B 00:00:00.659 - 00:00:01.246          660       1247      587    nf
 9:       B 00:00:01.247 - 00:00:02.229         1247       2229      982     f
10:       B 00:00:02.228 - 00:00:03.697         2229       3697     1468    nf
11:       B 00:00:03.697 - 00:00:04.926         3697       4926     1229     f

unq contains all periods for each subject to join on:

unq

    Subject Starttime_ms Endtime_ms
 1:       A            0        146
 2:       A          146        889
 3:       A          889       1568
 4:       A         1568       2183
 5:       A         2183       3642
 6:       A         3642       4522
 7:       B            0        660
 8:       B          660       1247
 9:       B         1247       2229
10:       B         2229       3697
11:       B         3697       4926

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source
Solution 1
Solution 2
Solution 3

'Insert rows for missing time measurements of a negative event

Solution 1:[1]

Output

Solution 2:[2]

Solution 3:[3]

Sources

Related Questions

Solution 1:^[1]

Solution 2:^[2]

Solution 3:^[3]