'Summary DataFrame: Aggregating data at specific timestamps for different categories and the day overall

I have a large dataframe that's updated every day containing the following:

structure(list(date = structure(c(19038, 19038, 19038, 19038, 
19038, 19038, 19038, 19038, 19038, 19038, 19038, 19038, 19038, 
19038, 19038), class = "Date"), type = c("USD", "USD", "USD", 
"USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", 
"USD", "USD", "USD"), quantity = c(0.035, 0.011, 0.02, 0.02, 
0.019, 0.036, 0.001, 0.003, 0.004, 0.065, 0.002, 0.001, 0.002, 
0.002, 0.013), cumvol = c(0.035, 0.046, 0.066, 0.086, 0.105, 
0.141, 0.142, 0.145, 0.149, 0.214, 0.216, 0.217, 0.219, 0.221, 
0.234), time = structure(c(24422, 24430, 24453, 24463, 24468, 
24476, 24485, 24583, 24790, 24862, 24950, 24953, 25047, 25085, 
25085), units = "secs", class = c("hms", "difftime"))), row.names = c(NA, 
-15L), class = c("data.table", "data.frame"))

This is the code used to attempt to achieve the result (helpfully produced by Dan Adams)

 times_of_interest <- c(7, 9, 11)

d %>%
  mutate(across(everything(), as.character)) %>% 
  mutate(across(quantity:cumvol, as.numeric)) %>% 
  mutate(type = factor(type)) %>% 
  mutate(date = ymd(date)) %>% 
  mutate(time = as_hms(time)) %>% 
  mutate(date = date + sample(0:1, nrow(.), T)) %>% 
  mutate(time_hr = hour(time)) %>% 
  filter(time_hr %in% times_of_interest) %>%
  group_by(date, type, time_hr) %>% 
  summarize(cat_total = sum(quantity), .groups = "drop") %>% 
  group_by(date) %>% 
  mutate(date_total = sum(cat_total)) %>% 
  ungroup()

The output delivers this:

structure(list(date = structure(c(19034, 19034, 19034, 19034, 
19035, 19035, 19035, 19035, 19035, 19035, 19035, 19035, 19037, 
19037, 19037, 19037, 19037, 19037, 19037, 19037, 19037, 19038, 
19038, 19038, 19038, 19038, 19038, 19038, 19038, 19038, 19039, 
19039, 19039), class = "Date"), type = structure(c(3L, 5L, 5L, 
5L, 2L, 2L, 3L, 3L, 3L, 5L, 5L, 5L, 2L, 2L, 3L, 3L, 3L, 1L, 5L, 
5L, 5L, 2L, 2L, 2L, 3L, 3L, 3L, 5L, 5L, 5L, 2L, 3L, 5L), .Label = c("DNB", 
"UKY", "UKS", "T/N cl DBV", "USD"), class = "factor"), time_hr = c(11L, 
7L, 9L, 11L, 9L, 11L, 7L, 9L, 11L, 7L, 9L, 11L, 9L, 11L, 7L, 
9L, 11L, 11L, 7L, 9L, 11L, 7L, 9L, 11L, 7L, 9L, 11L, 7L, 9L, 
11L, 7L, 7L, 7L), cat_total = c(0.739, 8.714, 3.281, 3.263, 0.815, 
1.025, 2.071, 0.661, 0.328, 5.633, 0.776, 2.126, 0.465, 0.15, 
2.046, 1.203, 0.137, 0.058, 2.341, 4.215, 1.705, 0.01, 0.335, 
0.15, 4.323, 1.157, 0.031, 8.607, 3.624, 2.603, 0.865, 1.599, 
5.721), date_total = c(9893.441, 9893.441, 9893.441, 9893.441, 
9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 
9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 
9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 
9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 9893.441, 
9893.441)), row.names = c(NA, -33L), class = c("tbl_df", "tbl", 
"data.frame"))

What I'd like to do is change the summary table so that it takes the total quantities in the types (and overall) at some specific timestamps; say 12:00, 15:00, and 16:00, for each day.

Unfortunately, for whatever reason the output for this dataframe is always at two times (the first two in the times_of_interest vector)./

str output is:

Classes ‘data.table’ and 'data.frame':  15 obs. of  5 variables:
 $ date    : Date, format: "2022-02-15" "2022-02-15" "2022-02-15" "2022-02-15" ...
 $ type    : chr  "USD" "USD" "USD" "USD" ...
 $ quantity: num  0.035 0.011 0.02 0.02 0.019 0.036 0.001 0.003 0.004 0.065 ...
 $ cumvol  : num  0.035 0.046 0.066 0.086 0.105 0.141 0.142 0.145 0.149 0.214 ...
 $ time    : 'hms' num  06:47:02 06:47:10 06:47:33 06:47:43 ...
  ..- attr(*, "units")= chr "secs"
 - attr(*, ".internal.selfref")=<externalptr> 


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source