'Adjust code that uses data.table function
The code below uses the data.table function to generate an output table. However, I would like to know if it is possible to optimize the code somehow and still get the same result? The idea is to reduce the code in order to decrease the processing time.
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,6,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(4,0,0,1,2,4,4,4),DR09 = c(2,5,4,4,9,4,7,8),DR010 = c(2,5,4,4,9,4,7,8),DR011 = c(4,7,3,2,2,7,7,7),
DR012 = c(4,4,2,3,0,4,4,5),DR013 = c(4,4,1,4,0,3,2,0),DR014 = c(0,3,1,2,0,2,NA,NA)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DR0")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana, (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV1 <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
SPV2<-melt(SPV1[date2 == dmda & Code == code][,
lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
> SPV2
Code name val
1: CDE 1 5
2: CDE 2 5
3: CDE 3 5
4: CDE 4 5
5: CDE 5 5
6: CDE 6 5
7: CDE 7 5
8: CDE 8 5
9: CDE 9 5
10: CDE 10 5
11: CDE 11 5
12: CDE 12 5
13: CDE 13 5
14: CDE 14 5
result <- SPV2 %>%
group_by(Code) %>%
slice((as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1) %>%
ungroup
> result
# A tibble: 3 x 3
Code name val
<chr> <dbl> <dbl>
1 CDE 12 5
2 CDE 13 5
3 CDE 14 5
Solution 1:[1]
The dplyr code can be converted to data.table as
SPV2[na.omit(SPV2[, .I[(as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1], .(Code)]$V1)]
-output
Code name val
<char> <num> <num>
1: CDE 12 5
2: CDE 13 5
3: CDE 14 5
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | akrun |
