'R : How to iterate sum across multiple columns?
I am trying to iterate/loop a sum across multiple non-consecutive columns. My objective is to compute the subscale score of multiple questionnaires measured repeatedly across time.
Dataset for one questionnaire of x items and n time-points:
df <- tibble(
ID = 1:5,
itemA_1 = sample(100, 5, TRUE),
itemB_1 = sample(100, 5, TRUE),
itemC_1 = sample(100, 5, TRUE),
itemD_1 = sample(100, 5, TRUE),
itemx_1 = sample(100, 5, TRUE),
itemA_3 = sample(100, 5, TRUE),
itemB_3 = sample(100, 5, TRUE),
itemC_3 = sample(100, 5, TRUE),
itemD_3 = sample(100, 5, TRUE),
itemx_3 = sample(100, 5, TRUE),
itemA_n = sample(100, 5, TRUE),
itemB_n = sample(100, 5, TRUE),
itemC_n = sample(100, 5, TRUE),
itemD_n = sample(100, 5, TRUE),
itemx_n = sample(100, 5, TRUE),
)
The sum for one specific time point works just fine:
df %>% mutate(total_1 = sum(c(itemA_1, itemC_1, itemD_1))
This loop does not work:
for (i in c(1, 3, n)) {
df %>% mutate(total_i = sum(c(itemA_i, itemC_i, itemD_i))
}
What am I doing wrong?
Solution 1:[1]
Here's a base R option without pivoting, where we first select the columns that we want to sum, then get the unique suffix names, then we can use rowSums to get the sum of each group (i.e., each unique suffix). Then, I update the column names, and then merge with the original dataframe.
df_sum <- df[, grepl( "ID|itemA|itemC|itemD", names(df))]
suffixes <- unique(sub("^[^_]*_", "", colnames(df_sum)))
df2 <- sapply(suffixes, function(x) rowSums(df_sum[,endsWith(colnames(df_sum), x)]))
colnames(df2)[-1] <- paste("total", colnames(df2)[-1], sep = "_")
merge(x = df, y = df2, by = "ID", all.x = TRUE)
Output
ID itemA_1 itemB_1 itemC_1 itemD_1 itemx_1 itemA_3 itemB_3 itemC_3 itemD_3 itemx_3 itemA_n itemB_n itemC_n itemD_n itemx_n total_1 total_3 total_n
1 1 92 84 31 74 77 26 71 92 59 70 47 54 7 6 95 197 177 60
2 2 49 6 40 6 94 61 69 58 49 62 66 13 94 52 23 95 168 212
3 3 67 69 34 56 44 94 69 1 52 96 62 64 34 78 67 157 147 174
4 4 86 33 85 87 30 33 26 15 70 97 34 36 74 58 87 258 118 166
5 5 49 25 23 56 63 4 84 35 92 34 33 62 95 77 50 128 131 205
Data
df <- structure(list(ID = 1:5, itemA_1 = c(92L, 49L, 67L, 86L, 49L),
itemB_1 = c(84L, 6L, 69L, 33L, 25L), itemC_1 = c(31L, 40L,
34L, 85L, 23L), itemD_1 = c(74L, 6L, 56L, 87L, 56L), itemx_1 = c(77L,
94L, 44L, 30L, 63L), itemA_3 = c(26L, 61L, 94L, 33L, 4L),
itemB_3 = c(71L, 69L, 69L, 26L, 84L), itemC_3 = c(92L, 58L,
1L, 15L, 35L), itemD_3 = c(59L, 49L, 52L, 70L, 92L), itemx_3 = c(70L,
62L, 96L, 97L, 34L), itemA_n = c(47L, 66L, 62L, 34L, 33L),
itemB_n = c(54L, 13L, 64L, 36L, 62L), itemC_n = c(7L, 94L,
34L, 74L, 95L), itemD_n = c(6L, 52L, 78L, 58L, 77L), itemx_n = c(95L,
23L, 67L, 87L, 50L)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L))
Benchmark
df1 <- df
df2 <- df
bm <- microbenchmark::microbenchmark(akrun_tidyverse = {df %>%
pivot_longer(cols =-ID, names_to = c("item", ".value"), names_sep = "_") %>%
filter(item %in% c("itemA", "itemC", "itemD")) %>%
group_by(ID) %>%
summarise(across(where(is.numeric), sum, na.rm = TRUE,
.names = "total_{.col}")) %>%
left_join(df, .)},
akrun_loop = {for (i in c(1, 3, 'n')) {
df1 <- df1 %>%
mutate(!! str_c("total_", i) :=
rowSums(across(all_of(str_c(c("itemA_", "itemC_", "itemD_"), i)))))
}},
andrew_baseR = {df_sum <- df2[, grepl( "ID|itemA|itemC|itemD", names(df2))];
suffixes <- unique(sub("^[^_]*_", "", colnames(df_sum)));
df3 <- sapply(suffixes, function(x) rowSums(df_sum[,endsWith(colnames(df_sum), x)]));
colnames(df3)[-1] <- paste("total", colnames(df3)[-1], sep = "_");
merge(x = df, y = df3, by = "ID", all.x = TRUE)},
times = 1000)
autoplot(bm)
Solution 2:[2]
Another possible solution, based on purrr::map_dfc:
library(tidyverse)
map_dfc(c("1","3","n"),~ data.frame(apply(select(df, ends_with(.x)), 1, sum)) %>%
set_names(str_c("Total_",.x))) %>% bind_cols(df, .)
#> ID itemA_1 itemB_1 itemC_1 itemD_1 itemx_1 itemA_3 itemB_3 itemC_3 itemD_3
#> 1 1 89 33 84 15 86 8 9 18 98
#> 2 2 37 2 32 52 37 79 37 31 57
#> 3 3 5 21 54 58 25 74 43 7 14
#> 4 4 19 76 71 84 75 34 72 100 33
#> 5 5 78 52 58 42 5 28 90 21 95
#> itemx_3 itemA_n itemB_n itemC_n itemD_n itemx_n Total_1 Total_3 Total_n
#> 1 78 5 52 4 73 88 307 211 222
#> 2 82 88 78 3 3 69 160 286 241
#> 3 41 60 73 59 32 87 163 179 311
#> 4 97 86 33 81 44 22 325 336 266
#> 5 37 4 9 35 5 38 235 271 91
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | |
| Solution 2 | PaulS |

