'Make a list out of frequencies, concatenating categories to that list
I am trying to adapt this solution, by onyambu:
New data
dat_in_new <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25) east` = c(1269L, 85L), `[0,25) north` = c(364L, 21L
), `[0,25) south` = c(1172L, 97L), `[0,25) west` = c(549L, 49L
), `[25,50) east` = c(0L, 0L), `[25,50) north` = c(0L, 0L), `[25,50) south` = c(0L,
0L), `[25,50) west` = c(0L, 0L), `[25,100) east` = c(579L, 220L
), `[25,100) north` = c(406L, 58L), `[25,100) south` = c(1048L,
316L), `[25,100) west` = c(764L, 131L), `[50,100) east` = c(0L,
0L), `[50,100) north` = c(0L, 0L), `[50,100) south` = c(0L, 0L
), `[50,100) west` = c(0L, 0L), `[100,250) east` = c(441L, 149L
), `[100,250) north` = c(224L, 45L), `[100,250) south` = c(521L,
247L), `[100,250) west` = c(770L, 124L), `[100,500) east` = c(0L,
0L), `[100,500) north` = c(0L, 0L), `[100,500) south` = c(0L,
0L), `[100,500) west` = c(0L, 0L), `[250,500) east` = c(232L,
172L), `[250,500) north` = c(207L, 40L), `[250,500) south` = c(202L,
148L), `[250,500) west` = c(457L, 153L), `[500,1000) east` = c(103L,
0L), `[500,1000) north` = c(185L, 0L), `[500,1000) south` = c(66L,
0L), `[500,1000) west` = c(200L, 0L), `[1000,1500) east` = c(0L,
0L), `[1000,1500) north` = c(0L, 0L), `[1000,1500) south` = c(0L,
0L), `[1000,1500) west` = c(0L, 0L), `[1500,3000) east` = c(0L,
0L), `[1500,3000) north` = c(0L, 0L), `[1500,3000) south` = c(0L,
0L), `[1500,3000) west` = c(0L, 0L), `[500,1000000] east` = c(0L,
288L), `[500,1000000] north` = c(0L, 120L), `[500,1000000] south` = c(0L,
229L), `[500,1000000] west` = c(0L, 175L), `[1000,1000000] east` = c(53L,
0L), `[1000,1000000] north` = c(82L, 0L), `[1000,1000000] south` = c(23L,
0L), `[1000,1000000] west` = c(63L, 0L), `[3000,1000000] east` = c(0L,
0L), `[3000,1000000] north` = c(0L, 0L), `[3000,1000000] south` = c(0L,
0L), `[3000,1000000] west` = c(0L, 0L), Sum_table_in = c(9980,
2867), strata = list(c(0, 25, 100, 250, 500, 1000, 1e+06), c(0,
25, 100, 250, 500, 1e+06))), row.names = c(NA, -2L), class = c("data.table",
"data.frame"))
Old data and solution
library(dplyr)
dat_in <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25)` = c(5L, 0L), `[25,50)` = c(0L, 0L), `[25,100)` = c(38L,
3L), `[50,100)` = c(0L, 0L), `[100,250)` = c(43L, 5L), `[100,500)` = c(0L,
0L), `[250,500)` = c(27L, 12L), `[500,1000)` = c(44L, 0L), `[1000,1500)` = c(0L,
0L), `[1500,3000)` = c(0L, 0L), `[500,1000000]` = c(0L, 53L),
`[1000,1000000]` = c(20L, 0L), `[3000,1000000]` = c(0L, 0L
), Sum_bin = c(177, 73), strata = list(c(0, 25, 100, 250,
500, 1000, 1e+06), c(0, 25, 100, 250, 500, 1e+06))), row.names = c(NA,
-2L), class = c("data.table", "data.frame"))
dat_in %>%
pivot_longer(-c(rn, strata)) %>%
tidyr::extract(name, c('lower', 'upper'), '(\\d+),(\\d+)', convert = TRUE) %>%
group_by(rn) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>%
summarise(freq = list(freq))
Question
For the new data, there are many more values, because there are values for east, north, south and west. These values should be concatenated to make one long list of frequencies, one for each type.
The start can be adapted as follows:
part1 <- dat_in_new %>%
pivot_longer(-c(rn, strata)) %>%
tidyr::extract(name, c('lower', 'upper', 'rest'), '(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)', convert = TRUE) %>%
select(-where(is_all_na))
But I get stuck in the following part, because I do not really understand the lines after group_by(rn), especially the .groups = 'drop_last' part (link). I usually just run the code line by line to see what happens, but for this solution that did not really help:
part2 <- part2 %>%
group_by(rn) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>%
summarise(freq = list(freq))
Currently, the code runs, but I end up with the old outcome. How should I adapt this code to add all the values?
Solution 1:[1]
We may use pivot_longer as
library(dplyr)
library(tidyr)
dat_in_new %>%
pivot_longer(cols = -c(rn, strata, Sum_table_in),
names_to = c("lower", "upper", "direction"),
names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
values_drop_na = TRUE) %>%
type.convert(as.is = TRUE) %>%
group_by(rn, direction) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>%
summarise(freq = list(freq), .groups = "drop")
-output
# A tibble: 8 × 3
rn direction freq
<chr> <chr> <list>
1 Type_A east <int [7]>
2 Type_A north <int [7]>
3 Type_A south <int [7]>
4 Type_A west <int [7]>
5 Type_B east <int [6]>
6 Type_B north <int [6]>
7 Type_B south <int [6]>
8 Type_B west <int [6]>
If we want only by 'rn'
dat_in_new %>%
pivot_longer(cols = -c(rn, strata, Sum_table_in),
names_to = c("lower", "upper", "direction"),
names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
values_drop_na = TRUE) %>%
type.convert(as.is = TRUE) %>%
group_by(rn, direction) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>% group_by(rn) %>%
summarise(freq = list(freq), .groups = "drop")
-output
# A tibble: 2 × 2
rn freq
<chr> <list>
1 Type_A <int [28]>
2 Type_B <int [24]>
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |

