'Make a list out of frequencies, concatenating categories to that list
I am trying to adapt this solution, by onyambu:
New data
dat_in_new <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25) east` = c(1269L, 85L), `[0,25) north` = c(364L, 21L
), `[0,25) south` = c(1172L, 97L), `[0,25) west` = c(549L, 49L
), `[25,50) east` = c(0L, 0L), `[25,50) north` = c(0L, 0L), `[25,50) south` = c(0L,
0L), `[25,50) west` = c(0L, 0L), `[25,100) east` = c(579L, 220L
), `[25,100) north` = c(406L, 58L), `[25,100) south` = c(1048L,
316L), `[25,100) west` = c(764L, 131L), `[50,100) east` = c(0L,
0L), `[50,100) north` = c(0L, 0L), `[50,100) south` = c(0L, 0L
), `[50,100) west` = c(0L, 0L), `[100,250) east` = c(441L, 149L
), `[100,250) north` = c(224L, 45L), `[100,250) south` = c(521L,
247L), `[100,250) west` = c(770L, 124L), `[100,500) east` = c(0L,
0L), `[100,500) north` = c(0L, 0L), `[100,500) south` = c(0L,
0L), `[100,500) west` = c(0L, 0L), `[250,500) east` = c(232L,
172L), `[250,500) north` = c(207L, 40L), `[250,500) south` = c(202L,
148L), `[250,500) west` = c(457L, 153L), `[500,1000) east` = c(103L,
0L), `[500,1000) north` = c(185L, 0L), `[500,1000) south` = c(66L,
0L), `[500,1000) west` = c(200L, 0L), `[1000,1500) east` = c(0L,
0L), `[1000,1500) north` = c(0L, 0L), `[1000,1500) south` = c(0L,
0L), `[1000,1500) west` = c(0L, 0L), `[1500,3000) east` = c(0L,
0L), `[1500,3000) north` = c(0L, 0L), `[1500,3000) south` = c(0L,
0L), `[1500,3000) west` = c(0L, 0L), `[500,1000000] east` = c(0L,
288L), `[500,1000000] north` = c(0L, 120L), `[500,1000000] south` = c(0L,
229L), `[500,1000000] west` = c(0L, 175L), `[1000,1000000] east` = c(53L,
0L), `[1000,1000000] north` = c(82L, 0L), `[1000,1000000] south` = c(23L,
0L), `[1000,1000000] west` = c(63L, 0L), `[3000,1000000] east` = c(0L,
0L), `[3000,1000000] north` = c(0L, 0L), `[3000,1000000] south` = c(0L,
0L), `[3000,1000000] west` = c(0L, 0L), Sum_table_in = c(9980,
2867), strata = list(c(0, 25, 100, 250, 500, 1000, 1e+06), c(0,
25, 100, 250, 500, 1e+06))), row.names = c(NA, -2L), class = c("data.table",
"data.frame"))
Old data and solution
library(dplyr)
dat_in <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25)` = c(5L, 0L), `[25,50)` = c(0L, 0L), `[25,100)` = c(38L,
3L), `[50,100)` = c(0L, 0L), `[100,250)` = c(43L, 5L), `[100,500)` = c(0L,
0L), `[250,500)` = c(27L, 12L), `[500,1000)` = c(44L, 0L), `[1000,1500)` = c(0L,
0L), `[1500,3000)` = c(0L, 0L), `[500,1000000]` = c(0L, 53L),
`[1000,1000000]` = c(20L, 0L), `[3000,1000000]` = c(0L, 0L
), Sum_bin = c(177, 73), strata = list(c(0, 25, 100, 250,
500, 1000, 1e+06), c(0, 25, 100, 250, 500, 1e+06))), row.names = c(NA,
-2L), class = c("data.table", "data.frame"))
dat_in %>%
pivot_longer(-c(rn, strata)) %>%
tidyr::extract(name, c('lower', 'upper'), '(\\d+),(\\d+)', convert = TRUE) %>%
group_by(rn) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>%
summarise(freq = list(freq))
Question
For the new data, there are many more values, because there are values for east
, north
, south
and west
. These values should be concatenated to make one long list of frequencies, one for each type.
The start can be adapted as follows:
part1 <- dat_in_new %>%
pivot_longer(-c(rn, strata)) %>%
tidyr::extract(name, c('lower', 'upper', 'rest'), '(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)', convert = TRUE) %>%
select(-where(is_all_na))
But I get stuck in the following part, because I do not really understand the lines after group_by(rn)
, especially the .groups = 'drop_last'
part (link). I usually just run the code line by line to see what happens, but for this solution that did not really help:
part2 <- part2 %>%
group_by(rn) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>%
summarise(freq = list(freq))
Currently, the code runs, but I end up with the old outcome. How should I adapt this code to add all the values?
Solution 1:[1]
We may use pivot_longer
as
library(dplyr)
library(tidyr)
dat_in_new %>%
pivot_longer(cols = -c(rn, strata, Sum_table_in),
names_to = c("lower", "upper", "direction"),
names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
values_drop_na = TRUE) %>%
type.convert(as.is = TRUE) %>%
group_by(rn, direction) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>%
summarise(freq = list(freq), .groups = "drop")
-output
# A tibble: 8 × 3
rn direction freq
<chr> <chr> <list>
1 Type_A east <int [7]>
2 Type_A north <int [7]>
3 Type_A south <int [7]>
4 Type_A west <int [7]>
5 Type_B east <int [6]>
6 Type_B north <int [6]>
7 Type_B south <int [6]>
8 Type_B west <int [6]>
If we want only by 'rn'
dat_in_new %>%
pivot_longer(cols = -c(rn, strata, Sum_table_in),
names_to = c("lower", "upper", "direction"),
names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
values_drop_na = TRUE) %>%
type.convert(as.is = TRUE) %>%
group_by(rn, direction) %>%
filter(lower%in%strata[[1]] & upper %in% strata[[1]]) %>%
group_by(upper,.add = TRUE) %>%
summarise(freq = sum(value), .groups = 'drop_last') %>%
group_modify(~add_row(.,freq = sum(.$freq))) %>% group_by(rn) %>%
summarise(freq = list(freq), .groups = "drop")
-output
# A tibble: 2 × 2
rn freq
<chr> <list>
1 Type_A <int [28]>
2 Type_B <int [24]>
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 |