'create rows as the Aggregate sums of other rows grouped by conditions of multiple columns
I have a df as below, would like to sort by GEO, create row sums called SumTrade & SumNOT and by the different categories of APPR.
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"),
NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L,
3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives",
"Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"Transport truck drivers"), class = "factor"), JPA = c(118L,
91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-12L))
so my output should be
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L,
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L,
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L,
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L,
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT",
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-16L))
I tried with this code, but it's not generating what I want
df1%>%
group_by(GEO,APPR)%>%
mutate(sumval = sum(JPA))->df1
Solution 1:[1]
How about this:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
inp <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"),
NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L,
3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives",
"Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"Transport truck drivers"), class = "factor"), JPA = c(118L,
91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-12L))
out <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L,
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L,
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L,
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L,
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT",
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-16L))
inp %>%
group_by(GEO, APPR) %>%
summarise(JPA = sum(JPA)) %>%
mutate(NOC = case_when(APPR == "Trade" ~ 7777,
APPR == "NOT" ~ 6666),
Occupation = case_when(APPR == "Trade" ~ "SumTrade",
APPR == "NOT" ~ "SumNot")) %>%
select(all_of(names(inp))) %>%
bind_rows(inp, .)
#> `summarise()` has grouped output by 'GEO'. You can override using the `.groups`
#> argument.
#> GEO NOC
#> 1 Barrie 6552
#> 2 Barrie 6322
#> 3 Barrie 7511
#> 4 Barrie 6722
#> 5 Barrie 122
#> 6 Guelph 6552
#> 7 Guelph 6322
#> 8 Guelph 7511
#> 9 Guelph 7611
#> 10 Guelph 9619
#> 11 Guelph 6411
#> 12 Guelph 6211
#> 13 Barrie 6666
#> 14 Barrie 7777
#> 15 Guelph 6666
#> 16 Guelph 7777
#> Occupation JPA
#> 1 Other customer and information services representatives 118
#> 2 Cooks 91
#> 3 Transport truck drivers 59
#> 4 Operators and attendants in amusement, recreation and sport 27
#> 5 Banking, credit and other investment managers 27
#> 6 Other customer and information services representatives 106
#> 7 Cooks 72
#> 8 Transport truck drivers 58
#> 9 Construction trades helpers and labourers 49
#> 10 Other labourers in processing, manufacturing and utilities 115
#> 11 Sales and account representatives - wholesale trade (non-technical) 109
#> 12 Retail sales supervisors 88
#> 13 SumNot 54
#> 14 SumTrade 268
#> 15 SumNot 312
#> 16 SumTrade 285
#> APPR
#> 1 Trade
#> 2 Trade
#> 3 Trade
#> 4 NOT
#> 5 NOT
#> 6 Trade
#> 7 Trade
#> 8 Trade
#> 9 Trade
#> 10 NOT
#> 11 NOT
#> 12 NOT
#> 13 NOT
#> 14 Trade
#> 15 NOT
#> 16 Trade
Created on 2022-04-20 by the reprex package (v2.0.1)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | DaveArmstrong |
