'create rows as the Aggregate sums of other rows grouped by conditions of multiple columns

I have a df as below, would like to sort by GEO, create row sums called SumTrade & SumNOT and by the different categories of APPR.

structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"), 
    NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L, 
    7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L, 
    3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers", 
    "Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport", 
    "Other customer and information services representatives", 
    "Other labourers in processing, manufacturing and utilities", 
    "Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)", 
    "Transport truck drivers"), class = "factor"), JPA = c(118L, 
    91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L), 
    APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA, 
-12L))

so my output should be

structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L, 
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L, 
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers", 
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport", 
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities", 
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)", 
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L, 
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L, 
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT", 
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA, 
-16L))

I tried with this code, but it's not generating what I want

  df1%>% 
    group_by(GEO,APPR)%>% 
    mutate(sumval = sum(JPA))->df1

r dplyr

Solution 1:^[1]

How about this:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
inp <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"), 
               NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L, 
                       7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L, 
                                                                             3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers", 
                                                                                                                                     "Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport", 
                                                                                                                                     "Other customer and information services representatives", 
                                                                                                                                     "Other labourers in processing, manufacturing and utilities", 
                                                                                                                                     "Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)", 
                                                                                                                                     "Transport truck drivers"), class = "factor"), JPA = c(118L, 
                                                                                                                                                                                            91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L), 
               APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
                                  1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA, 
                                                                                                                               -12L))

out <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
                                        2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
                                        ), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 
                                                                      6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L, 
                                                                      7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L, 
                                                                                                              5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers", 
                                                                                                                                                                          "Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport", 
                                                                                                                                                                          "Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities", 
                                                                                                                                                                          "Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)", 
                                                                                                                                                                          "SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
                                                                                                              ), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L, 
                                                                                                                                            72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L, 
                                                                                                                                                                                                                       2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT", 
                                                                                                                                                                                                                                                                                               "Trade"), class = "factor")), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                 -16L))

inp %>% 
  group_by(GEO, APPR) %>% 
  summarise(JPA = sum(JPA)) %>% 
  mutate(NOC = case_when(APPR == "Trade" ~ 7777, 
                         APPR == "NOT" ~ 6666), 
         Occupation = case_when(APPR == "Trade" ~ "SumTrade", 
                                APPR == "NOT" ~ "SumNot")) %>% 
  select(all_of(names(inp))) %>% 
  bind_rows(inp, .)
#> `summarise()` has grouped output by 'GEO'. You can override using the `.groups`
#> argument.
#>       GEO  NOC
#> 1  Barrie 6552
#> 2  Barrie 6322
#> 3  Barrie 7511
#> 4  Barrie 6722
#> 5  Barrie  122
#> 6  Guelph 6552
#> 7  Guelph 6322
#> 8  Guelph 7511
#> 9  Guelph 7611
#> 10 Guelph 9619
#> 11 Guelph 6411
#> 12 Guelph 6211
#> 13 Barrie 6666
#> 14 Barrie 7777
#> 15 Guelph 6666
#> 16 Guelph 7777
#>                                                             Occupation JPA
#> 1              Other customer and information services representatives 118
#> 2                                                                Cooks  91
#> 3                                              Transport truck drivers  59
#> 4          Operators and attendants in amusement, recreation and sport  27
#> 5                        Banking, credit and other investment managers  27
#> 6              Other customer and information services representatives 106
#> 7                                                                Cooks  72
#> 8                                              Transport truck drivers  58
#> 9                            Construction trades helpers and labourers  49
#> 10          Other labourers in processing, manufacturing and utilities 115
#> 11 Sales and account representatives - wholesale trade (non-technical) 109
#> 12                                            Retail sales supervisors  88
#> 13                                                              SumNot  54
#> 14                                                            SumTrade 268
#> 15                                                              SumNot 312
#> 16                                                            SumTrade 285
#>     APPR
#> 1  Trade
#> 2  Trade
#> 3  Trade
#> 4    NOT
#> 5    NOT
#> 6  Trade
#> 7  Trade
#> 8  Trade
#> 9  Trade
#> 10   NOT
#> 11   NOT
#> 12   NOT
#> 13   NOT
#> 14 Trade
#> 15   NOT
#> 16 Trade

^{Created on 2022-04-20 by the reprex package (v2.0.1)}

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source
Solution 1	DaveArmstrong

'create rows as the Aggregate sums of other rows grouped by conditions of multiple columns

Solution 1:[1]

Sources

Related Questions

Solution 1:^[1]