'Multi-level list to data frame in R

I'd like to convert the below list to a data frame but I'm failing at doing it.

The list is taken from Microsoft Azure's API listing all resource types with technical information about them (link: https://docs.microsoft.com/en-us/rest/api/compute/resource-skus/list). The list is similar to this one:

library(tidyverse)

input <- list(value = list(
  list(resourceType = "rt1", name = "name1", tier = "tier1", size = "size1", family = "family1", capabilities = list(list(name = "cap_name1", value = "value1_1"), list(name = "cap_name2", value = "value1_2"))),
  list(resourceType = "rt1", name = "name2", tier = "tier2", size = "size2", family = "family2", capabilities = list(list(name = "cap_name2", value = "value2_2"), list(name = "cap_name3", value = "value2_3"))),
  list(resourceType = "rt1", name = "name3", tier = "tier3", size = "size3", family = "family3", capabilities = list(list(name = "cap_name1", value = "value3_1"), list(name = "cap_name3", value = "value3_3"))),
  list(resourceType = "rt1", name = "name4", tier = "tier4", size = "size4", family = "family4", capabilities = list(list(name = "cap_name1", value = "value4_1"), list(name = "cap_name2", value = "value4_2"), list(name = "cap_name3", value = "value4_3"))),
  list(resourceType = "rt2", name = "name5", capabilities = list(list(name = "cap_name4", value = "value5_5")))
))


expected_output <-
  tibble(
    resourceType = c("rt1", "rt1", "rt1", "rt1"),
    name = c("name1", "name2", "name3", "name4"),
    tier = c("tier1", "tier2", "tier3", "tier4"),
    size = c("size1", "size2", "size3", "size4"),
    family = c("family1", "family2", "family3", "family4"),
    cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
    cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
    cap_name3 = c(NA, "value2_3", "value3_3", "value4_3"),
  )
expected_output
#> # A tibble: 4 × 8
#>   resourceType name  tier  size  family  cap_name1 cap_name2 cap_name3
#>   <chr>        <chr> <chr> <chr> <chr>   <chr>     <chr>     <chr>    
#> 1 rt1          name1 tier1 size1 family1 value1_1  value1_2  <NA>     
#> 2 rt1          name2 tier2 size2 family2 <NA>      value2_2  value2_3 
#> 3 rt1          name3 tier3 size3 family3 value3_1  <NA>      value3_3 
#> 4 rt1          name4 tier4 size4 family4 value4_1  value4_2  value4_3

Created on 2022-05-12 by the reprex package (v2.0.1)

I have two problems here:

  1. I don't know how to filter only resourceType == "rt1". I know how to filter it this way:
input %>% pluck("value") %>% keep(~.x$resourceType == "rt1")

but I'd like to somehow do it without pluck step.

  1. The main problem is to transform it from input to expected_output. I found this complicated way to list all capabilities:
capabilities <- input %>% pluck("value") %>% keep(~.x$resourceType == "rt1") %>% transpose() %>% as_tibble() %>% pull(capabilities)
all_capabilities_names <- capabilities %>% map_depth(1, ~ map_chr(.x, "name")) %>% purrr::flatten_chr() %>% unique()
all_capabilities_names
#> [1] "cap_name1" "cap_name2" "cap_name3"

Created on 2022-05-12 by the reprex package (v2.0.1)

I'm stuck there because I have no idea how to map values to correct columns.

Working with lists is always a nightmare to me. Any help appreciated : )



Solution 1:[1]

With a little bit of purrry magic, you can sort of dig around to make tibbles at various levels and unnest/reduce them all the way up. With a little bit of playing around:

library(tidyverse)

input <- list(value = list(
  list(resourceType = "rt1", name = "name1", tier = "tier1", size = "size1", family = "family1", capabilities = list(list(name = "cap_name1", value = "value1_1"), list(name = "cap_name2", value = "value1_2"))),
  list(resourceType = "rt1", name = "name2", tier = "tier2", size = "size2", family = "family2", capabilities = list(list(name = "cap_name2", value = "value2_2"), list(name = "cap_name3", value = "value2_3"))),
  list(resourceType = "rt1", name = "name3", tier = "tier3", size = "size3", family = "family3", capabilities = list(list(name = "cap_name1", value = "value3_1"), list(name = "cap_name3", value = "value3_3"))),
  list(resourceType = "rt1", name = "name4", tier = "tier4", size = "size4", family = "family4", capabilities = list(list(name = "cap_name1", value = "value4_1"), list(name = "cap_name2", value = "value4_2"), list(name = "cap_name3", value = "value4_3"))),
  list(resourceType = "rt2", name = "name5", capabilities = list(list(name = "cap_name4", value = "value5_4")))
))

output_test <- input[[1]] %>% 
  map(as_tibble) %>% 
  reduce(bind_rows) %>% 
  mutate(capabilities = map(capabilities, as_tibble)) %>% 
  unnest(capabilities, names_repair = "unique") %>% 
  filter(resourceType == "rt1") %>% 
  pivot_wider(names_from = `name...6`, values_from = value) %>% 
  rename(name = `name...2`)
#> New names:
#> • `name` -> `name...2`
#> • `name` -> `name...6`

output_test
#> # A tibble: 4 × 8
#>   resourceType name  tier  size  family  cap_name1 cap_name2 cap_name3
#>   <chr>        <chr> <chr> <chr> <chr>   <chr>     <chr>     <chr>    
#> 1 rt1          name1 tier1 size1 family1 value1_1  value1_2  <NA>     
#> 2 rt1          name2 tier2 size2 family2 <NA>      value2_2  value2_3 
#> 3 rt1          name3 tier3 size3 family3 value3_1  <NA>      value3_3 
#> 4 rt1          name4 tier4 size4 family4 value4_1  value4_2  value4_3

And just to see if it's worked:

expected_output <-
  tibble(
    resourceType = c("rt1", "rt1", "rt1", "rt1"),
    name = c("name1", "name2", "name3", "name4"),
    tier = c("tier1", "tier2", "tier3", "tier4"),
    size = c("size1", "size2", "size3", "size4"),
    family = c("family1", "family2", "family3", "family4"),
    cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
    cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
    cap_name3 = c(NA, "value2_3", "value3_3", "value4_3"),
  )

assertthat::are_equal(expected_output, output_test)
#> [1] TRUE

Edit - encountering another error

If there is an empty vector in one of the lists then turning to a tibble wont work. You can discard that vector and it'll code as NA when binding all rows together:

library(tidyverse)

input <- list(value = list(
  list(resourceType = "rt1", name = "name1", tier = vector("character"), size = "size1", family = "family1", capabilities = list(list(name = "cap_name1", value = "value1_1"), list(name = "cap_name2", value = "value1_2"))),
  list(resourceType = "rt1", name = "name2", tier = "tier2", size = "size2", family = "family2", capabilities = list(list(name = "cap_name2", value = "value2_2"), list(name = "cap_name3", value = "value2_3"))),
  list(resourceType = "rt1", name = "name3", tier = "tier3", size = "size3", family = "family3", capabilities = list(list(name = "cap_name1", value = "value3_1"), list(name = "cap_name3", value = "value3_3"))),
  list(resourceType = "rt1", name = "name4", tier = "tier4", size = "size4", family = "family4", capabilities = list(list(name = "cap_name1", value = "value4_1"), list(name = "cap_name2", value = "value4_2"), list(name = "cap_name3", value = "value4_3"))),
  list(resourceType = "rt2", name = "name5", capabilities = list(list(name = "cap_name4", value = "value5_4")))
))

input$value %>% 
  map(~ discard(.x, is_empty) %>% as_tibble) %>% 
  reduce(bind_rows) %>% 
  mutate(capabilities = map(capabilities, as_tibble)) %>% 
  rename(value_name = name) %>% 
  unnest(capabilities, names_repair = "unique") %>% 
  filter(resourceType == "rt1") %>% 
  pivot_wider(names_from = `name`, values_from = value)
#> # A tibble: 4 × 8
#>   resourceType value_name size  family  tier  cap_name1 cap_name2 cap_name3
#>   <chr>        <chr>      <chr> <chr>   <chr> <chr>     <chr>     <chr>    
#> 1 rt1          name1      size1 family1 <NA>  value1_1  value1_2  <NA>     
#> 2 rt1          name2      size2 family2 tier2 <NA>      value2_2  value2_3 
#> 3 rt1          name3      size3 family3 tier3 value3_1  <NA>      value3_3 
#> 4 rt1          name4      size4 family4 tier4 value4_1  value4_2  value4_3

Created on 2022-05-14 by the reprex package (v2.0.1)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1