For loop to clean multiple dataframes
Here's my code:
library(PNADcIBGE)
#DOWNLOAD DATABASES#
# Variables requested from every quarterly file. Defined once so the four
# downloads below always ask for exactly the same columns.
pnadc_vars <- c(
  "UF", "Capital", "V1022", "V2007", "V2009", "V2010", "V3009A", "VD3004",
  "VD4001", "VD4004", "V4001", "V4003", "V4005", "V4010", "V4002", "V4012",
  "V403322", "VD4019", "VD4020"
)

# Download one quarter of 2018 via get_pnadc().
#
# `quarter` is passed both as the quarter to download and as `defperiod`,
# so each file uses its own quarter (with defyear = 2018) for the deflator
# arguments. All other arguments are identical for every quarter.
download_pnadc_2018 <- function(quarter) {
  get_pnadc(
    year = 2018,
    quarter = quarter,
    vars = pnadc_vars,
    labels = FALSE,
    deflator = TRUE,
    design = FALSE,
    defyear = 2018,
    defperiod = quarter
  )
}

# Object names use the `data_<year>q<quarter>` form that the rest of the
# script (data_list and the cleaning steps) refers to.
data_2018q1 <- download_pnadc_2018(1)
data_2018q2 <- download_pnadc_2018(2)
data_2018q3 <- download_pnadc_2018(3)
data_2018q4 <- download_pnadc_2018(4)
#CLEAN DATABASES#
# Collect every quarterly data frame into one list (one row per year) so the
# same cleaning steps can be applied to all of them.
data_list <- list(
  data_2018q1, data_2018q2, data_2018q3, data_2018q4,
  data_2019q1, data_2019q2, data_2019q3, data_2019q4,
  data_2020q1, data_2020q2, data_2020q3, data_2020q4,
  data_2021q1, data_2021q2, data_2021q3
)
I'm trying to apply log10 to the variable VD4020 in every data frame within data_list. After the log10, I want to do other things, such as renaming columns, adding columns, and other cleaning steps. I tried using lapply with no success, and couldn't write a for loop either. So far I have been doing it manually, going into each data frame and subsetting it, such as:
# Drop the rows where VD4020 is missing, then replace VD4020 with its log.
#
# df: a data frame with a numeric VD4020 column.
# Returns the filtered data frame with VD4020 log-transformed.
clean_vd4020 <- function(df) {
  # is.na() is the reliable missing-value test; comparing against the
  # string "NA" only drops missing rows as a side effect of NA propagation.
  df <- df[!is.na(df$VD4020), , drop = FALSE]
  # log() is the natural logarithm; use log10() here if a base-10 log is
  # intended.
  df$VD4020 <- log(df$VD4020)
  df
}

# Each quarter is cleaned from its own data (previously q1 and q3 were
# overwritten with a subset of q2).
data_2018q1 <- clean_vd4020(data_2018q1)
data_2018q2 <- clean_vd4020(data_2018q2)
data_2018q3 <- clean_vd4020(data_2018q3)
And so on with each and every command. This is really time consuming as I need to do it with 15 dataframes.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
