'Loop over months and apply a function

Hi I would like to loop over months and for each subset apply a function,

1 - (1 - se * p2)^df$n

Do you have any alternative to a for loop? Or would you suggest a better way to code it with a loop? This is just a fake example as the real database is quite huge and it is annoying to loop over 12 months The column month is

Thanks in advance

rm(list = ls())

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#> Error: RStudio not running
getwd()
#> [1] "C:/Users/Angela/AppData/Local/Temp/Rtmp21Zbwk/reprex-19b06a781308-waspy-bunny"

#load required packages 
library(mc2d)
#> Loading required package: mvtnorm
#> 
#> Attaching package: 'mc2d'
#> The following objects are masked from 'package:base':
#> 
#>     pmax, pmin
library(gplots)
#> 
#> Attaching package: 'gplots'
#> The following object is masked from 'package:stats':
#> 
#>     lowess
library(RColorBrewer)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(reprex)
library(tidyverse)
set.seed(99)
iters<-1000

df<-data.frame(id=c(1:30),month=c(1:6,1,6,4,1,5,2,3,2,5,4,6,3:6,4:6,1:5,5),n=rpois(30,5))

df$n[df$n == "0"] <- 3
se<-rbeta(iters,96,6)
epi.a<-rpert(iters,min=1.5, mode=2, max=3)
p=0.2
p2=epi.a*p

##my try the idea is to loop over the months 

results<-data.frame(m1=numeric(iters))
results<-cbind(results,rep(results[1],5))
colnames(results)<-paste("m", sep = "_", 1:6)

for (j in 1:6) {
  for (i in 1:iters) {
    if (df$month[i]== "1")results$m_1[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if (df$month[i]== "2")results$m_2[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "3")results$m_3[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "4")results$m_4[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "5")results$m_5[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "6")results$m_6[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
  }
  
}
#> Error in if (df$month[i] == "1") results$m_1[i] <- 1 - (1 - se[i] * p2[i])^df$n[i] else if (df$month[i] == : missing value where TRUE/FALSE needed

Created on 2022-05-04 by the reprex package (v2.0.1)



Solution 1:[1]

Using a for loop in R is very slow, because the interpreter can not use vectorization here. However, you can create a table of all possible combinations and calculate the result in a new column:

library(tidyverse)

expand_grid(
  j = seq(6),
  i = seq(10)
) %>%
  mutate(
    res = map2_dbl(j, i, ~ .x + .y**2)
  )
#> # A tibble: 60 × 3
#>        j     i   res
#>    <int> <int> <dbl>
#>  1     1     1     2
#>  2     1     2     5
#>  3     1     3    10
#>  4     1     4    17
#>  5     1     5    26
#>  6     1     6    37
#>  7     1     7    50
#>  8     1     8    65
#>  9     1     9    82
#> 10     1    10   101
#> # … with 50 more rows

Created on 2022-05-04 by the reprex package (v2.0.0)

Solution 2:[2]

Here is a way that avoids for loops. The calculations of p2 and m are vectorized and the results are output in a tidyverse pipe that reshapes those values, m, to wide format.

suppressPackageStartupMessages({
  library(mc2d)
  library(tidyverse)
})
set.seed(99)

df <- data.frame(
  id = 1:30,
  month = c(1:6, 1, 6, 4, 1, 5, 2, 3, 2, 5, 4, 6, 3:6, 4:6, 1:5, 5),
  n = rpois(30, 5)
)
iters <- nrow(df)

df$n[df$n == "0"] <- 3
se <- rbeta(iters, 96, 6)
epi.a <- rpert(iters, min = 1.5, mode = 2, max = 3)
p <- 0.2
p2 <- epi.a*p

m <- 1 - (1 - se * p2)^df$n
results <- data.frame(month = df$month, m)

results %>%
  arrange(month) %>%
  group_by(month) %>%
  mutate(n = row_number(), .groups = "drop") %>%
  pivot_wider(
    id_cols = n,
    names_from = month,
    names_glue = "m_{.name}",
    values_from = m
  ) %>%
  select(-n)
#> # A tibble: 7 × 6
#>      m_1    m_2    m_3    m_4   m_5    m_6
#>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
#> 1  0.970  0.623  0.905  0.998 0.929  0.980
#> 2  0.912  0.892  0.736  0.830 0.890  0.862
#> 3  0.795  0.932  0.553  0.958 0.931  0.798
#> 4  0.950  0.892  0.732  0.649 0.777  0.743
#> 5 NA     NA     NA      0.657 0.980  0.945
#> 6 NA     NA     NA      0.976 0.836 NA    
#> 7 NA     NA     NA     NA     0.740 NA

Created on 2022-05-04 by the reprex package (v2.0.1)

Edit

The code above can be rewritten as a function and then run the function iter times with replicate.

suppressPackageStartupMessages({
  library(mc2d)
  library(tidyverse)
})

sim_one <- function() {
  df <- data.frame(
    id = 1:30,
    month = c(1:6, 1, 6, 4, 1, 5, 2, 3, 2, 5, 4, 6, 3:6, 4:6, 1:5, 5),
    n = rpois(30, 5)
  )
  nr <- nrow(df)
  
  df$n[df$n == "0"] <- 3
  se <- rbeta(nr, 96, 6)
  epi.a <- rpert(nr, min = 1.5, mode = 2, max = 3)
  p <- 0.2
  p2 <- epi.a*p
  
  m <- 1 - (1 - se * p2)^df$n
  results <- data.frame(month = df$month, m)
  
  results %>%
    arrange(month) %>%
    group_by(month) %>%
    mutate(n = row_number(), .groups = "drop") %>%
    pivot_wider(
      id_cols = n,
      names_from = month,
      names_glue = "m_{.name}",
      values_from = m
    ) %>%
    select(-n)
}

Created on 2022-05-04 by the reprex package (v2.0.1)


Then run the function above, changing iters to the value you want, like this:

set.seed(99)

iters <- 2
sim_list <- replicate(iters, sim_one(), simplify = FALSE)

sim_list[[1]]
#> # A tibble: 7 × 6
#>      m_1    m_2    m_3    m_4   m_5    m_6
#>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
#> 1  0.970  0.623  0.905  0.998 0.929  0.980
#> 2  0.912  0.892  0.736  0.830 0.890  0.862
#> 3  0.795  0.932  0.553  0.958 0.931  0.798
#> 4  0.950  0.892  0.732  0.649 0.777  0.743
#> 5 NA     NA     NA      0.657 0.980  0.945
#> 6 NA     NA     NA      0.976 0.836 NA    
#> 7 NA     NA     NA     NA     0.740 NA

Created on 2022-05-04 by the reprex package (v2.0.1)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 danlooo
Solution 2