'How correct convert date format in integer format in R

df1=structure(list(date = c("22.04.2022", "22.04.2022", "22.04.2022", 
"22.04.2022", "23.04.2022", "23.04.2022", "23.04.2022", "24.04.2022", 
"24.04.2022", "24.04.2022"), d1 = c("8:00:00", "10:00:00", "12:00:00", 
"12:00:00", "10:00:00", "12:00:00", "12:00:00", "10:00:00", "12:00:00", 
"12:00:00"), d2 = c("10:00:00", "20:00:00", "22:00:00", "22:00:00", 
"20:00:00", "22:00:00", "22:00:00", "20:00:00", "22:00:00", "22:00:00"
)), class = "data.frame", row.names = c(NA, -10L))

here 3 columns with date format, all of them i need convert to integer, so that desired output will be

date         d1          d2         date1      d1_1 d2_1
22.04.2022  8:00:00     10:00:00    20220422    8   10
22.04.2022  10:00:00    20:00:00    20220422    10  20
22.04.2022  12:00:00    22:00:00    20220422    12  22
22.04.2022  12:00:00    22:00:00    20220422    12  22
23.04.2022  10:00:00    20:00:00    20220423    10  20
23.04.2022  12:00:00    22:00:00    20220423    12  22
23.04.2022  12:00:00    22:00:00    20220423    12  22
24.04.2022  10:00:00    20:00:00    20220424    10  20
24.04.2022  12:00:00    22:00:00    20220424    12  22
24.04.2022  12:00:00    22:00:00    20220424    12  22

where date1 d1_1 d2 are integer from date d1 d2

How can it can be done better? thank you.

r


Solution 1:[1]

A possible solution:

library(tidyverse)
library(lubridate)

df1 %>% 
  mutate(date1 = dmy(date) %>% str_remove_all("-"),
         across(matches("d[1-2]"), ~ str_extract(.x, "^\\d+(?=\\:)"),
                .names = "{.col}_1"))

#>          date       d1       d2    date1 d1_1 d2_1
#> 1  22.04.2022  8:00:00 10:00:00 20220422    8   10
#> 2  22.04.2022 10:00:00 20:00:00 20220422   10   20
#> 3  22.04.2022 12:00:00 22:00:00 20220422   12   22
#> 4  22.04.2022 12:00:00 22:00:00 20220422   12   22
#> 5  23.04.2022 10:00:00 20:00:00 20220423   10   20
#> 6  23.04.2022 12:00:00 22:00:00 20220423   12   22
#> 7  23.04.2022 12:00:00 22:00:00 20220423   12   22
#> 8  24.04.2022 10:00:00 20:00:00 20220424   10   20
#> 9  24.04.2022 12:00:00 22:00:00 20220424   12   22
#> 10 24.04.2022 12:00:00 22:00:00 20220424   12   22

Solution 2:[2]

We may also convert to Date class and use format as well as use hour to extract the hour part

library(dplyr)
library(lubridate)
df1 %>%
   mutate(date1 = format(dmy(date), '%Y%m%d'),
     across(d1:d2, ~ hour(hms(.x)), .names = "{.col}_1"))

-output

         date       d1       d2    date1 d1_1 d2_1
1  22.04.2022  8:00:00 10:00:00 20220422    8   10
2  22.04.2022 10:00:00 20:00:00 20220422   10   20
3  22.04.2022 12:00:00 22:00:00 20220422   12   22
4  22.04.2022 12:00:00 22:00:00 20220422   12   22
5  23.04.2022 10:00:00 20:00:00 20220423   10   20
6  23.04.2022 12:00:00 22:00:00 20220423   12   22
7  23.04.2022 12:00:00 22:00:00 20220423   12   22
8  24.04.2022 10:00:00 20:00:00 20220424   10   20
9  24.04.2022 12:00:00 22:00:00 20220424   12   22
10 24.04.2022 12:00:00 22:00:00 20220424   12   22

Solution 3:[3]

In base using sub:

df1$date1 <- sub("(\\d+)\\.(\\d+)\\.(\\d+)", "\\3\\2\\1", df1$date)
df1[c("d1_1", "d2_2")] <- lapply(df1[c("d1", "d2")], \(x) sub(":.*", "", x))

Using as.Date and difftime:

df1$date1 <- gsub("-", "", as.Date(df1$date, "%d.%m.%Y"))
df1[c("d1_1", "d2_2")] <- lapply(df1[c("d1", "d2")],
                                  \(x) as.integer(as.difftime(x, unit="hour")))

Result:

df1
#         date       d1       d2    date1 d1_1 d2_2
#1  22.04.2022  8:00:00 10:00:00 20220422    8   10
#2  22.04.2022 10:00:00 20:00:00 20220422   10   20
#3  22.04.2022 12:00:00 22:00:00 20220422   12   22
#4  22.04.2022 12:00:00 22:00:00 20220422   12   22
#5  23.04.2022 10:00:00 20:00:00 20220423   10   20
#6  23.04.2022 12:00:00 22:00:00 20220423   12   22
#7  23.04.2022 12:00:00 22:00:00 20220423   12   22
#8  24.04.2022 10:00:00 20:00:00 20220424   10   20
#9  24.04.2022 12:00:00 22:00:00 20220424   12   22
#10 24.04.2022 12:00:00 22:00:00 20220424   12   22

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2 akrun
Solution 3 GKi