Group classification considering the state and date of data with other states in the middle in R
```
library(tidyverse)
```
df=structure(list(ID = c(348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 533L, 533L, 533L,533L, 533L, 533L, 586L, 902L, 902L, 902L, 902L, 902L, 902L, 902L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L,1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1309L, 1736L, 1736L, 1736L, 1736L), date = c(0, 0.13, 1.71, 2.01, 3.25, 3.95, 5.13, 6.61, 7.23, 8.22, 9.04, 10.06, 11.18, 12.03, 13.18, 0, 0.39, 2.99, 6.21, 9.44, 12.43, 0.1, 0.23, 1.15, 2.07, 2.99, 3.75,4.67, 5.06, 0, 0.36, 1.35, 2.27, 3.42, 4.41, 5.52, 6.44, 7.4, 8.28, 9.44, 10.36, 11.24, 12.2, 13.35, 14.27, 0, 0, 1.58, 5.23, 6.15), state = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 5L, 4L, 5L, 5L, 4L, 5L, 3L, 3L, 4L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 4L, 5L, 4L, 5L, 5L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 3L, 5L, 4L, 5L, 6L), .Label = c("0", "1", "2", "3", "4", "5", "6", "7"), class = "factor")), class = "data.frame", row.names = c(NA, -50L))
I tried to calculate the interval, which is the period of staying in the state for each ID.
```
df %>% arrange(ID,date,state) %>% group_by(ID,state,first(date)) %>% mutate(firstdate_nextstate=lead(date,1),ID_lead=lead(ID,1)) %>% filter(ID==ID_lead) %>% mutate(interval=firstdate_nextstate-first(date))
```
Output
structure(list(ID = c(348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 533L, 533L, 533L, 533L, 902L, 902L, 902L, 902L, 902L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1736L), date = c(0, 0.13, 1.71, 2.01, 3.25, 3.95, 5.13, 6.61, 7.23, 8.22, 9.04, 10.06, 11.18, 0, 0.39, 2.99, 6.21, 0.23, 1.15, 2.07, 2.99, 3.75, 0, 0.36, 2.27, 3.42, 5.52, 6.44, 7.4, 9.44, 10.36, 11.24, 12.2, 13.35, 0), state = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 4L, 5L, 5L, 3L, 4L, 4L, 3L, 3L, 3L, 3L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 5L), .Label = c("0", "1", "2", "3", "4", "5", "6", "7"), class = "factor"), `first(date)` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), firstdate_nextstate = c(0.13, 1.71, 2.01, 3.25, 3.95, 5.13, 6.61, 7.23, 8.22, 9.04, 10.06, 11.18, 13.18, 2.99, 9.44, 6.21, 12.43, 2.99, 2.07, 5.06, 3.75, 4.67, 0.36, 1.35, 4.41, 5.52, 6.44, 8.28, 9.44, 10.36, 11.24, 12.2, 13.35, 14.27, 5.23), ID_lead = c(348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 348L, 533L, 533L, 533L, 533L, 902L, 902L, 902L, 902L, 902L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1064L, 1736L), interval = c(0.13, 1.71, 2.01, 3.25, 3.95, 5.13, 6.61, 7.23, 8.22, 9.04, 10.06, 11.18, 13.18, 2.99, 9.05, 6.21, 12.43, 2.76, 0.92, 3.91, 3.52, 4.44, 0.36, 1.35, 2.14, 2.1, 3.02, 4.86, 2.04, 2.96, 3.84, 4.8, 5.95, 6.87, 5.23)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -35L), groups = structure(list( ID = c(348L, 533L, 533L, 902L, 902L, 1064L, 1064L, 1064L, 1064L, 1736L), state = structure(c(2L, 4L, 5L, 3L, 4L, 3L, 4L, 5L, 6L, 5L), .Label = c("0", "1", "2", "3", "4", "5", "6", "7"), class = "factor"), `first(date)` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .rows = structure(list(1:13, 15L, c(14L, 16L, 17L), c(18L, 21L, 22L), 19:20, 23:24, 25L, 26:28, 29:34, 35L), ptype = 
integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
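- However, the results were not what I expected. I want to calculate the interval as the difference between dates within each ID and state, but taking state changes into account in time order: when an ID leaves a state and later returns to it, the later stay should get its own interval instead of being merged with the earlier one.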
- For example, the interval of state 1 of ID 348 is 12.03-0.00, and the interval of state 2 is 13.18-12.03. The interval of state 4 of ID 533 is 0.39-0.00 for date 0.00 and 9.44-2.99 for date 6.21, and the interval of state 3 is 2.99-0.39 for date 0.39 and 12.43-9.44 for date 9.44. Here are the expected results.
Expected
How can I do this?
Please let me know.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
