'Determining duration between lead/lag datetime stamps by group in R
I am trying to determine the time difference between lead and lag timestamps per UserId in my dataframe. Here is a mock example of the data that I am using:-
df<-structure(list(OrigTime = structure(c(1622918877.032, 1622990559.267,
1623523623.486, 1622964997.979, 1622906958.74, 1622926722.027,
1622995932.538, 1623007146.343, 1622852334.564, 1623060816.584,
1623088121.297, 1623106707.248, 1623157212.89, 1623193304.839,
1623274436.729, 1623043054.385, 1623538988.387, 1623005857.513,
1622910331.285, 1622896371.372, 1623578741.851, 1623587705.782,
1622962414.979, 1623537531.112, 1622903658.463, 1622919800.373,
1623141876.369, 1622853086.032, 1623160223.889, 1623550454.237,
1622977109.735, 1622897061.318, 1622905574.829, 1622975250.648,
1622926151.326, 1623051278.279, 1623155086.613, 1622903064.758,
1623548739.613, 1622926195.481, 1622909740.446, 1622886108.56,
1623571384.742, 1623605761.052, 1622993030.692, 1622970165.788,
1623011480.065, 1622988209.8, 1623322872.845, 1622920880.023,
1623086821.623, 1623612400.05, 1622999424.629, 1622884467.164,
1622909027.54, 1623357575.019, 1623520403.778, 1623506301.231,
1623500077.499, 1622940184.402, 1623495635.381, 1623546311.154,
1623160176.129, 1623346026.209, 1623536993.306, 1622909817.164,
1623530930.965, 1622877595.896, 1622935176.086, 1622966029.777,
1623000495.258, 1623143927.116, 1623174826.058, 1623251814.229,
1623334538.838, 1623340615.546, 1623583357.417, 1622888019.697,
1622987815.488, 1622985968.726, 1622992132.955, 1623074972.982,
1623163375.997, 1622963144.848, 1623314383.152, 1623338313.831,
1623551534.787, 1623309944.257, 1622976858.145, 1622917035.885,
1623004832.589, 1623503830.715, 1622909067.695, 1623237521.393,
1623560279.459, 1623536321.051, 1623519172.633, 1622904213.48,
1622930692.598, 1623534517.344), class = c("POSIXct", "POSIXt"
), tzone = ""),
LastTime = structure(c(1622920407.314, 1622992017.446,
1623523816.193, 1622967138.074, 1622908479.07, 1622929803.083,
1622998806.578, 1623009559.964, 1622852937.119, 1623060829.114,
1623088500.114, 1623107957.757, 1623160460.245, 1623196568.13,
1623277115.719, 1623043075.018, 1623541056.552, 1623009099.355,
1622912024.838, 1622896604.061, 1623581281.396, 1623587723.349,
1622964263.202, 1623540285.881, 1622904906.681, 1622920279.246,
1623145192.007, 1622853411.776, 1623161274.767, 1623552259.907,
1622979765.947, 1622900302.79, 1622908080.385, 1622977216.257,
1622926569.129, 1623052916.319, 1623157846.969, 1622906004.01,
1623550464.577, 1622926894.527, 1622909784.953, 1622888260.143,
1623574636.082, 1623608199.766, 1622993135.284, 1622972841.446,
1623011795.668, 1622991602.355, 1623325007.005, 1622920971.11,
1623089131.737, 1623615902.665, 1623001797.927, 1622884502.675,
1622909092.535, 1623358953.803, 1623522069.405, 1623506926.633,
1623500257.459, 1622943093.707, 1623496149.811, 1623546362.939,
1623161795.166, 1623346301.719, 1623538246.607, 1622910287.546,
1623531717.106, 1622878786.985, 1622935344.171, 1622967457.633,
1623003649.624, 1623146488.585, 1623175049.705, 1623254364.036,
1623337920.846, 1623343354.911, 1623584490.558, 1622890693.611,
1622990339.013, 1622988379.946, 1622993619.888, 1623077241.267,
1623164370.961, 1622963916.184, 1623314543.741, 1623340169.331,
1623551844.722, 1623309979.964, 1622976999.899, 1622918705.036,
1623007011.831, 1623506533.509, 1622910346.854, 1623237991.188,
1623563695.799, 1623537537.925, 1623520857.21, 1622904770.506,
1622931363.125, 1623535224.247), class = c("POSIXct", "POSIXt"), tzone = ""),
Count = c(1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 3L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 1L, 4L, 1L, 1L, 1L, 1L, 1L),
Date = structure(c(18783,18784, 18790, 18784, 18783, 18783, 18784, 18784, 18783, 18785,
18785, 18785, 18786, 18787, 18787, 18785, 18791, 18784, 18783,
18783, 18791, 18791, 18784, 18790, 18783, 18783, 18786, 18783,
18786, 18791, 18784, 18783, 18783, 18784, 18783, 18785, 18786,
18783, 18791, 18783, 18783, 18783, 18791, 18791, 18784, 18784,
18784, 18784, 18788, 18783, 18785, 18791, 18784, 18783, 18783,
18788, 18790, 18790, 18790, 18784, 18790, 18791, 18786, 18788,
18790, 18783, 18790, 18783, 18784, 18784, 18784, 18786, 18786,
18787, 18788, 18788, 18791, 18783, 18784, 18784, 18784, 18785,
18786, 18784, 18788, 18788, 18791, 18788, 18784, 18783, 18784,
18790, 18783, 18787, 18791, 18790, 18790, 18783, 18783, 18790
), class = "Date"),
UserId = c("853a97958b", "5697a38454", "c634a20759","ccf82840cd", "c083d8b935", "0410501e6b", "44c7c67c09", "9df7bcac8a",
"83db724191", "ebebd2505c", "9718c1ac40", "70e04f7a77", "8d6640a636",
"5ae2fb243b", "ebebd2505c", "7183f894ba", "83db724191", "0587f9f73b",
"875b5bcf85", "b1e4fc6c16", "5697a38454", "6cd922ee02", "503297a15f",
"def09e5b6a", "c083d8b935", "b7a17366f5", "44c7c67c09", "ee96fb2ea5",
"1053a213ea", "44c7c67c09", "5ae2fb243b", "e34de6863b", "7cf03078e8",
"bdfe5fe4d0", "bdfe5fe4d0", "c4c834065b", "36d0a2a630", "acfef8373f",
"ccf82840cd", "708885c8e0", "a1ce5e9964", "a5d5a264b4", "def09e5b6a",
"779cb9e811", "b1e4fc6c16", "c083d8b935", "a5d5a264b4", "5ae2fb243b",
"8d6a062f0f", "3e7700d63e", "9df7bcac8a", "44c7c67c09", "dfb1730f71",
"4646f53bd2", "c58f3eea85", "70e04f7a77", "de91cac214", "def09e5b6a",
"4646f53bd2", "875b5bcf85", "70e04f7a77", "65a2416cbc", "b3896de6fe",
"efe44e7d92", "44c7c67c09", "ebebd2505c", "0587f9f73b", "0393eacfee",
"8fccf03fc1", "83db724191", "de91cac214", "9718c1ac40", "779cb9e811",
"49f14ca03c", "0410501e6b", "a5d5a264b4", "e2c5bb55c9", "a1ce5e9964",
"3e7700d63e", "875b5bcf85", "70e04f7a77", "dfb1730f71", "46e37e426c",
"708885c8e0", "875b5bcf85", "e2c5bb55c9", "1053a213ea", "7cf03078e8",
"50da214747", "7cf03078e8", "0410501e6b", "779cb9e811", "9df7bcac8a",
"5697a38454", "ddf466ccaa", "c083d8b935", "3e7700d63e", "def09e5b6a",
"f52c20cd44", "5ae2fb243b")), row.names = c(NA, 100L), class = "data.frame")
library(tidyverse)
df<-df%>%
arrange(UserId,OrigTime)
What I want is to be able to group the data by UserId and then calculate the difference (minutes) in time between the LastTime in one row and the OrigTime in the following row. Here is an example of the type of outcome I am after, using one UserId as an example:-
#5697a38454
#OrigTime LastTime Count Date UserId duration_min
#1 2021-06-06 15:42:39 2021-06-06 16:06:57 1 2021-06-06 5697a38454 NA
#2 2021-06-09 12:18:41 2021-06-09 12:26:31 1 2021-06-09 5697a38454 4091.73
#3 2021-06-13 11:05:41 2021-06-13 11:48:01 3 2021-06-13 5697a38454 6399.17
For the first instance per UserId, the time difference will be NA, as there is no previous LastTime to base a calculation of. Should a particular UserId only have one row/instance, then the duration_min variable should be NA also. Can anyone demonstrate a way to do this? much appreciated :)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
