'Grouping dataframe by columns and creating sequential ID to aggregate the data further
I have a dataset with the following structure. Each Name can have multiple V_IDs and the records within V_ID are ordered by time. Some records have a mark on them.
I want to create a sequential ID, which iterates within each Name-V_ID group, and increments at a new marked observation. While also maintaining a cumulative count of observations in each group. THe output should look something like below.
The input and output data structures are given below. I am happy to do the actual code by myself, any guidance on the approach I could follow using python is appreciated.
input_data <-
structure(
list(
Name = c(
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
NA,
NA,
NA,
NA,
NA,
NA
),
V_ID = c(
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13445,
13445,
13445,
13445,
13445,
13445,
13445,
13445,
13445,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
NA,
NA,
NA,
NA,
NA,
NA
),
Time = structure(
c(
NA,
59700,
55620,
53040,
46800,
42840,
40080,
36180,
33000,
31020,
28800,
25680,
21720,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
1260,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
35100,
36060,
36900,
38340,
39600,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
1020,
3720,
5400,
35100,
36060,
36900,
38340,
39600,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
class = c("hms", "difftime"),
units = "secs"
),
Marked = c(
NA,
NA,
NA,
"brust",
NA,
NA,
NA,
NA,
NA,
"hust",
NA,
NA,
NA,
NA,
NA,
"crust",
NA,
NA,
NA,
NA,
NA,
"trust",
NA,
NA,
NA,
NA,
"plast",
NA,
NA,
NA,
"must",
NA,
NA,
NA,
NA,
NA,
"trust",
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
"crust",
NA,
NA,
NA,
NA,
NA,
NA,
"last",
NA,
NA,
NA,
NA,
"hust",
NA,
NA,
"clust",
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
)
),
row.names = c(NA,-70L),
spec = structure(
list(
cols = list(
Name = structure(list(), class = c("collector_character",
"collector")),
V_ID = structure(list(), class = c("collector_double",
"collector")), Time = structure(list(format = ""), class = c("collector_time",
data_output <-
structure(
list(
Name = c(
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m91",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
"m92",
NA,
NA,
NA,
NA,
NA,
NA
),
V_ID = c(
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13434,
13445,
13445,
13445,
13445,
13445,
13445,
13445,
13445,
13445,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13642,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13723,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
13769,
NA,
NA,
NA,
NA,
NA,
NA
),
Time = structure(
c(
NA,
59700,
55620,
53040,
46800,
42840,
40080,
36180,
33000,
31020,
28800,
25680,
21720,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
1260,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
35100,
36060,
36900,
38340,
39600,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
1020,
3720,
5400,
35100,
36060,
36900,
38340,
39600,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
class = c("hms", "difftime"),
units = "secs"
),
Marked = c(
NA,
NA,
NA,
"brust",
NA,
NA,
NA,
NA,
NA,
"hust",
NA,
NA,
NA,
NA,
NA,
"crust",
NA,
NA,
NA,
NA,
NA,
"trust",
NA,
NA,
NA,
NA,
"plast",
NA,
NA,
NA,
"must",
NA,
NA,
NA,
NA,
NA,
"trust",
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
"crust",
NA,
NA,
NA,
NA,
NA,
NA,
"last",
NA,
NA,
NA,
NA,
"hust",
NA,
NA,
"clust",
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
index = c(
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
3,
3,
3,
1,
1,
1,
2,
2,
2,
2,
2,
2,
1,
1,
1,
1,
1,
2,
2,
2,
2,
3,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
1,
1,
1,
1,
1,
2,
2,
2,
3,
3,
NA,
NA,
NA,
NA,
NA,
NA
),
Cumulative_count = c(
1,
2,
3,
4,
1,
2,
3,
4,
5,
6,
1,
2,
3,
1,
2,
3,
4,
5,
6,
7,
8,
9,
1,
2,
3,
4,
5,
1,
2,
3,
4,
5,
1,
2,
3,
4,
5,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
1,
2,
3,
4,
5,
6,
7,
1,
2,
3,
4,
5,
1,
2,
3,
1,
2,
NA,
NA,
NA,
NA,
NA,
NA
)
),
row.names = c(NA,-70L),
spec = structure(list(
cols = list(
Name = structure(list(), class = c("collector_character",
"collector")),
V_ID = structure(list(), class = c("collector_double",
"collector")),
Time = structure(list(format = ""), class = c("collector_time",
"collector")),
Marked = structure(list(), class = c("collector_character",
"collector")),
index = structure(list(), class = c("collector_double",
"collector")),
Cumulative_count = structure(list(), class = c("collector_double",
"collector"))
),
default = structure(list(), class = c("collector_guess",
"collector")),
delim = ","
), class = "col_spec"),
problems = < pointer:0x0000019b44716e20 > ,
class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame")
)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|


