'How to add columns for animal passage in R
I am trying to summarize our detection data in a way that I can easily see when an animal moves from one pool to another. Here is an example of one animal that I track
tibble [22 x 13] (S3: tbl_df/tbl/data.frame)
$ Receiver : chr [1:22] "VR2Tx-480679" "VR2Tx-480690" "VR2Tx-480690" "VR2Tx-480690" ...
$ Transmitter : chr [1:22] "A69-9001-12418" "A69-9001-12418" "A69-9001-12418" "A69-9001-12418" ...
$ Species : chr [1:22] "PDFH" "PDFH" "PDFH" "PDFH" ...
$ LocalDATETIME: POSIXct[1:22], format: "2021-05-28 07:16:52" ...
$ StationName : chr [1:22] "1405U" "1406U" "1406U" "1406U" ...
$ LengthValue : num [1:22] 805 805 805 805 805 805 805 805 805 805 ...
$ WeightValue : num [1:22] 8.04 8.04 8.04 8.04 8.04 8.04 8.04 8.04 8.04 8.04 ...
$ Sex : chr [1:22] "NA" "NA" "NA" "NA" ...
$ Translocated : num [1:22] 0 0 0 0 0 0 0 0 0 0 ...
$ Pool : num [1:22] 16 16 16 16 16 16 16 16 16 16 ...
$ DeployDate : POSIXct[1:22], format: "2018-06-05" ...
$ Latitude : num [1:22] 41.6 41.6 41.6 41.6 41.6 ...
$ Longitude : num [1:22] -90.4 -90.4 -90.4 -90.4 -90.4 ...
I want to add columns that would allow me to summarize this data in a way that I would have the start date of when an animal was in a pool and when the animal moved to a different pool it would have the end date of when it exits.
Ex: Enters Pool 19 on 1/1/22, next detected in Pool 20 on 1/2/22, so there would be columns that say fish entered and exited P
ool 19 on 1/1/22 and 1/2/22. I have shared an Excel file example of what I am trying to do. I would like to code upstream movement with a 1 and downstream movement with 0.
I have millions of detections and hundreds of animals that I monitor so I am trying to find a way to look at passages for each animal. Thank you!
Here is my dataset using dput:
structure(list(Receiver = c("VR2Tx-480679", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480692", "VR2Tx-480695",
"VR2Tx-480695", "VR2Tx-480713", "VR2Tx-480713", "VR2Tx-480702",
"VR100", "VR100", "VR100"), Transmitter = c("A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418"), Species = c("PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH"), LocalDATETIME = structure(c(1622186212, 1622381700,
1622384575, 1622184711, 1622381515, 1622381618, 1622381751, 1622381924,
1622382679, 1622383493, 1622384038, 1622384612, 1622183957, 1622381515,
1626905954, 1626905688, 1622971975, 1622970684, 1626929618, 1624616880,
1626084540, 1626954660), tzone = "UTC", class = c("POSIXct",
"POSIXt")), StationName = c("1405U", "1406U", "1406U", "1406U",
"1406U", "1406U", "1406U", "1406U", "1406U", "1406U", "1406U",
"1406U", "1406U", "1404L", "1401D", "1401D", "14Aux2", "14Aux2",
"15.Mid.Wall", "man_loc", "man_loc", "man_loc"), LengthValue = c(805,
805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805,
805, 805, 805, 805, 805, 805, 805, 805), WeightValue = c(8.04,
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04,
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04),
Sex = c("NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA"), Translocated = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Pool = c(16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 14, 14, 16), DeployDate = structure(c(1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800), tzone = "UTC", class = c("POSIXct", "POSIXt"
)), Latitude = c(41.57471, 41.5758, 41.5758, 41.5758, 41.5758,
41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758,
41.5758, 41.57463, 41.5731, 41.5731, 41.57469, 41.57469,
41.57469, 41.57469, 41.57469, 41.57469), Longitude = c(-90.39944,
-90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793,
-90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793,
-90.39984, -90.40391, -90.40391, -90.40462, -90.40462, -90.40462,
-90.40462, -90.40462, -90.40462)), row.names = c(NA, -22L
), class = c("tbl_df", "tbl", "data.frame"))
> dput(T12418)
structure(list(Receiver = c("VR2Tx-480679", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480692", "VR2Tx-480695",
"VR2Tx-480695", "VR2Tx-480713", "VR2Tx-480713", "VR2Tx-480702",
"VR100", "VR100", "VR100"), Transmitter = c("A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418"), Species = c("PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH"), LocalDATETIME = structure(c(1622186212, 1622381700,
1622384575, 1622184711, 1622381515, 1622381618, 1622381751, 1622381924,
1622382679, 1622383493, 1622384038, 1622384612, 1622183957, 1622381515,
1626905954, 1626905688, 1622971975, 1622970684, 1626929618, 1624616880,
1626084540, 1626954660), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
StationName = c("1405U", "1406U", "1406U", "1406U", "1406U",
"1406U", "1406U", "1406U", "1406U", "1406U", "1406U", "1406U",
"1406U", "1404L", "1401D", "1401D", "14Aux2", "14Aux2", "15.Mid.Wall",
"man_loc", "man_loc", "man_loc"), LengthValue = c(805, 805,
805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805,
805, 805, 805, 805, 805, 805, 805, 805), WeightValue = c(8.04,
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04,
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04,
8.04), Sex = c("NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA"), Translocated = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Pool = c(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 14, 14, 16), DeployDate = structure(c(1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Latitude = c(41.57471, 41.5758, 41.5758, 41.5758, 41.5758,
41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758,
41.5758, 41.57463, 41.5731, 41.5731, 41.57469, 41.57469,
41.57469, 41.57469, 41.57469, 41.57469), Longitude = c(-90.39944,
-90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793,
-90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793,
-90.39984, -90.40391, -90.40391, -90.40462, -90.40462, -90.40462,
-90.40462, -90.40462, -90.40462)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -22L))
Solution 1:[1]
Here is one possibility for getting the beginning date for entering a pool and ending date for leaving a pool. First, I group by Species (could also add additional grouping variables to distinguish between specimens) and arrange by the time. Then, I look for any changes to the Pool using cumsum. Then, I pull the first date recorded for the pool as the the date that they entered the pool. Then, I do some grouping and ungrouping to grab the date from the next group (i.e., the date the species left the pool) and then copy that date for the whole group. For determining upstream/downstream, we can use case_when inside of mutate. I'm also assuming that you want this to match the date, so I have filled in the values for each group with the movement for pool change.
library(tidyverse)
df_dates <- df %>%
group_by(Species, Transmitter) %>%
arrange(Species, Transmitter, LocalDATETIME) %>%
mutate(changeGroup = cumsum(Pool != lag(Pool, default = -1))) %>%
group_by(Species, Transmitter, changeGroup) %>%
mutate(EnterPool = first(format(as.Date(LocalDATETIME), "%m/%d/%Y"))) %>%
ungroup(changeGroup) %>%
mutate(LeftPool = lead(EnterPool)) %>%
group_by(Species, Transmitter, changeGroup) %>%
mutate(LeftPool = last(LeftPool)) %>%
ungroup(changeGroup) %>%
mutate(stream = case_when((Pool - lag(Pool)) > 0 ~ 0,
(Pool - lag(Pool)) < 0 ~ 1)) %>%
fill(stream, .direction = "down")
Output
print(as_tibble(df_dates[1:24, c(1:5, 10:17)]), n=24)
# A tibble: 24 × 13
Receiver Transmitter Species LocalDATETIME StationName Pool DeployDate Latitude Longitude changeGroup EnterPool LeftPool stream
<chr> <chr> <chr> <dttm> <chr> <dbl> <dttm> <dbl> <dbl> <int> <chr> <chr> <dbl>
1 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-28 06:39:17 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
2 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-28 06:51:51 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
3 VR2Tx-480679 A69-9001-12418 PDFH 2021-05-28 07:16:52 1405U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
4 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 13:31:55 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
5 VR2Tx-480692 A69-9001-12418 PDFH 2021-05-30 13:31:55 1404L 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
6 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 13:33:38 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
7 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 13:35:00 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
8 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 13:35:51 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
9 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 13:38:44 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
10 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 13:51:19 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
11 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 14:04:53 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
12 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 14:13:58 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
13 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 14:22:55 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
14 VR2Tx-480690 A69-9001-12418 PDFH 2021-05-30 14:23:32 1406U 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
15 VR2Tx-480713 A69-9001-12418 PDFH 2021-06-06 09:11:24 14Aux2 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
16 VR2Tx-480713 A69-9001-12418 PDFH 2021-06-06 09:32:55 14Aux2 16 2018-06-05 00:00:00 41.6 -90.4 1 05/28/2021 06/25/2021 NA
17 VR100 A69-9001-12418 PDFH 2021-06-25 10:28:00 man_loc 14 2018-06-05 00:00:00 41.6 -90.4 2 06/25/2021 07/21/2021 1
18 VR100 A69-9001-12418 PDFH 2021-07-12 10:09:00 man_loc 14 2018-06-05 00:00:00 41.6 -90.4 2 06/25/2021 07/21/2021 1
19 VR2Tx-480695 A69-9001-12418 PDFH 2021-07-21 22:14:48 1401D 16 2018-06-05 00:00:00 41.6 -90.4 3 07/21/2021 NA 0
20 VR2Tx-480695 A69-9001-12418 PDFH 2021-07-21 22:19:14 1401D 16 2018-06-05 00:00:00 41.6 -90.4 3 07/21/2021 NA 0
21 VR2Tx-480702 A69-9001-12418 PDFH 2021-07-22 04:53:38 15.Mid.Wall 16 2018-06-05 00:00:00 41.6 -90.4 3 07/21/2021 NA 0
22 VR100 A69-9001-12418 PDFH 2021-07-22 11:51:00 man_loc 16 2018-06-05 00:00:00 41.6 -90.4 3 07/21/2021 NA 0
23 AR100 B80-9001-12420 PDFH 2021-07-22 11:51:00 man_loc 19 2018-06-05 00:00:00 42.6 -90.4 1 07/22/2021 07/22/2021 NA
24 AR100 B80-9001-12420 PDFH 2021-07-22 11:51:01 man_loc 18 2018-06-05 00:00:00 42.6 -90.4 2 07/22/2021 NA 1
Data
df <- structure(list(Receiver = c("VR2Tx-480679", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690",
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480692", "VR2Tx-480695",
"VR2Tx-480695", "VR2Tx-480713", "VR2Tx-480713", "VR2Tx-480702",
"VR100", "VR100", "VR100", "AR100", "AR100"), Transmitter = c("A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418",
"A69-9001-12418", "B80-9001-12420", "B80-9001-12420"), Species = c("PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH",
"PDFH", "PDFH", "PDFH", "PDFH"), LocalDATETIME = structure(c(1622186212, 1622381700,
1622384575, 1622184711, 1622381515, 1622381618, 1622381751, 1622381924,
1622382679, 1622383493, 1622384038, 1622384612, 1622183957, 1622381515,
1626905954, 1626905688, 1622971975, 1622970684, 1626929618, 1624616880,
1626084540, 1626954660, 1626954661, 1626954660), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
StationName = c("1405U", "1406U", "1406U", "1406U", "1406U",
"1406U", "1406U", "1406U", "1406U", "1406U", "1406U", "1406U",
"1406U", "1404L", "1401D", "1401D", "14Aux2", "14Aux2", "15.Mid.Wall",
"man_loc", "man_loc", "man_loc", "man_loc", "man_loc"), LengthValue = c(805, 805,
805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805,
805, 805, 805, 805, 805, 805, 805, 805, 805, 805), WeightValue = c(8.04,
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04,
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04,
8.04, 8.04, 8.04), Sex = c("NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA"), Translocated = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
Pool = c(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 14, 14, 16, 18, 19), DeployDate = structure(c(1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800, 1528156800, 1528156800,
1528156800, 1528156800, 1528156800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Latitude = c(41.57471, 41.5758, 41.5758, 41.5758, 41.5758,
41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758,
41.5758, 41.57463, 41.5731, 41.5731, 41.57469, 41.57469,
41.57469, 41.57469, 41.57469, 41.57469, 42.57469, 42.57469), Longitude = c(-90.39944,
-90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793,
-90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793,
-90.39984, -90.40391, -90.40391, -90.40462, -90.40462, -90.40462,
-90.40462, -90.40462, -90.40462, -90.40470, -90.40470)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -24L))
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |

