r - Time Difference for a factor variable between two columns on subsequent rows -
i have data frame entry time
, exit time
. trying column stay duration
difference between exit time , next row entry time. dataframe sorted date , entry time.
for first row stay duration is: 6:54:50 - 7:34:36 = 0:39:46
the problem face below scenario: when exit_time 22:15:05
, next row entry_time has 6:02:46
. fr case need stay duration 7:47:41
dput(df) structure(list(jrny_id_num = c(115492027250, 115492027250, 115523231209, 115523231209, 115526742250, 115526742250, 115509240124, 115509240124, 115539253765, 115539253765, 115570245886, 115567046025, 115562452408, 115562452408, 115574565032, 115574565032), biz_dt = structure(c(1l, 1l, 1l, 1l, 2l, 2l, 2l, 2l, 3l, 3l, 3l, 3l, 4l, 4l, 4l, 4l), .label = c("2017-01-01", "2017-01-02", "2017-01-03", "2017-01-04", "2017-01-05", "2017-01-06", "2017-01-07", "2017-01-09", "2017-01-10", "2017-01-11", "2017-01-12", "2017-01-13", "2017-01-14", "2017-01-15", "2017-01-16", "2017-01-17", "2017-01-18", "2017-01-19", "2017-01-20", "2017-01-21", "2017-01-22", "2017-01-23", "2017-01-24", "2017-01-25", "2017-01-26", "2017-01-27", "2017-01-31"), class = "factor"), entry_tm = structure(c(37l, 41l, 45l, 46l, 8l, 25l, 52l, 73l, 5l, 15l, 56l, 89l, 29l, 33l, 63l, 77l), .label = c("05:30:39", "05:32:07", "05:32:33", "05:32:38", "05:32:50", "05:32:59", "05:33:06", "05:37:14", "05:37:58", "05:38:34", "05:38:38", "05:40:22", "05:40:49", "05:41:16", "05:42:27", "05:47:17", "05:48:03", "05:48:13", "05:48:54", "05:49:15", "05:50:17", "05:51:42", "05:52:30", "05:53:20", "05:54:40", "05:56:24", "05:57:59", "06:00:11", "06:02:46", "06:03:28", "06:05:44", "06:32:18", "06:40:32", "06:40:40", "06:42:35", "06:45:51", "06:45:55", "06:52:49", "06:57:25", "07:03:49", "07:34:36", "08:26:43", "09:16:34", "10:16:10", "12:21:51", "13:36:40", "15:29:30", "16:07:03", "16:10:49", "16:13:51", "16:15:04", "16:29:20", "16:47:49", "16:48:42", "16:55:50", "16:56:27", "16:58:53", "17:01:02", "17:03:31", "17:06:19", "17:09:03", "17:11:22", "17:12:15", "17:12:57", "17:15:11", "17:16:56", "17:21:07", "17:22:18", "17:22:22", "17:23:53", "17:28:37", "17:30:17", "17:30:24", "17:31:21", "17:32:22", "17:59:07", "18:16:25", "18:17:13", "18:23:36", "18:27:40", "18:44:43", "18:46:36", "18:53:21", "20:55:32", "21:06:00", "21:07:08", "21:18:10", "21:18:21", "21:42:25", "21:43:45", "23:31:38"), class = "factor"), exit_tm = structure(c(34l, 37l, 45l, 46l, 9l, 27l, 54l, 60l, 7l, 20l, 71l, 88l, 25l, 40l, 68l, 72l), .label = c("?", "05:37:56", "05:39:50", "05:39:51", "05:39:53", "05:40:03", "05:40:51", "05:41:01", "05:43:02", "05:44:51", "05:45:04", "05:45:45", "05:56:35", "05:57:45", "05:58:58", "06:03:06", "06:21:39", "06:23:35", "06:24:30", "06:24:58", "06:28:36", "06:29:17", "06:29:25", "06:32:11", "06:32:15", "06:34:01", "06:36:28", "06:41:02", "06:41:05", "06:43:31", "06:44:44", "06:51:05", "06:51:46", "06:54:50", "07:07:55", "07:39:39", "07:43:21", "07:48:49", "08:05:22", "08:17:58", "08:18:15", "08:32:12", "09:34:23", "10:31:51", "13:17:38", "13:46:19", "16:08:29", "16:26:21", "16:26:50", "16:37:08", "17:09:13", "17:25:49", "17:26:31", "17:26:50", "17:27:35", "17:28:20", "17:31:16", "17:34:43", "17:35:16", "17:36:37", "17:42:09", "17:42:23", "18:00:27", "18:06:53", "18:08:38", "18:09:18", "18:13:02", "18:14:35", "18:20:15", "18:22:06", "18:23:17", "18:25:18", "18:25:30", "18:28:47", "18:30:11", "18:30:54", "18:33:31", "18:38:49", "18:41:19", "18:52:25", "19:05:37", "19:27:49", "21:06:21", "21:41:28", "21:47:13", "21:53:35", "21:54:29", "22:15:05", "22:25:41", "23:59:35"), class = "factor")), .names = c("jrny_id_num", "biz_dt", "entry_tm", "exit_tm"), row.names = c(160l, 73l, 51l, 145l, 111l, 56l, 119l, 157l, 168l, 131l, 81l, 78l, 135l, 35l, 165l, 25l), class = "data.frame")
the output trying is:
jrny_id_num biz_dt entry_tm exit_tm stay_duration 115492027250 1/1/2017 6:45:55 6:54:50 0:39:46 115492027250 1/1/2017 7:34:36 7:43:21 4:38:30 115523231209 1/1/2017 12:21:51 13:17:38 0:19:02 115523231209 1/1/2017 13:36:40 13:46:19 15:50:55 115526742250 1/2/2017 5:37:14 5:43:02 0:11:38 115526742250 1/2/2017 5:54:40 6:36:28 9:52:52 115509240124 1/2/2017 16:29:20 17:26:50 0:03:34 115509240124 1/2/2017 17:30:24 17:36:37 11:56:13 115539253765 1/3/2017 5:32:50 5:40:51 0:01:36 115539253765 1/3/2017 5:42:27 6:24:58 10:31:29 115570245886 1/3/2017 16:56:27 18:23:17 3:19:08 115567046025 1/3/2017 21:42:25 22:15:05 7:47:41 115562452408 1/4/2017 6:02:46 6:32:15 0:08:17 115562452408 1/4/2017 6:40:32 8:17:58 8:54:17 115574565032 1/4/2017 17:12:15 18:14:35 0:01:50 115574565032 1/4/2017 18:16:25 18:25:18
updated
try:
require(dplyr) diff_to_hms <- function(x) { y <- abs(x) sprintf("%s:%02d:%02d:%02d", ifelse(x < 0, "-", ""), y %% 86400 %/% 3600, y %% 3600 %/% 60, y %% 60 %/% 1) } dat %>% mutate(entry_tm = as.posixct(strptime(paste(biz_dt,entry_tm),format = "%y-%m-%d %h:%m:%s")), exit_tm = as.posixct(strptime(paste(biz_dt,exit_tm),format = "%y-%m-%d %h:%m:%s"))) %>% mutate(stay_duration = as.numeric(difftime(exit_tm,lead(entry_tm),unit="secs")*-1), stay_duration2 = diff_to_hms(stay_duration))
Comments
Post a Comment