diff --git a/DESCRIPTION b/DESCRIPTION index 9c99606b..650a642c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,6 +36,7 @@ Imports: rlang, sf, lubridate, + hms, tidyr, tools Suggests: diff --git a/R/time.R b/R/time.R index 4ead72bf..c1fe8f8a 100644 --- a/R/time.R +++ b/R/time.R @@ -27,23 +27,30 @@ filter_stop_times_by_hour <- function(stop_times, return(stop_times) } -#' Add lubridate::hms columns to feed +#' Add hms::hms columns to feed #' #' Adds columns to stop_times (arrival_time_hms, departure_time_hms) and frequencies (start_time_hms, end_time_hms) -#' with times converted with lubridate::hms(). +#' with times converted with hms::hms(). #' #' @return gtfs_obj with added hms times columns for stop_times_df and frequencies_df #' @keywords internal -#' @importFrom lubridate hms +#' @importFrom hms hms set_hms_times <- function(gtfs_obj) { stopifnot(is_gtfs_obj(gtfs_obj)) - gtfs_obj$stop_times_df$arrival_time_hms <- lubridate::hms(gtfs_obj$stop_times_df$arrival_time, quiet=T) - gtfs_obj$stop_times_df$departure_time_hms <- lubridate::hms(gtfs_obj$stop_times_df$departure_time, quiet=T) + str_to_seconds <- function(hhmmss_str) { + sapply( + strsplit(hhmmss_str, ":"), + function(Y) { sum(as.numeric(Y) * c(3600, 60, 1)) } + ) + } + + gtfs_obj$stop_times_df$arrival_time_hms <- hms::hms(str_to_seconds(gtfs_obj$stop_times_df$arrival_time)) + gtfs_obj$stop_times_df$departure_time_hms <- hms::hms(str_to_seconds(gtfs_obj$stop_times_df$departure_time)) if(!is.null(gtfs_obj$frequencies_df)) { - gtfs_obj$frequencies_df$start_time_hms <- lubridate::hms(gtfs_obj$frequencies_df$start_time, quiet=T) - gtfs_obj$frequencies_df$end_time_hms <- lubridate::hms(gtfs_obj$frequencies_df$end_time, quiet=T) + gtfs_obj$frequencies_df$start_time_hms <- hms::hms(str_to_seconds(gtfs_obj$frequencies_df$start_time)) + gtfs_obj$frequencies_df$end_time_hms <- hms::hms(str_to_seconds(gtfs_obj$frequencies_df$end_time)) } return(gtfs_obj) @@ -52,7 +59,7 @@ set_hms_times <- function(gtfs_obj) { #' Returns all possible date/service_id combinations as a data frame #' #' Use it to summarise service. For example, get a count of the number of services for a date. See example. -#' @return gtfs_obj with added date_service data frame +#' @return a date_service data frame #' @param gtfs_obj a gtfs_object as read by read_gtfs #' @export #' @examples @@ -67,7 +74,11 @@ set_hms_times <- function(gtfs_obj) { get_date_service_table <- function(gtfs_obj) { stopifnot(is_gtfs_obj(gtfs_obj)) - if(all(is.na(gtfs_obj$calendar_df$start_date)) & all(is.na(gtfs_obj$calendar_df$start_date))) { + weekday <- function(date) { + c("sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday")[as.POSIXlt(date)$wday + 1] + } + + if(all(is.na(gtfs_obj$calendar_df$start_date)) & all(is.na(gtfs_obj$calendar_df$end_date))) { # TODO validate no start_date and end_date defined in calendar.txt date_service_df <- dplyr::tibble(date=lubridate::ymd("19700101"), service_id="x") %>% dplyr::filter(service_id != "x") } else { @@ -78,7 +89,7 @@ get_date_service_table <- function(gtfs_obj) { max(gtfs_obj$calendar_df$end_date, na.rm = T), 1 ), - weekday = tolower(weekdays(date)) + weekday = weekday(date) ) # gather services by weekdays @@ -93,20 +104,22 @@ get_date_service_table <- function(gtfs_obj) { # set services to dates according to weekdays and start/end date date_service_df <- dplyr::full_join(dates, service_ids_weekdays, by="weekday") %>% - dplyr::filter(date > start_date & date < end_date) %>% + dplyr::filter(date >= start_date & date <= end_date) %>% dplyr::select(-weekday, -start_date, -end_date) } - # add calendar_dates additions (1) - additions = gtfs_obj$calendar_dates_df %>% filter(exception_type == 1) %>% select(-exception_type) - if(nrow(additions) > 0) { - date_service_df <- dplyr::full_join(date_service_df, additions, by=c("date", "service_id")) - } - - # remove calendar_dates exceptions (2) - exceptions = gtfs_obj$calendar_dates_df %>% filter(exception_type == 2) %>% select(-exception_type) - if(nrow(exceptions) > 0) { - date_service_df <- dplyr::anti_join(date_service_df, exceptions, by=c("date", "service_id")) + if(!is.null(gtfs_obj$calendar_dates_df)) { + # add calendar_dates additions (1) + additions = gtfs_obj$calendar_dates_df %>% filter(exception_type == 1) %>% select(-exception_type) + if(nrow(additions) > 0) { + date_service_df <- dplyr::full_join(date_service_df, additions, by=c("date", "service_id")) + } + + # remove calendar_dates exceptions (2) + exceptions = gtfs_obj$calendar_dates_df %>% filter(exception_type == 2) %>% select(-exception_type) + if(nrow(exceptions) > 0) { + date_service_df <- dplyr::anti_join(date_service_df, exceptions, by=c("date", "service_id")) + } } if(nrow(date_service_df) == 0) { diff --git a/tests/testthat/test-time.R b/tests/testthat/test-time.R new file mode 100644 index 00000000..dd2a321d --- /dev/null +++ b/tests/testthat/test-time.R @@ -0,0 +1,96 @@ +context('Time manipulation') + +create_empty_gtfs_obj <- function() { + g <- list() + class(g) <- "gtfs" + attributes(g)$validation_result <- data.frame() + return(g) +} + +test_that('set_hms_times() works with valid data', { + gtest <- create_empty_gtfs_obj() + gtest$stop_times_df <- dplyr::tibble( + arrival_time = c("08:00:00", "14:00:00", "26:10:00"), + departure_time = c("08:00:10", "14:00:20", "26:10:30")) + gtest$frequencies_df = dplyr::tibble( + start_time = c("06:00:00"), + end_time = c("12:00:00") + ) + + gtest <- tidytransit:::set_hms_times(gtest) + + expect_is(gtest$stop_times_df$arrival_time_hms, "hms") + expect_is(gtest$stop_times_df$departure_time_hms, "hms") + expect_is(gtest$stop_times_df$arrival_time, "character") + expect_is(gtest$stop_times_df$departure_time, "character") + expect_false(is.na(gtest$stop_times_df$arrival_time_hms[3])) + expect_equal(gtest$stop_times_df$departure_time_hms[3], hms::hms(26*3600+10*60+30)) + + expect_is(gtest$frequencies_df$start_time_hms, "hms") + expect_is(gtest$frequencies_df$end_time_hms, "hms") + expect_is(gtest$frequencies_df$start_time, "character") + expect_is(gtest$frequencies_df$end_time, "character") +}) + +test_that('get_date_service_table() uses the right dates', { + gtest <- create_empty_gtfs_obj() + gtest$calendar_df <- dplyr::tibble( + service_id = "s1", + monday = 1, + tuesday = 0, + wednesday = 1, + thursday = 0, + friday = 0, + saturday = 0, + sunday = 0, + start_date = lubridate::ymd("20180101"), # monday + end_date = lubridate::ymd("20180131")) # wednesday + + date_service <- tidytransit:::get_date_service_table(gtest) + + expect_true(lubridate::ymd("20180101") %in% date_service$date) + expect_false(lubridate::ymd("20180102") %in% date_service$date) + expect_true(lubridate::ymd("20180131") %in% date_service$date) +}) + +test_that('get_date_service_table() works with additions and exceptions', { + gtest <- create_empty_gtfs_obj() + gtest$calendar_df <- dplyr::tibble( + service_id = c("wdays", "wend"), + monday = c(1, 0), + tuesday = c(1, 0), + wednesday = c(1, 0), + thursday = c(1, 0), + friday = c(1, 1), + saturday = c(0, 1), + sunday = c(0, 1), + start_date = c(lubridate::ymd("20180201"), lubridate::ymd("20180401")), + end_date = c(lubridate::ymd("20180430"), lubridate::ymd("20180430"))) + gtest$calendar_dates_df = dplyr::tibble( + service_id = c("wdays", "wend"), + date = c(lubridate::ymd("20180314"), lubridate::ymd("20180226")), + exception_type = c(2, 1) + ) + + date_service <- tidytransit:::get_date_service_table(gtest) + + # exception + mar14 <- date_service[date_service$date == lubridate::ymd("20180613"),] + expect_equal(nrow(mar14), 0) + + # addition + feb26 <- date_service[date_service$date == lubridate::ymd("20180226"),] # monday + expect_equal(nrow(feb26), 2) + + # overlaps + apr05 <- date_service[date_service$date == lubridate::ymd("20180405"),] # thursday + expect_equal(apr05 %>% dplyr::group_by(date) %>% dplyr::count() %>% dplyr::pull(n), 1) + apr06 <- date_service[date_service$date == lubridate::ymd("20180406"),] # friday + expect_equal(apr06 %>% dplyr::group_by(date) %>% dplyr::count() %>% dplyr::pull(n), 2) + + range <- date_service %>% + dplyr::group_by(service_id) %>% + dplyr::summarise(min = min(date), max=max(date)) + expect_equal(range[range$service_id == "wdays", "min"], dplyr::tibble(min=lubridate::ymd("20180201"))) + expect_equal(range[range$service_id == "wend", "max"], dplyr::tibble(max=lubridate::ymd("20180429"))) +})