Skip to content

Commit

Permalink
Feature/newline check (#58)
Browse files Browse the repository at this point in the history
reflows checking for newline at end of file.

On unix-ish (Linux + MacOS), uses `tail -c1` to grab the last byte of
the file, and then `wc -l` to count newlines in that string (of 1 byte).

Maintains previous (pure R) logic for windows systems.

---------

Co-authored-by: CJ Yetman <cyetman@rmi.org>
  • Loading branch information
AlexAxthelm and cjyetman authored Jan 30, 2024
1 parent 288d99f commit 54a8113
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 24 deletions.
76 changes: 52 additions & 24 deletions R/has_newline_at_end.R
Original file line number Diff line number Diff line change
@@ -1,33 +1,61 @@
# Determine if file has trailing newline
# returns TRUE if file has newline at end
# returns FALSE if file has no newline at end
# returns NA if file is not accessible or is not a text file
file_has_newline_at_end <- function(filepath) {
stopifnot(
"`filepath` must be length 1" = length(filepath) == 1L
)
if (!is_file_accessible(filepath) || !is_text_file(filepath)) {
return(NA)
}
if (.Platform$OS.type == "unix") {
newlines_in_final_char <- system(
command = paste(
"tail -c1 ", filepath, # tail -c1 returns last 1 byte
" | ", # pipe to next command
"wc -l" # wc -l returns number of newline characters in input
),
intern = TRUE
)
has_trailing_newline <- as.logical(as.integer(newlines_in_final_char))
} else {
# Windows does not have a POSIX compliant `tail` command
# so we use builtin R functionality, which is slower
con <- file(filepath, "rb")
on.exit(close(con))

not_at_end <- TRUE
chars <- ""
while (not_at_end) {
prev_chars <- chars
chars <- readChar(con, nchars = 2048L, useBytes = TRUE)
if (length(chars) == 0L) {
not_at_end <- FALSE
}
}

has_trailing_newline <- grepl(
# using [\n] instead of [\n\r] because modern systems use either \n or
# \r\n as newline, both of which match \n as last character.
"[\n]$", iconv(prev_chars, to = "UTF-8", sub = "")
)

}

return(has_trailing_newline)
}

has_newline_at_end <- function(filepaths) {
filepaths <- simplify_if_one_col_df(filepaths)
stopifnot("`filepaths` must be a character vector" = typeof(filepaths) == "character")
stopifnot(
"`filepaths` must be a character vector" = typeof(filepaths) == "character"
)
filepaths <- canonize_path(filepaths)

vapply(
X = filepaths,
FUN = function(filepath) {
if (!is_file_accessible(filepath) || !is_text_file(filepath)) {
return(NA)
}

con <- file(filepath, "rb")
on.exit(close(con))

not_at_end <- TRUE
chars <- ""
while (not_at_end) {
prev_chars <- chars
chars <- readChar(con, nchars = 2048L, useBytes = TRUE)
if (length(chars) == 0L) {
not_at_end <- FALSE
}
}

# using [\n] instead of [\n\r] because modern systems use either \n or
# \r\n as newline, both of which match \n as last character.
# \r as last character should not be recognized as trailing newline.
grepl("[\n]$", iconv(prev_chars, to = "UTF-8", sub = ""))
},
FUN = file_has_newline_at_end,
FUN.VALUE = logical(1),
USE.NAMES = FALSE
)
Expand Down
17 changes: 17 additions & 0 deletions tests/testthat/test-has_newline_at_end.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ test_that("has_newline_at_end identifies a binary file", {
expect_equal(has_newline_at_end(binary), NA)
})

test_that("has_newline_at_end identifies an empty file", {
empty <- withr::local_tempfile()
write.table(data.frame(), file = empty, col.names = FALSE)
expect_identical(as.integer(file.size(empty)), 0L)
expect_identical(has_newline_at_end(empty), NA)
})

test_that("has_newline_at_end identifies an inaccessible filepath", {
filepath <- "non-existant-file.txt"
expect_false(file.exists(filepath))
Expand All @@ -53,3 +60,13 @@ test_that("has_newline_at_end behaves as expected when multiple filepaths are pa
expect_equal(has_newline_at_end(c(no_newline, no_newline)), c(FALSE, FALSE))
expect_equal(has_newline_at_end(c(newline, newline)), c(TRUE, TRUE))
})

test_that("file_has_newline_at_end expects a single file", {
f1 <- withr::local_tempfile()
f2 <- withr::local_tempfile()
expect_error(
file_has_newline_at_end(c(f1, f2)),
"`filepath` must be length 1"
)
})

0 comments on commit 54a8113

Please sign in to comment.