Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New function(s) to locate function definitions in R scripts. #19

Merged
merged 6 commits into from
Feb 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(check_markov_trace)
export(check_trans_prob_array)
export(check_trans_prob_mat)
export(extract_function_name)
export(find_function_definitions)
export(find_next_vector_element)
export(find_previous_vector_element)
export(get_active_functions)
Expand Down
145 changes: 138 additions & 7 deletions R/cheers_checker.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,27 @@ find_next_vector_element <- function(value, vector) {
#' @description Find the previous element of the vector before a value
#' @param value A value of numeric values
#' @param vector A vector of numeric values
#' @param LTE a boolean to determine collection on "less than" or "less than equal"
#' @return The previous element of the vector before the value
#' @export
#' @examples
#' \dontrun{
#' find_previous_vector_element(value = 5, vector = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
#' }
#'
find_previous_vector_element <- function(value, vector){
find_previous_vector_element <- function(value, vector, LTE=FALSE){

# Find the elements in the vector that are less than the specified value
less_than_value <- vector[vector < value]
if(LTE==TRUE) {
less_than_value <- vector[vector <= value]
} else {
less_than_value <- vector[vector < value]
}

# If there are no elements less than the specified value, return the value
# If there are no elements less than the specified value,
# return the minimum value found in the vector
if (length(less_than_value) == 0) {
return(value)
return (min(vector))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This element could be greater than the value, so should probably return NA here.

}

# Find the maximum value among the elements less than the specified value
Expand Down Expand Up @@ -90,12 +97,136 @@ extract_function_name <- function(string){

}

#assertHE::extract_function_name(string = "#' }
#
#create_Markov_trace <- function(transition_matrix_,")

#' @title Helper function for find_function_definitions
#'
#' @description Do not try to call this - it is only a help
#' and is only to keep find_function_definitions clean
#' These two functions could really be combined - a task for another day
#'
#' @param v_func_lnums A vector of "FUNCTION" line numbers
#'
#' @param v_assign_lnums A vector of "XXX_ASSIGN" line number
#'
#' @param v_symbol_lnums a vector of "SYMBOL" line numbers
#'
#' @param parsed_data the dataframe result of running utils::getParseData
#'
#' @return Filteresd vector of function line numbers
#'
filter_func_lnums <- function(v_func_lnums, v_assign_lnums, v_symbol_lnums, parsed_data) {

# Create an empty vector to store filtered v_func_lnums
filtered_v_func_lnums <- c()

# Iterate over each v_func_lnum
for (func_lnum in v_func_lnums) {

# Find the line number of the nearest v_assign_lnum before the v_func_lnum
assign_lnum <- max(v_assign_lnums[v_assign_lnums <= func_lnum])

# Find the line number of the nearest v_symbol_lnum before the v_func_lnum
symbol_lnum <- max(v_symbol_lnums[v_symbol_lnums <= func_lnum])

# Check if both assign_lnum and symbol_lnum exist
# and that the assign doesn't come before the symbol.
#
# Ignoring uninteresting parse data like "expr", "NUM_CONST", etc
# R parse data for a function definition can only exist in this order:
# SYMBOL, ASSIGN, FUNCTION
if ( is.na(assign_lnum) || is.na(symbol_lnum) || assign_lnum < symbol_lnum) {
next
}

# if assign_lnum == symbol_lnum,
# Then we might have a single line like this:
#
# foo <- function(A){} # i.e. a function definition
#
# or we may have the use of a Lambda (anonymous) function like this:
#
# result <- apply(data, 2, function(x) { sqrt(mean(x^2)) })
#
# This is NOT a funtion definition (well, it is, but, not one we care about) !
# in this case, there will (hopefully) always be a SYMBOL between
# FUNCTION and XXX_ASSIGN.
#
# check that the col1 value for assign is greater than for symbol
if(assign_lnum == symbol_lnum){

assign_col1 <- max( parsed_data[ parsed_data$line1 == assign_lnum
& (parsed_data$token == "LEFT_ASSIGN"
| parsed_data$token == "EQ_ASSIGN") ,
"col1"] )

symbol_col1 <- max( parsed_data[ parsed_data$line1 == symbol_lnum
& parsed_data$token == "SYMBOL" ,
"col1"] )

if(symbol_col1 >= assign_col1){
# These aren't the droids you're looking for.
next
}
}

# If we get this far, we're pretty confident we have a function definition.
# Add the v_func_lnum to the filtered vector
filtered_v_func_lnums <- c(filtered_v_func_lnums, func_lnum)

}

# Return the filtered vector
return(filtered_v_func_lnums)
}

#' @title Parses an R source file, returns function names defined within.
#'
#' @description Using utils::getParseData(), searches for function definitions
#' by matching the FUNCTION keyword (i.e. "function")
#' with it's associated SYMBOL (i.e the function name)
#'
#' @param filename A string containing a path to an R source file
#'
#' @return A string containing the function names
#'
#' @importFrom stringr str_locate_all str_replace_all
#'
#' @export
#'
find_function_definitions <- function(filename) {

# Parse the R code
parsed_data <-
utils::getParseData(parse(filename, keep.source = TRUE))

# get the line numbers containing the function keyword
# identified by token value "FUNCTION"
v_func_lnums <- parsed_data[parsed_data$token == "FUNCTION", "line1"]

# find line numbers which are identified as assignments
# named functions get assigned names - Lambdas (anonymous funcitons) don't !
v_assign_lnums <- parsed_data[parsed_data$token == "LEFT_ASSIGN" | parsed_data$token == "EQ_ASSIGN", "line1"]

# find line numbers which are identified as symbol definitions
# Function names are identiffied as SYMBOLs
v_symbol_lnums <- parsed_data[parsed_data$token == "SYMBOL", "line1"]

# Try to filter the use of the keyword "function" when it is used
# as a Lambda (anonymous) function.
v_func_lnums <- filter_func_lnums(v_func_lnums, v_assign_lnums, v_symbol_lnums, parsed_data)

# for each function location find the immediately preceeding symbols location
v_symbols_preceding_functions <- sapply(X = v_func_lnums,
FUN = find_previous_vector_element,
vector=v_symbol_lnums, LTE=TRUE)

# Extract the symbol names (stored in the "text" element)
# from the data.frame located at the line numbers that identify symbols
function_symbols <- parsed_data[ parsed_data$line1 %in% v_symbols_preceding_functions
& parsed_data$token == "SYMBOL", "text"]
return(function_symbols)

}

#' @title Get cheers classification tags from a given file
#' @description For a provided filepath, identify the cheers classification tags
Expand Down
25 changes: 25 additions & 0 deletions man/filter_func_lnums.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/find_function_definitions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/find_previous_vector_element.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions tests/testthat/example_scripts/example_tricky_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@

# Messy function 1: Unclear variable names, inconsistent formatting
do_something_random <- function(x, y) {
result <- x + y * 2 # No spaces around operators
return(result) # Unnecessary return statement
}

# Messy function 2: Redundant parentheses, unnecessary comments
calculate_something <- function(data) {
filtered_data <- data[data$value > 10, ] # Redundant parentheses
# Calculate the mean (average)
mean_value <- mean(filtered_data$value) # Unnecessary comment
return(mean_value)
}

# Messy function 3: Nested loops, inconsistent indentation
find_matches <- function(list1, list2) {
matches <- vector()
for (item1 in list1) {
for (item2 in list2) {
if (item1 == item2) {
matches <- append(matches, item1) # Inefficient use of append
}
}
}
return(matches)
}

# Messy function 4: Global variables, side effects
global_var <- 0
perform_task <- function(input) {
global_var <<- global_var + input # Modifying global variable
return(global_var)
}

# Messy function 5: Long lines, no comments
combine_strings <- function(str1, str2, sep = " ") {
combined_string <- paste(str1, str2, sep = sep) # Long line, no comment
return(combined_string)
}

# Messy function 6: Unclear logic, repetitive code
process_data <- function(data) {
if (some_condition) {
# Do something
} else if (another_condition) {
# Do something else
} else {
# Do something different
}
# More repetitive code
}

# Messy function 7: Deeply nested function calls, hard to read
transform_data = function(data) {
result <- apply(data, 2, #comment
# comment
function(x) {
sqrt(mean(x^2)) # Nested function call, multiple operations
})
return(result)
}

# Messy function 8: Unnecessary temporary variables, inefficient sorting
sort_values <- function(values) {
temp <- sort(values) # Unnecessary temporary variable
sorted_values <- temp # Inefficient sorting
return(sorted_values)
}

# Messy function 9: Magic numbers, unclear purpose
generate_output <- function() {
output <- list(value1 = 42, value2 = 3.14) # Magic numbers
return(output)
}

# Messy function 10: Overly complex, hard to maintain
do_everything <- function(data) {
# Combine multiple steps, hard to follow
result <- process_data(data)
result <- transform_data(result)
output <- generate_output(result)
return(output)
}
20 changes: 20 additions & 0 deletions tests/testthat/test-cheers_checker.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,23 @@ test_that("get_folder_cheers_classifications works for an example project",
cheers_pattern = "@family") |> nrow()
expect_true(object = tmp > 0)
})

test_that("find_function_definitions works as intended",
{
expect_equal(
object = find_function_definitions(
filename = testthat::test_path("example_scripts", "example_tricky_functions.R")),
expected = c(
"do_something_random"
, "calculate_something"
, "find_matches"
, "perform_task"
, "combine_strings"
, "process_data"
, "transform_data"
, "sort_values"
, "generate_output"
, "do_everything")
)
})

Loading