Skip to content

Commit

Permalink
Merge pull request #19 from Smit-tay/main
Browse files Browse the repository at this point in the history
New function(s) to locate function definitions in R scripts.
  • Loading branch information
RobertASmith authored Feb 4, 2024
2 parents 98fe70d + f51b4e4 commit 8518799
Show file tree
Hide file tree
Showing 7 changed files with 290 additions and 8 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(check_markov_trace)
export(check_trans_prob_array)
export(check_trans_prob_mat)
export(extract_function_name)
export(find_function_definitions)
export(find_next_vector_element)
export(find_previous_vector_element)
export(get_active_functions)
Expand Down
145 changes: 138 additions & 7 deletions R/cheers_checker.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,27 @@ find_next_vector_element <- function(value, vector) {
#' @description Find the previous element of the vector before a value
#' @param value A value of numeric values
#' @param vector A vector of numeric values
#' @param LTE a boolean to determine collection on "less than" or "less than equal"
#' @return The previous element of the vector before the value
#' @export
#' @examples
#' \dontrun{
#' find_previous_vector_element(value = 5, vector = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
#' }
#'
find_previous_vector_element <- function(value, vector){
find_previous_vector_element <- function(value, vector, LTE=FALSE){

# Find the elements in the vector that are less than the specified value
less_than_value <- vector[vector < value]
if(LTE==TRUE) {
less_than_value <- vector[vector <= value]
} else {
less_than_value <- vector[vector < value]
}

# If there are no elements less than the specified value, return the value
# If there are no elements less than the specified value,
# return the minimum value found in the vector
if (length(less_than_value) == 0) {
return(value)
return (min(vector))
}

# Find the maximum value among the elements less than the specified value
Expand Down Expand Up @@ -90,12 +97,136 @@ extract_function_name <- function(string){

}

#assertHE::extract_function_name(string = "#' }
#
#create_Markov_trace <- function(transition_matrix_,")

#' @title Helper function for find_function_definitions
#'
#' @description Do not try to call this - it is only a help
#' and is only to keep find_function_definitions clean
#' These two functions could really be combined - a task for another day
#'
#' @param v_func_lnums A vector of "FUNCTION" line numbers
#'
#' @param v_assign_lnums A vector of "XXX_ASSIGN" line number
#'
#' @param v_symbol_lnums a vector of "SYMBOL" line numbers
#'
#' @param parsed_data the dataframe result of running utils::getParseData
#'
#' @return Filteresd vector of function line numbers
#'
filter_func_lnums <- function(v_func_lnums, v_assign_lnums, v_symbol_lnums, parsed_data) {

# Create an empty vector to store filtered v_func_lnums
filtered_v_func_lnums <- c()

# Iterate over each v_func_lnum
for (func_lnum in v_func_lnums) {

# Find the line number of the nearest v_assign_lnum before the v_func_lnum
assign_lnum <- max(v_assign_lnums[v_assign_lnums <= func_lnum])

# Find the line number of the nearest v_symbol_lnum before the v_func_lnum
symbol_lnum <- max(v_symbol_lnums[v_symbol_lnums <= func_lnum])

# Check if both assign_lnum and symbol_lnum exist
# and that the assign doesn't come before the symbol.
#
# Ignoring uninteresting parse data like "expr", "NUM_CONST", etc
# R parse data for a function definition can only exist in this order:
# SYMBOL, ASSIGN, FUNCTION
if ( is.na(assign_lnum) || is.na(symbol_lnum) || assign_lnum < symbol_lnum) {
next
}

# if assign_lnum == symbol_lnum,
# Then we might have a single line like this:
#
# foo <- function(A){} # i.e. a function definition
#
# or we may have the use of a Lambda (anonymous) function like this:
#
# result <- apply(data, 2, function(x) { sqrt(mean(x^2)) })
#
# This is NOT a funtion definition (well, it is, but, not one we care about) !
# in this case, there will (hopefully) always be a SYMBOL between
# FUNCTION and XXX_ASSIGN.
#
# check that the col1 value for assign is greater than for symbol
if(assign_lnum == symbol_lnum){

assign_col1 <- max( parsed_data[ parsed_data$line1 == assign_lnum
& (parsed_data$token == "LEFT_ASSIGN"
| parsed_data$token == "EQ_ASSIGN") ,
"col1"] )

symbol_col1 <- max( parsed_data[ parsed_data$line1 == symbol_lnum
& parsed_data$token == "SYMBOL" ,
"col1"] )

if(symbol_col1 >= assign_col1){
# These aren't the droids you're looking for.
next
}
}

# If we get this far, we're pretty confident we have a function definition.
# Add the v_func_lnum to the filtered vector
filtered_v_func_lnums <- c(filtered_v_func_lnums, func_lnum)

}

# Return the filtered vector
return(filtered_v_func_lnums)
}

#' @title Parses an R source file, returns function names defined within.
#'
#' @description Using utils::getParseData(), searches for function definitions
#' by matching the FUNCTION keyword (i.e. "function")
#' with it's associated SYMBOL (i.e the function name)
#'
#' @param filename A string containing a path to an R source file
#'
#' @return A string containing the function names
#'
#' @importFrom stringr str_locate_all str_replace_all
#'
#' @export
#'
find_function_definitions <- function(filename) {

# Parse the R code
parsed_data <-
utils::getParseData(parse(filename, keep.source = TRUE))

# get the line numbers containing the function keyword
# identified by token value "FUNCTION"
v_func_lnums <- parsed_data[parsed_data$token == "FUNCTION", "line1"]

# find line numbers which are identified as assignments
# named functions get assigned names - Lambdas (anonymous funcitons) don't !
v_assign_lnums <- parsed_data[parsed_data$token == "LEFT_ASSIGN" | parsed_data$token == "EQ_ASSIGN", "line1"]

# find line numbers which are identified as symbol definitions
# Function names are identiffied as SYMBOLs
v_symbol_lnums <- parsed_data[parsed_data$token == "SYMBOL", "line1"]

# Try to filter the use of the keyword "function" when it is used
# as a Lambda (anonymous) function.
v_func_lnums <- filter_func_lnums(v_func_lnums, v_assign_lnums, v_symbol_lnums, parsed_data)

# for each function location find the immediately preceeding symbols location
v_symbols_preceding_functions <- sapply(X = v_func_lnums,
FUN = find_previous_vector_element,
vector=v_symbol_lnums, LTE=TRUE)

# Extract the symbol names (stored in the "text" element)
# from the data.frame located at the line numbers that identify symbols
function_symbols <- parsed_data[ parsed_data$line1 %in% v_symbols_preceding_functions
& parsed_data$token == "SYMBOL", "text"]
return(function_symbols)

}

#' @title Get cheers classification tags from a given file
#' @description For a provided filepath, identify the cheers classification tags
Expand Down
25 changes: 25 additions & 0 deletions man/filter_func_lnums.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/find_function_definitions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/find_previous_vector_element.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions tests/testthat/example_scripts/example_tricky_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@

# Messy function 1: Unclear variable names, inconsistent formatting
do_something_random <- function(x, y) {
result <- x + y * 2 # No spaces around operators
return(result) # Unnecessary return statement
}

# Messy function 2: Redundant parentheses, unnecessary comments
calculate_something <- function(data) {
filtered_data <- data[data$value > 10, ] # Redundant parentheses
# Calculate the mean (average)
mean_value <- mean(filtered_data$value) # Unnecessary comment
return(mean_value)
}

# Messy function 3: Nested loops, inconsistent indentation
find_matches <- function(list1, list2) {
matches <- vector()
for (item1 in list1) {
for (item2 in list2) {
if (item1 == item2) {
matches <- append(matches, item1) # Inefficient use of append
}
}
}
return(matches)
}

# Messy function 4: Global variables, side effects
global_var <- 0
perform_task <- function(input) {
global_var <<- global_var + input # Modifying global variable
return(global_var)
}

# Messy function 5: Long lines, no comments
combine_strings <- function(str1, str2, sep = " ") {
combined_string <- paste(str1, str2, sep = sep) # Long line, no comment
return(combined_string)
}

# Messy function 6: Unclear logic, repetitive code
process_data <- function(data) {
if (some_condition) {
# Do something
} else if (another_condition) {
# Do something else
} else {
# Do something different
}
# More repetitive code
}

# Messy function 7: Deeply nested function calls, hard to read
transform_data = function(data) {
result <- apply(data, 2, #comment
# comment
function(x) {
sqrt(mean(x^2)) # Nested function call, multiple operations
})
return(result)
}

# Messy function 8: Unnecessary temporary variables, inefficient sorting
sort_values <- function(values) {
temp <- sort(values) # Unnecessary temporary variable
sorted_values <- temp # Inefficient sorting
return(sorted_values)
}

# Messy function 9: Magic numbers, unclear purpose
generate_output <- function() {
output <- list(value1 = 42, value2 = 3.14) # Magic numbers
return(output)
}

# Messy function 10: Overly complex, hard to maintain
do_everything <- function(data) {
# Combine multiple steps, hard to follow
result <- process_data(data)
result <- transform_data(result)
output <- generate_output(result)
return(output)
}
20 changes: 20 additions & 0 deletions tests/testthat/test-cheers_checker.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,23 @@ test_that("get_folder_cheers_classifications works for an example project",
cheers_pattern = "@family") |> nrow()
expect_true(object = tmp > 0)
})

test_that("find_function_definitions works as intended",
{
expect_equal(
object = find_function_definitions(
filename = testthat::test_path("example_scripts", "example_tricky_functions.R")),
expected = c(
"do_something_random"
, "calculate_something"
, "find_matches"
, "perform_task"
, "combine_strings"
, "process_data"
, "transform_data"
, "sort_values"
, "generate_output"
, "do_everything")
)
})

0 comments on commit 8518799

Please sign in to comment.