From 41ec3655f674124218d04abc188738c0c4d100f2 Mon Sep 17 00:00:00 2001
From: Gabriele Bozzola <sbozzolator@gmail.com>
Date: Sun, 1 Sep 2024 19:52:45 -0700
Subject: [PATCH] Remove dependency on DataFrames

DataFrames is a heavy dependency, but it is used in only one function. This
commit rewrites that function to avoid using the package, and adds
DataFrames to the packages that trigger the NeuralSnowExt extension.
---
 Project.toml                      |  4 ++--
 src/standalone/Vegetation/pfts.jl | 28 ++++++++++------------------
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/Project.toml b/Project.toml
index 7ca60f41a9..234a87e318 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,7 +9,6 @@ ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
 ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
 ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f"
 ClimaUtilities = "b3f4f4ca-9299-4f7f-bd9b-81e1242a7513"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 Insolation = "e98cc03f-d57e-4e3c-b70c-8d51efe9e0d8"
@@ -26,6 +25,7 @@ Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
@@ -33,7 +33,7 @@ cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [extensions]
 CreateParametersExt = "ClimaParams"
-NeuralSnowExt = ["CSV", "HTTP", "Flux", "StatsBase", "cuDNN"]
+NeuralSnowExt = ["CSV", "DataFrames", "HTTP", "Flux", "StatsBase", "cuDNN"]
 
 [compat]
 ArtifactWrappers = "0.2"
diff --git a/src/standalone/Vegetation/pfts.jl b/src/standalone/Vegetation/pfts.jl
index 794e233d24..3a69a4344f 100644
--- a/src/standalone/Vegetation/pfts.jl
+++ b/src/standalone/Vegetation/pfts.jl
@@ -36,8 +36,6 @@ are used in the comments in the code below.
 
 export Pft, default_pfts, params_from_pfts, pft_param_list
 
-using DataFrames
-
 # List of parameters that PFTs need to define in order to be valid
 pft_param_list = [
     # TwoStreamModel parameters
@@ -504,31 +502,25 @@ function params_from_pfts(
     pfts::Vector{Pft} = default_pfts,
 )
     """
-    This function takes in a vector of PFT cover percentages and returns the correct
-    parameter set corresponding to the most dominant PFT by cover percentage.
+    Takes in a vector of PFT cover percentages and returns the correct parameter set
+    corresponding to the most dominant PFT by cover percentage.
 
     May optionally take in a vector of PFTs to use instead of the default PFTs.
-    In this case the pfts_cover vector must be the same length and in the
-    corresponding order to the PFTs in the pfts vector. This allows a user to 
+    In this case, the pfts_cover vector must be the same length and in the
+    corresponding order to the PFTs in the pfts vector. This allows a user to
     define their own PFT scheme and plug it into the model.
     """
-    # Construct the PFT DataFrame from the specified PFTs
-    pft_df = DataFrame()
-    for pft in pfts
-        # Push the internal NamedTuple of parameters to a row in the df. Since
-        # the PFT is stored as a NamedTuple conversion to a DF row is ensured 
-        # to work and ensure the ordering of the parameters.
-        push!(pft_df, pft.parameters)
-    end
-
     # Find the index of the dominant PFT by cover percentage
     max_ind = argmax(pft_cover)
 
-    # Instantiate a variable with the correct name for the weighted average of 
-    # each parameter.
+    # Extract the parameter set for the dominant PFT
     param_set = []
+    dominant_pft_params = pfts[max_ind].parameters
+
+    # Collect the parameters in the correct order
     for param in pft_param_list
-        append!(param_set, pft_df[max_ind, param])
+        append!(param_set, getfield(dominant_pft_params, param))
     end
+
     return param_set
 end