From f75d8082b58a40c59c1fb042cf87a9fcf64df3d7 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Tue, 10 Sep 2024 14:13:57 +1200
Subject: [PATCH 1/7] Add DataFrames.jl extension

---
 Project.toml             | 10 +++--
 ext/JuMPDataFramesExt.jl | 23 +++++++++++
 ext/test_DataFrames.jl   | 83 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 ext/JuMPDataFramesExt.jl
 create mode 100644 ext/test_DataFrames.jl

diff --git a/Project.toml b/Project.toml
index fd93219a7f6..12323a0df88 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,32 +9,36 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [weakdeps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 
 [extensions]
+JuMPDataFramesExt = "DataFrames"
 JuMPDimensionalDataExt = "DimensionalData"
 
 [compat]
+DataFrames = "1"
 DimensionalData = "0.24, 0.25, 0.26.2, 0.27"
 LinearAlgebra = "<0.0.1, 1.6"
 MacroTools = "0.5"
 MathOptInterface = "1.25.2"
 MutableArithmetics = "1.1"
 OrderedCollections = "1"
-Printf = "<0.0.1, 1.6"
 PrecompileTools = "1"
+Printf = "<0.0.1, 1.6"
 SparseArrays = "<0.0.1, 1.6"
 Test = "<0.0.1, 1.6"
 julia = "1.6"
 
 [extras]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DimensionalData", "Test"]
+test = ["DataFrames", "DimensionalData", "Test"]
diff --git a/ext/JuMPDataFramesExt.jl b/ext/JuMPDataFramesExt.jl
new file mode 100644
index 00000000000..c64f7fba9a8
--- /dev/null
+++ b/ext/JuMPDataFramesExt.jl
@@ -0,0 +1,48 @@
+#  Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
+#  This Source Code Form is subject to the terms of the Mozilla Public
+#  License, v. 2.0. If a copy of the MPL was not distributed with this
+#  file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+module JuMPDataFramesExt
+
+import DataFrames
+import JuMP
+
+"""
+    JuMP.Containers.container(
+        f::Function,
+        indices,
+        ::Type{DataFrames.DataFrame},
+        names::AbstractVector,
+    )
+
+Build a `DataFrames.DataFrame` with one row per element of `indices`.
+
+Each element of `indices` is converted to a `NamedTuple` whose field names are
+`names`, giving one column per index. The result of splatting the element into
+`f` is stored in an additional column named `value`.
+
+Throws an `ErrorException` if `:value` is one of `names`, because assigning the
+`value` column would otherwise silently replace that index column.
+"""
+function JuMP.Containers.container(
+    f::Function,
+    indices,
+    ::Type{DataFrames.DataFrame},
+    names::AbstractVector,
+)
+    if :value in names
+        error(
+            "Unable to create a `DataFrame` container: the index name " *
+            "`value` is reserved for the column that stores the result.",
+        )
+    end
+    # Materialize the indices once so that both columns are built from the
+    # same sequence of rows.
+    rows = vec(collect(indices))
+    df = DataFrames.DataFrame(NamedTuple{tuple(names...)}(arg) for arg in rows)
+    df.value = [f(arg...) for arg in rows]
+    return df
+end
+
+end #module
diff --git a/ext/test_DataFrames.jl b/ext/test_DataFrames.jl
new file mode 100644
index 00000000000..80aa83ceb1b
--- /dev/null
+++ b/ext/test_DataFrames.jl
@@ -0,0 +1,89 @@
+#  Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
+#  This Source Code Form is subject to the terms of the Mozilla Public
+#  License, v. 2.0. If a copy of the MPL was not distributed with this
+#  file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+module TestContainersDataFrames
+
+using Test
+
+using DataFrames
+using JuMP
+
+# One named index: expect an `i` column and a `value` column, one row per index.
+function test_dataframes_vector()
+    model = Model()
+    @variable(model, x[i = 2:4], container = DataFrame)
+    @test x isa DataFrame
+    @test size(x) == (3, 2)
+    @test names(x) == ["i", "value"]
+    return
+end
+
+# Two independent indices; summing a filtered `value` column gives an AffExpr.
+function test_dataframes_matrix()
+    model = Model()
+    @variable(model, x[i = 2:4, j = ["a", "b"]], container = DataFrame)
+    @test x isa DataFrame
+    @test size(x) == (6, 3)
+    @test names(x) == ["i", "j", "value"]
+    @test sum(x[x.j .== "a", :value]) isa AffExpr
+    return
+end
+
+# The index set for `j` depends on `i`.
+function test_dataframes_triangle()
+    model = Model()
+    @variable(model, x[i = 2:4, j in i:4], container = DataFrame)
+    @test x isa DataFrame
+    @test size(x) == (6, 3)
+    @test names(x) == ["i", "j", "value"]
+    return
+end
+
+# Indices filtered by a condition; also checks the order of the rows.
+function test_dataframes_sparse()
+    model = Model()
+    @variable(model, x[i in 1:4, j in 1:4; isodd(i + j)], container = DataFrame)
+    @test x isa DataFrame
+    @test size(x) == (8, 3)
+    @test x.i == [1, 1, 2, 2, 3, 3, 4, 4]
+    @test x.j == [2, 4, 1, 3, 2, 4, 1, 3]
+    @test names(x) == ["i", "j", "value"]
+    return
+end
+
+# `@expression` with a DataFrame container matches the groupby/combine result.
+function test_dataframes_expression()
+    model = Model()
+    B = ["a", "b"]
+    @variable(model, x[i = 2:4, j = B], container = DataFrame)
+    @expression(
+        model,
+        expr[j = B],
+        sum(x[x.j .== j, :value]),
+        container = DataFrame,
+    )
+    @test expr isa DataFrame
+    @test expr.j == ["a", "b"]
+    expr2 = DataFrames.combine(
+        DataFrames.groupby(x, :j),
+        :value => sum => :value,
+    )
+    @test expr == expr2
+    return
+end
+
+# Anonymous indices get column names that start with `##`.
+function test_dataframes_missing_names()
+    model = Model()
+    x = @variable(model, [1:3, 1:2], container = DataFrame)
+    @test all(startswith.(names(x), ["##", "##", "value"]))
+    x = @variable(model, [i in 1:3, 1:2], container = DataFrame)
+    @test all(startswith.(names(x), ["i", "##", "value"]))
+    x = @variable(model, [1:3, j in 1:2], container = DataFrame)
+    @test all(startswith.(names(x), ["##", "j", "value"]))
+    return
+end
+
+end

From 08ab62979ab5fd23faf859967620cf35204146d2 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Tue, 10 Sep 2024 15:58:08 +1200
Subject: [PATCH 2/7] Update multi tutorial

---
 docs/src/tutorials/linear/multi.jl | 88 ++++++++++++++++++------------
 1 file changed, 53 insertions(+), 35 deletions(-)

diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl
index 3916ac2cb34..b22d884de84 100644
--- a/docs/src/tutorials/linear/multi.jl
+++ b/docs/src/tutorials/linear/multi.jl
@@ -3,7 +3,7 @@
 # v.2.0. If a copy of the MPL was not distributed with this file, You can       #src
 # obtain one at https://mozilla.org/MPL/2.0/.                                   #src
 
-# # The multi-commodity flow problem
+# # Working with SQLite and DataFrames
 
 # **This tutorial was originally contributed by Louis Luangkesorn.**
 
@@ -23,10 +23,10 @@ using JuMP
 import DataFrames
 import HiGHS
 import SQLite
+import SQLite: DBInterface
 import Tables
 import Test
 
-const DBInterface = SQLite.DBInterface
 
 # ## Formulation
 
@@ -119,38 +119,62 @@ products =
 
 model = Model(HiGHS.Optimizer)
 set_silent(model)
-@variable(model, x[origins, destinations, products] >= 0)
+@variable(
+    model,
+    x[origin in origins, destination in destinations, product in products] >= 0,
+    container = DataFrames.DataFrame,
+)
 
 # One approach when working with databases is to extract all of the data into a
-# Julia datastructure. For example, let's pull the cost table into a DataFrame
-# and then construct our objective by iterating over the rows of the DataFrame:
+# Julia datastructure. For example, let's pull the cost table into a DataFrame:
 
 cost = DBInterface.execute(db, "SELECT * FROM cost") |> DataFrames.DataFrame
-@objective(
-    model,
-    Max,
-    sum(r.cost * x[r.origin, r.destination, r.product] for r in eachrow(cost)),
-);
 
-# If we don't want to use a DataFrame, we can use a `Tables.rowtable` instead:
+# and then join the decision variables:
 
-supply = DBInterface.execute(db, "SELECT * FROM supply") |> Tables.rowtable
-for r in supply
-    @constraint(model, sum(x[r.origin, :, r.product]) <= r.supply)
+function natural_join(left, right)
+    on_names = intersect(names(left), names(right))
+    return DataFrames.innerjoin(left, right; on = on_names)
 end
 
-# Another approach is to execute the query, and then to iterate through the rows
-# of the query using `Tables.rows`:
+cost_x = natural_join(cost, x)
+
+# We've defined a new function, `natural_join`, to simplify the process of
+# joining two DataFrames. This fuction acts like the `NATURAL JOIN` statment in
+# SQL.
+
+# Our objective is the inner product of two columns:
 
-demand = DBInterface.execute(db, "SELECT * FROM demand")
-for r in Tables.rows(demand)
-    @constraint(model, sum(x[:, r.destination, r.product]) == r.demand)
+@objective(model, Max, cost_x.cost' * cost_x.value);
+
+# The supply constraint is more complicated. A useful utility is a function that
+# sums the `.value` column after grouping on a set of columns:
+
+function sum_value_by(df, cols)
+    gdf = DataFrames.groupby(df, cols)
+    return DataFrames.combine(gdf, :value => sum => :value)
 end
 
-# !!! warning
-#     Iterating through the rows of a query result works by incrementing a
-#     cursor inside the database. As a consequence, you cannot call
-#     `Tables.rows` twice on the same query result.
+# Here it is in action:
+
+sum_value_by(x, [:origin, :product])
+
+# The constraint that the supply must be less than or equal to a capacity can
+# now be written as:
+
+supply = natural_join(
+    DBInterface.execute(db, "SELECT * FROM supply") |> DataFrames.DataFrame,
+    sum_value_by(x, [:origin, :product]),
+)
+@constraint(model, supply.value .<= supply.supply);
+
+# The demand constraint can be written similarly:
+
+demand = natural_join(
+    DBInterface.execute(db, "SELECT * FROM demand") |> DataFrames.DataFrame,
+    sum_value_by(x, [:destination, :product]),
+)
+@constraint(model, demand.value .== demand.demand);
 
 # The SQLite queries can be arbitrarily complex. For example, here's a query
 # which builds every possible origin-destination pair:
@@ -164,13 +188,12 @@ od_pairs = DBInterface.execute(
     INNER JOIN locations b
     ON a.type = 'origin' AND b.type = 'destination'
     """,
-)
+) |> DataFrames.DataFrame
 
 # With a constraint that we cannot send more than 625 units between each pair:
 
-for r in Tables.rows(od_pairs)
-    @constraint(model, sum(x[r.origin, r.destination, :]) <= 625)
-end
+od = natural_join(od_pairs, sum_value_by(x, [:origin, :destination]))
+@constraint(model, od.value .<= 625);
 
 # ## Solution
 
@@ -181,12 +204,7 @@ Test.@test is_solved_and_feasible(model)
 Test.@test objective_value(model) == 225_700.0      #src
 solution_summary(model)
 
-# and print the solution:
+# and obtain the solution:
 
-begin
-    println("         ", join(products, ' '))
-    for o in origins, d in destinations
-        v = lpad.([round(Int, value(x[o, d, p])) for p in products], 5)
-        println(o, " ", d, " ", join(replace.(v, "   0" => "  . "), " "))
-    end
-end
+x.value = value.(x.value)
+x[x.value .> 0, :]

From 5065efc48c8515af0e07fc6a2770e1879e88c2a7 Mon Sep 17 00:00:00 2001
From: Oscar Dowson <odow@users.noreply.github.com>
Date: Tue, 10 Sep 2024 16:44:12 +1200
Subject: [PATCH 3/7] Update docs/src/tutorials/linear/multi.jl

---
 docs/src/tutorials/linear/multi.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl
index b22d884de84..a935f0017a9 100644
--- a/docs/src/tutorials/linear/multi.jl
+++ b/docs/src/tutorials/linear/multi.jl
@@ -3,7 +3,7 @@
 # v.2.0. If a copy of the MPL was not distributed with this file, You can       #src
 # obtain one at https://mozilla.org/MPL/2.0/.                                   #src
 
-# # Working with SQLite and DataFrames
+# # The multi-commodity flow problem
 
 # **This tutorial was originally contributed by Louis Luangkesorn.**
 

From c980a0ed5a35a77f748bc1f0b2295c00062dfc65 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Tue, 10 Sep 2024 17:26:16 +1200
Subject: [PATCH 4/7] Fix formatting

---
 docs/src/tutorials/linear/multi.jl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl
index a935f0017a9..07510655763 100644
--- a/docs/src/tutorials/linear/multi.jl
+++ b/docs/src/tutorials/linear/multi.jl
@@ -27,7 +27,6 @@ import SQLite: DBInterface
 import Tables
 import Test
 
-
 # ## Formulation
 
 # The multi-commondity flow problem is a simple extension of
@@ -179,16 +178,17 @@ demand = natural_join(
 # The SQLite queries can be arbitrarily complex. For example, here's a query
 # which builds every possible origin-destination pair:
 
-od_pairs = DBInterface.execute(
-    db,
-    """
-    SELECT a.location as 'origin',
-           b.location as 'destination'
-    FROM locations a
-    INNER JOIN locations b
-    ON a.type = 'origin' AND b.type = 'destination'
-    """,
-) |> DataFrames.DataFrame
+od_pairs =
+    DBInterface.execute(
+        db,
+        """
+        SELECT a.location as 'origin',
+            b.location as 'destination'
+        FROM locations a
+        INNER JOIN locations b
+        ON a.type = 'origin' AND b.type = 'destination'
+        """,
+    ) |> DataFrames.DataFrame
 
 # With a constraint that we cannot send more than 625 units between each pair:
 
@@ -207,4 +207,4 @@ solution_summary(model)
 # and obtain the solution:
 
 x.value = value.(x.value)
-x[x.value .> 0, :]
+x[x.value.>0, :]

From 601c697a02e1856d3361afc94f504bcaa034a246 Mon Sep 17 00:00:00 2001
From: Oscar Dowson <odow@users.noreply.github.com>
Date: Tue, 10 Sep 2024 21:09:01 +1200
Subject: [PATCH 5/7] Update docs/src/tutorials/linear/multi.jl

---
 docs/src/tutorials/linear/multi.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl
index 07510655763..28c01c52cc1 100644
--- a/docs/src/tutorials/linear/multi.jl
+++ b/docs/src/tutorials/linear/multi.jl
@@ -139,7 +139,7 @@ end
 cost_x = natural_join(cost, x)
 
 # We've defined a new function, `natural_join`, to simplify the process of
-# joining two DataFrames. This fuction acts like the `NATURAL JOIN` statment in
+# joining two DataFrames. This function acts like the `NATURAL JOIN` statement in
 # SQL.
 
 # Our objective is the inner product of two columns:

From 1a709728d31fa92af19d86b3569f00182dea57b2 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Wed, 11 Sep 2024 10:31:07 +1200
Subject: [PATCH 6/7] Update

---
 docs/make.jl                      |   1 +
 docs/src/extensions/DataFrames.md | 153 ++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 docs/src/extensions/DataFrames.md

diff --git a/docs/make.jl b/docs/make.jl
index 54ea801b60b..19bcb291f17 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -195,6 +195,7 @@ for (solver, data) in TOML.parsefile(joinpath(@__DIR__, "packages.toml"))
 end
 push!(
     _LIST_OF_EXTENSIONS,
+    "JuliaData/DataFrames.jl" => "extensions/DataFrames.md",
     "rafaqz/DimensionalData.jl" => "extensions/DimensionalData.md",
 )
 
diff --git a/docs/src/extensions/DataFrames.md b/docs/src/extensions/DataFrames.md
new file mode 100644
index 00000000000..bbadd70d758
--- /dev/null
+++ b/docs/src/extensions/DataFrames.md
@@ -0,0 +1,153 @@
+# DataFrames.jl
+
+[DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) provides tools for
+working with in-memory tabular data in Julia.
+
+!!! compat
+    Using the DataFrames extension with JuMP requires Julia v1.9 or later.
+
+The DataFrames extension in JuMP lets you construct a `DataFrames.DataFrame` as
+a container in the JuMP macros.
+
+## License
+
+DataFrames.jl is licensed under the [MIT license](https://github.com/JuliaData/DataFrames.jl/blob/main/LICENSE.md).
+
+## Installation
+
+Install DataFrames using `Pkg.add`:
+
+```julia
+import Pkg
+Pkg.add("DataFrames")
+```
+
+## Use with JuMP
+
+Activate the extension by loading both JuMP and DataFrames:
+
+```jldoctest ext_data_frames
+julia> using JuMP, DataFrames
+```
+
+Then, pass `container = DataFrames.DataFrame` in the [`@variable`](@ref),
+[`@constraint`](@ref), or [`@expression`](@ref) macros:
+
+```jldoctest ext_data_frames
+julia> model = Model();
+
+julia> @variable(
+           model,
+           x[i = 2:4, j = ["a", "b"]] >= i,
+           container = DataFrames.DataFrame,
+       )
+6×3 DataFrame
+ Row │ i      j       value
+     │ Int64  String  GenericV…
+─────┼──────────────────────────
+   1 │     2  a       x[2,a]
+   2 │     3  a       x[3,a]
+   3 │     4  a       x[4,a]
+   4 │     2  b       x[2,b]
+   5 │     3  b       x[3,b]
+   6 │     4  b       x[4,b]
+```
+
+Here `x` is a `DataFrames.DataFrame` object, so operations use the
+DataFrames syntax:
+
+```jldoctest ext_data_frames
+julia> x[x.j .== "a", [:i, :value]]
+3×2 DataFrame
+ Row │ i      value
+     │ Int64  GenericV…
+─────┼──────────────────
+   1 │     2  x[2,a]
+   2 │     3  x[3,a]
+   3 │     4  x[4,a]
+
+julia> DataFrames.unstack(x, :i, :j, :value)
+3×3 DataFrame
+ Row │ i      a           b
+     │ Int64  GenericV…?  GenericV…?
+─────┼───────────────────────────────
+   1 │     2  x[2,a]      x[2,b]
+   2 │     3  x[3,a]      x[3,b]
+   3 │     4  x[4,a]      x[4,b]
+```
+
+You can use `container = DataFrames.DataFrame` in the [`@expression`](@ref)
+macro:
+
+```jldoctest ext_data_frames
+julia> @expression(
+           model,
+           expr[j = ["a", "b"]],
+           sum(x[x.j .== j, :value]),
+           container = DataFrames.DataFrame,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  AffExpr
+─────┼──────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a]
+   2 │ b       x[2,b] + x[3,b] + x[4,b]
+```
+
+and in [`@constraint`](@ref):
+
+```jldoctest ext_data_frames
+julia> @constraint(
+           model,
+           [j = ["a", "b"]],
+           sum(x[x.j .== j, :value]) <= 1,
+           container = DataFrames.DataFrame,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  Constrai…
+─────┼──────────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a] ≤ 1
+   2 │ b       x[2,b] + x[3,b] + x[4,b] ≤ 1
+```
+
+### DataFrame-native syntax
+
+While you can use indexing in JuMP's `@expression` and `@constraint` macros, it
+may be more convienent to use DataFrames.jl split-apply-combine framework. For
+example, `expr` can be equivalently written as:
+
+```jldoctest ext_data_frames
+julia> expr2 = model[:expr2] = DataFrames.combine(
+           DataFrames.groupby(x, :j),
+           :value => sum => :value,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  AffExpr
+─────┼──────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a]
+   2 │ b       x[2,b] + x[3,b] + x[4,b]
+```
+
+and the constraint could be written as
+
+```jldoctest ext_data_frames
+julia> df_constraint(v) = @constraint(model, sum(v) <= 1);
+
+julia> DataFrames.combine(
+           DataFrames.groupby(x, :j),
+           :value => df_constraint => :value,
+       )
+2×2 DataFrame
+ Row │ j       value
+     │ String  Constrai…
+─────┼──────────────────────────────────────
+   1 │ a       x[2,a] + x[3,a] + x[4,a] ≤ 1
+   2 │ b       x[2,b] + x[3,b] + x[4,b] ≤ 1
+```
+
+## Documentation
+
+See the [DataFrames.jl documentation](https://dataframes.juliadata.org/stable/)
+for more details on the syntax and features of `DataFrames.DataFrame`.

From 890ede8af9f58fdd82fdbbde4783807e33708d76 Mon Sep 17 00:00:00 2001
From: Oscar Dowson <odow@users.noreply.github.com>
Date: Wed, 11 Sep 2024 11:40:40 +1200
Subject: [PATCH 7/7] Update docs/src/extensions/DataFrames.md

---
 docs/src/extensions/DataFrames.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/extensions/DataFrames.md b/docs/src/extensions/DataFrames.md
index bbadd70d758..eec366d8424 100644
--- a/docs/src/extensions/DataFrames.md
+++ b/docs/src/extensions/DataFrames.md
@@ -114,7 +114,7 @@ julia> @constraint(
 ### DataFrame-native syntax
 
 While you can use indexing in JuMP's `@expression` and `@constraint` macros, it
-may be more convienent to use DataFrames.jl split-apply-combine framework. For
+may be more convenient to use DataFrames.jl split-apply-combine framework. For
 example, `expr` can be equivalently written as:
 
 ```jldoctest ext_data_frames