jump-dev · odow · Sep 10, 2024 · Sep 10, 2024 · Sep 10, 2024 · Sep 10, 2024
diff --git a/Project.toml b/Project.toml
@@ -9,32 +9,36 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [weakdeps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 
 [extensions]
+JuMPDataFramesExt = "DataFrames"
 JuMPDimensionalDataExt = "DimensionalData"
 
 [compat]
+DataFrames = "1"
 DimensionalData = "0.24, 0.25, 0.26.2, 0.27"
 LinearAlgebra = "<0.0.1, 1.6"
 MacroTools = "0.5"
 MathOptInterface = "1.25.2"
 MutableArithmetics = "1.1"
 OrderedCollections = "1"
-Printf = "<0.0.1, 1.6"
 PrecompileTools = "1"
+Printf = "<0.0.1, 1.6"
 SparseArrays = "<0.0.1, 1.6"
 Test = "<0.0.1, 1.6"
 julia = "1.6"
 
 [extras]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DimensionalData", "Test"]
+test = ["DataFrames", "DimensionalData", "Test"]
diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl
@@ -23,11 +23,10 @@ using JuMP
 import DataFrames
 import HiGHS
 import SQLite
+import SQLite: DBInterface
 import Tables
 import Test
 
-const DBInterface = SQLite.DBInterface
-
 # ## Formulation
 
 # The multi-commondity flow problem is a simple extension of
@@ -119,58 +118,82 @@ products =
 
 model = Model(HiGHS.Optimizer)
 set_silent(model)
-@variable(model, x[origins, destinations, products] >= 0)
+@variable(
+ model,
+ x[origin in origins, destination in destinations, product in products] >= 0,
+ container = DataFrames.DataFrame,
+)
 
 # One approach when working with databases is to extract all of the data into a
-# Julia datastructure. For example, let's pull the cost table into a DataFrame
-# and then construct our objective by iterating over the rows of the DataFrame:
+# Julia datastructure. For example, let's pull the cost table into a DataFrame:
 
 cost = DBInterface.execute(db, "SELECT * FROM cost") |> DataFrames.DataFrame
-@objective(
- model,
- Max,
- sum(r.cost * x[r.origin, r.destination, r.product] for r in eachrow(cost)),
-);
 
-# If we don't want to use a DataFrame, we can use a `Tables.rowtable` instead:
+# and then join the decision variables:
 
-supply = DBInterface.execute(db, "SELECT * FROM supply") |> Tables.rowtable
-for r in supply
- @constraint(model, sum(x[r.origin, :, r.product]) <= r.supply)
+function natural_join(left, right)
+    shared_columns = names(left) ∩ names(right)
+    return DataFrames.innerjoin(left, right; on = shared_columns)
 end
 
-# Another approach is to execute the query, and then to iterate through the rows
-# of the query using `Tables.rows`:
+cost_x = natural_join(cost, x)
+
+# We've defined a new function, `natural_join`, to simplify the process of
+# joining two DataFrames. This function acts like the `NATURAL JOIN`
+# statement in SQL.
 
-demand = DBInterface.execute(db, "SELECT * FROM demand")
-for r in Tables.rows(demand)
- @constraint(model, sum(x[:, r.destination, r.product]) == r.demand)
+# Our objective is the inner product of two columns:
+
+@objective(model, Max, cost_x.cost' * cost_x.value);
+
+# The supply constraint is more complicated. A useful utility is a function that
+# sums the `.value` column after grouping on a set of columns:
+
+function sum_value_by(df, cols)
+    total = :value => sum => :value
+    return DataFrames.combine(DataFrames.groupby(df, cols), total)
 end
 
-# !!! warning
-# Iterating through the rows of a query result works by incrementing a
-# cursor inside the database. As a consequence, you cannot call
-# `Tables.rows` twice on the same query result.
+# Here it is in action:
+
+sum_value_by(x, [:origin, :product])
+
+# The constraint that the supply must be less than or equal to a capacity can
+# now be written as:
+
+supply = natural_join(
+ DBInterface.execute(db, "SELECT * FROM supply") |> DataFrames.DataFrame,
+ sum_value_by(x, [:origin, :product]),
+)
+@constraint(model, supply.value .<= supply.supply);
+
+# The demand constraint can be written similarly:
+
+demand = natural_join(
+ DBInterface.execute(db, "SELECT * FROM demand") |> DataFrames.DataFrame,
+ sum_value_by(x, [:destination, :product]),
+)
+@constraint(model, demand.value .== demand.demand);
 
 # The SQLite queries can be arbitrarily complex. For example, here's a query
 # which builds every possible origin-destination pair:
 
-od_pairs = DBInterface.execute(
- db,
- """
- SELECT a.location as 'origin',
- b.location as 'destination'
- FROM locations a
- INNER JOIN locations b
- ON a.type = 'origin' AND b.type = 'destination'
- """,
-)
+od_pairs =
+ DBInterface.execute(
+ db,
+ """
+ SELECT a.location as 'origin',
+ b.location as 'destination'
+ FROM locations a
+ INNER JOIN locations b
+ ON a.type = 'origin' AND b.type = 'destination'
+ """,
+ ) |> DataFrames.DataFrame
 
 # With a constraint that we cannot send more than 625 units between each pair:
 
-for r in Tables.rows(od_pairs)
- @constraint(model, sum(x[r.origin, r.destination, :]) <= 625)
-end
+od = natural_join(od_pairs, sum_value_by(x, [:origin, :destination]))
+@constraint(model, od.value .<= 625);
 
 # ## Solution
 
@@ -181,12 +204,7 @@ Test.@test is_solved_and_feasible(model)
 Test.@test objective_value(model) == 225_700.0 #src
 solution_summary(model)
 
-# and print the solution:
+# and obtain the solution:
 
-begin
- println(" ", join(products, ' '))
- for o in origins, d in destinations
- v = lpad.([round(Int, value(x[o, d, p])) for p in products], 5)
- println(o, " ", d, " ", join(replace.(v, " 0" => " . "), " "))
- end
-end
+x.value = value.(x.value)
+x[x.value.>0, :]
diff --git a/ext/JuMPDataFramesExt.jl b/ext/JuMPDataFramesExt.jl
@@ -0,0 +1,45 @@
+# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+module JuMPDataFramesExt
+
+import DataFrames
+import JuMP
+
+"""
+    JuMP.Containers.container(
+        f::Function,
+        indices,
+        ::Type{DataFrames.DataFrame},
+        names::AbstractVector,
+    )
+
+Build a `DataFrames.DataFrame` with one row per element of `indices`.
+
+Each element of `indices` is converted to a `NamedTuple` whose fields are
+`names`, which gives one column per index. The extra column `value` stores
+`f(index...)` for each row.
+
+Throws an `ArgumentError` if `:value` is in `names`, because the `value` column
+would otherwise silently overwrite that index column.
+"""
+function JuMP.Containers.container(
+    f::Function,
+    indices,
+    ::Type{DataFrames.DataFrame},
+    names::AbstractVector,
+)
+    if :value in names
+        msg = "the index name `value` conflicts with the `value` column"
+        throw(ArgumentError("Unable to build DataFrame container: $msg"))
+    end
+    # Flatten the index set so that each element becomes exactly one row.
+    rows = vec(collect(indices))
+    df = DataFrames.DataFrame(NamedTuple{tuple(names...)}(arg) for arg in rows)
+    df.value = [f(arg...) for arg in rows]
+    return df
+end
+
+end #module
diff --git a/ext/test_DataFrames.jl b/ext/test_DataFrames.jl
@@ -0,0 +1,83 @@
+# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+module TestContainersDataFrames
+
+using Test
+
+using DataFrames
+using JuMP
+
+function test_dimension_data_vector()
+    model = Model()
+    @variable(model, x[i in 2:4], container = DataFrame)
+    @test x isa DataFrame
+    @test names(x) == ["i", "value"]
+    @test size(x) == (3, 2)
+    return
+end
+
+function test_dimension_data_matrix()
+    model = Model()
+    @variable(model, x[i in 2:4, j in ["a", "b"]], container = DataFrame)
+    @test x isa DataFrame
+    @test names(x) == ["i", "j", "value"]
+    @test size(x) == (6, 3)
+    @test sum(x.value[x.j .== "a"]) isa AffExpr
+    return
+end
+
+function test_dimension_data_triangle()
+    model = Model()
+    @variable(model, x[i in 2:4, j = i:4], container = DataFrame)
+    @test x isa DataFrame
+    @test names(x) == ["i", "j", "value"]
+    @test size(x) == (6, 3)
+    return
+end
+
+function test_dimension_data_sparse()
+    model = Model()
+    @variable(model, x[i = 1:4, j = 1:4; isodd(i + j)], container = DataFrame)
+    @test x isa DataFrame
+    @test names(x) == ["i", "j", "value"]
+    @test size(x) == (8, 3)
+    @test x.j == [2, 4, 1, 3, 2, 4, 1, 3]
+    @test x.i == [1, 1, 2, 2, 3, 3, 4, 4]
+    return
+end
+
+function test_dataframes_expression()
+    model = Model()
+    letters = ["a", "b"]
+    @variable(model, x[i in 2:4, j in letters], container = DataFrame)
+    @expression(
+        model,
+        expr[j in letters],
+        sum(x.value[x.j .== j]),
+        container = DataFrame,
+    )
+    @test expr isa DataFrame
+    @test expr.j == letters
+    # Grouping `x` by `j` and summing the `value` column directly must give the
+    # same table as the `@expression` container built above.
+    grouped = DataFrames.groupby(x, :j)
+    expected = DataFrames.combine(grouped, :value => sum => :value)
+    @test expr == expected
+    return
+end
+
+function test_data_frames_missing_names()
+    model = Model()
+    anon = @variable(model, [1:3, 1:2], container = DataFrame)
+    @test all(startswith.(names(anon), ["##", "##", "value"]))
+    named_i = @variable(model, [i = 1:3, 1:2], container = DataFrame)
+    @test all(startswith.(names(named_i), ["i", "##", "value"]))
+    named_j = @variable(model, [1:3, j = 1:2], container = DataFrame)
+    @test all(startswith.(names(named_j), ["##", "j", "value"]))
+    return
+end
+
+end