Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DataFrames.jl extension #3821

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,36 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[weakdeps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"

[extensions]
JuMPDataFramesExt = "DataFrames"
JuMPDimensionalDataExt = "DimensionalData"

[compat]
DataFrames = "1"
DimensionalData = "0.24, 0.25, 0.26.2, 0.27"
LinearAlgebra = "<0.0.1, 1.6"
MacroTools = "0.5"
MathOptInterface = "1.25.2"
MutableArithmetics = "1.1"
OrderedCollections = "1"
Printf = "<0.0.1, 1.6"
PrecompileTools = "1"
Printf = "<0.0.1, 1.6"
SparseArrays = "<0.0.1, 1.6"
Test = "<0.0.1, 1.6"
julia = "1.6"

[extras]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DimensionalData", "Test"]
test = ["DataFrames", "DimensionalData", "Test"]
106 changes: 62 additions & 44 deletions docs/src/tutorials/linear/multi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@ using JuMP
import DataFrames
import HiGHS
import SQLite
import SQLite: DBInterface
import Tables
import Test

const DBInterface = SQLite.DBInterface

# ## Formulation

# The multi-commodity flow problem is a simple extension of
Expand Down Expand Up @@ -119,58 +118,82 @@ products =

model = Model(HiGHS.Optimizer)
set_silent(model)
@variable(model, x[origins, destinations, products] >= 0)
@variable(
model,
x[origin in origins, destination in destinations, product in products] >= 0,
container = DataFrames.DataFrame,
)

# One approach when working with databases is to extract all of the data into a
# Julia datastructure. For example, let's pull the cost table into a DataFrame
# and then construct our objective by iterating over the rows of the DataFrame:
# Julia datastructure. For example, let's pull the cost table into a DataFrame:

cost = DBInterface.execute(db, "SELECT * FROM cost") |> DataFrames.DataFrame
@objective(
model,
Max,
sum(r.cost * x[r.origin, r.destination, r.product] for r in eachrow(cost)),
);

# If we don't want to use a DataFrame, we can use a `Tables.rowtable` instead:
# and then join the decision variables:

supply = DBInterface.execute(db, "SELECT * FROM supply") |> Tables.rowtable
for r in supply
@constraint(model, sum(x[r.origin, :, r.product]) <= r.supply)
"""
    natural_join(left, right)

Inner-join `left` and `right` on every column name that the two tables have in
common.
"""
function natural_join(left, right)
    shared_columns = intersect(names(left), names(right))
    joined = DataFrames.innerjoin(left, right; on = shared_columns)
    return joined
end

# Another approach is to execute the query, and then to iterate through the rows
# of the query using `Tables.rows`:
cost_x = natural_join(cost, x)

# We've defined a new function, `natural_join`, to simplify the process of
# joining two DataFrames. This function acts like the `NATURAL JOIN` statement in
odow marked this conversation as resolved.
Show resolved Hide resolved
# SQL.

demand = DBInterface.execute(db, "SELECT * FROM demand")
for r in Tables.rows(demand)
@constraint(model, sum(x[:, r.destination, r.product]) == r.demand)
# Our objective is the inner product of two columns:

@objective(model, Max, cost_x.cost' * cost_x.value);

# The supply constraint is more complicated. A useful utility is a function that
# sums the `.value` column after grouping on a set of columns:

"""
    sum_value_by(df, cols)

Group `df` by the columns `cols` and return a table in which the `value`
column of each group has been replaced by its sum.
"""
function sum_value_by(df, cols)
    return DataFrames.combine(
        DataFrames.groupby(df, cols),
        :value => sum => :value,
    )
end

# !!! warning
# Iterating through the rows of a query result works by incrementing a
# cursor inside the database. As a consequence, you cannot call
# `Tables.rows` twice on the same query result.
# Here it is in action:

sum_value_by(x, [:origin, :product])

# The constraint that the supply must be less than or equal to a capacity can
# now be written as:

supply = natural_join(
DBInterface.execute(db, "SELECT * FROM supply") |> DataFrames.DataFrame,
sum_value_by(x, [:origin, :product]),
)
@constraint(model, supply.value .<= supply.supply);

# The demand constraint can be written similarly:

demand = natural_join(
DBInterface.execute(db, "SELECT * FROM demand") |> DataFrames.DataFrame,
sum_value_by(x, [:destination, :product]),
)
@constraint(model, demand.value .== demand.demand);

# The SQLite queries can be arbitrarily complex. For example, here's a query
# which builds every possible origin-destination pair:

od_pairs = DBInterface.execute(
db,
"""
SELECT a.location as 'origin',
b.location as 'destination'
FROM locations a
INNER JOIN locations b
ON a.type = 'origin' AND b.type = 'destination'
""",
)
od_pairs =
DBInterface.execute(
db,
"""
SELECT a.location as 'origin',
b.location as 'destination'
FROM locations a
INNER JOIN locations b
ON a.type = 'origin' AND b.type = 'destination'
""",
) |> DataFrames.DataFrame

# With a constraint that we cannot send more than 625 units between each pair:

for r in Tables.rows(od_pairs)
@constraint(model, sum(x[r.origin, r.destination, :]) <= 625)
end
od = natural_join(od_pairs, sum_value_by(x, [:origin, :destination]))
@constraint(model, od.value .<= 625);

# ## Solution

Expand All @@ -181,12 +204,7 @@ Test.@test is_solved_and_feasible(model)
Test.@test objective_value(model) == 225_700.0 #src
solution_summary(model)

# and print the solution:
# and obtain the solution:

begin
println(" ", join(products, ' '))
for o in origins, d in destinations
v = lpad.([round(Int, value(x[o, d, p])) for p in products], 5)
println(o, " ", d, " ", join(replace.(v, " 0" => " . "), " "))
end
end
x.value = value.(x.value)
x[x.value.>0, :]
23 changes: 23 additions & 0 deletions ext/JuMPDataFramesExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

module JuMPDataFramesExt

import DataFrames
import JuMP

"""
    JuMP.Containers.container(
        f::Function,
        indices,
        ::Type{DataFrames.DataFrame},
        names::AbstractVector,
    )

Build a `DataFrames.DataFrame` with one row for each element of `indices`.

Every entry of `names` becomes a column that stores the matching component of
the index. An extra column called `value` stores the result of calling `f` on
the splatted index.

Throws an `ArgumentError` if `:value` appears in `names`, because that index
column would otherwise be silently overwritten by the `value` column.
"""
function JuMP.Containers.container(
    f::Function,
    indices,
    ::Type{DataFrames.DataFrame},
    names::AbstractVector,
)
    # `value` is reserved for the elements of the container. Without this
    # check, the index column of the same name would be replaced below and the
    # index data would be lost without any warning.
    if :value in names
        throw(
            ArgumentError(
                "Unable to create a `DataFrames.DataFrame` container: the " *
                "index name `value` is reserved for the column that stores " *
                "the elements of the container. Rename the index.",
            ),
        )
    end
    # Materialize the index set once so that the index columns and the `value`
    # column are built from the same rows in the same order.
    rows = vec(collect(indices))
    column_names = Tuple(names)
    df = DataFrames.DataFrame(NamedTuple{column_names}(row) for row in rows)
    df.value = [f(row...) for row in rows]
    return df
end

end #module
83 changes: 83 additions & 0 deletions ext/test_DataFrames.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

module TestContainersDataFrames

using Test

using DataFrames
using JuMP

# A single named index set: expect one index column plus the `value` column,
# with one row for each of the three indices.
function test_dimension_data_vector()
    m = Model()
    @variable(m, x[i = 2:4], container = DataFrame)
    @test x isa DataFrame
    @test names(x) == ["i", "value"]
    @test size(x) == (3, 2)
    return
end

# Two named index sets: expect the 3 x 2 product to be flattened into six rows,
# and a filtered `value` column to sum to an affine expression.
function test_dimension_data_matrix()
    m = Model()
    @variable(m, x[i = 2:4, j = ["a", "b"]], container = DataFrame)
    @test x isa DataFrame
    @test names(x) == ["i", "j", "value"]
    @test size(x) == (6, 3)
    is_a = x.j .== "a"
    @test sum(x[is_a, :value]) isa AffExpr
    return
end

# A dependent (triangular) index set, where `j` ranges over `i:4`: expect
# 3 + 2 + 1 = 6 rows.
function test_dimension_data_triangle()
    m = Model()
    @variable(m, x[i = 2:4, j in i:4], container = DataFrame)
    @test x isa DataFrame
    @test names(x) == ["i", "j", "value"]
    @test size(x) == (6, 3)
    return
end

# An index set filtered by a condition: only the eight `(i, j)` pairs with an
# odd sum are kept, and the rows are expected in the order checked below.
function test_dimension_data_sparse()
    m = Model()
    @variable(m, x[i in 1:4, j in 1:4; isodd(i + j)], container = DataFrame)
    @test x isa DataFrame
    @test names(x) == ["i", "j", "value"]
    @test size(x) == (8, 3)
    expected_i = [1, 1, 2, 2, 3, 3, 4, 4]
    expected_j = [2, 4, 1, 3, 2, 4, 1, 3]
    @test x.i == expected_i
    @test x.j == expected_j
    return
end

# An `@expression` built with the DataFrame container is compared against the
# same aggregation computed directly with `groupby` and `combine`.
function test_dataframes_expression()
    m = Model()
    labels = ["a", "b"]
    @variable(m, x[i = 2:4, j = labels], container = DataFrame)
    @expression(
        m,
        expr[j = labels],
        sum(x[x.j .== j, :value]),
        container = DataFrame,
    )
    @test expr isa DataFrame
    @test expr.j == ["a", "b"]
    grouped = DataFrames.groupby(x, :j)
    expr2 = DataFrames.combine(grouped, :value => sum => :value)
    @test expr == expr2
    return
end

# Anonymous index sets: the test expects the column of an unnamed index to have
# a name starting with "##", while named indices keep their given name.
function test_data_frames_missing_names()
    m = Model()
    has_prefixes(df, prefixes) = all(startswith.(names(df), prefixes))
    x = @variable(m, [1:3, 1:2], container = DataFrame)
    @test has_prefixes(x, ["##", "##", "value"])
    x = @variable(m, [i in 1:3, 1:2], container = DataFrame)
    @test has_prefixes(x, ["i", "##", "value"])
    x = @variable(m, [1:3, j in 1:2], container = DataFrame)
    @test has_prefixes(x, ["##", "j", "value"])
    return
end

end
Loading