diff --git a/Project.toml b/Project.toml index fd93219a7f6..12323a0df88 100644 --- a/Project.toml +++ b/Project.toml @@ -9,32 +9,36 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" [extensions] +JuMPDataFramesExt = "DataFrames" JuMPDimensionalDataExt = "DimensionalData" [compat] +DataFrames = "1" DimensionalData = "0.24, 0.25, 0.26.2, 0.27" LinearAlgebra = "<0.0.1, 1.6" MacroTools = "0.5" MathOptInterface = "1.25.2" MutableArithmetics = "1.1" OrderedCollections = "1" -Printf = "<0.0.1, 1.6" PrecompileTools = "1" +Printf = "<0.0.1, 1.6" SparseArrays = "<0.0.1, 1.6" Test = "<0.0.1, 1.6" julia = "1.6" [extras] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DimensionalData", "Test"] +test = ["DataFrames", "DimensionalData", "Test"] diff --git a/docs/make.jl b/docs/make.jl index 54ea801b60b..19bcb291f17 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -195,6 +195,7 @@ for (solver, data) in TOML.parsefile(joinpath(@__DIR__, "packages.toml")) end push!( _LIST_OF_EXTENSIONS, + "JuliaData/DataFrames.jl" => "extensions/DataFrames.md", "rafaqz/DimensionalData.jl" => "extensions/DimensionalData.md", ) diff --git a/docs/src/extensions/DataFrames.md b/docs/src/extensions/DataFrames.md new file mode 100644 index 00000000000..eec366d8424 --- /dev/null +++ b/docs/src/extensions/DataFrames.md @@ -0,0 +1,153 @@ +# DataFrames.jl + 
+[DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) provides tools for +working with in-memory tabular data in Julia. + +!!! compat + Using the DataFrames extension with JuMP requires Julia v1.9 or later. + +The DataFrames extension in JuMP lets you construct a `DataFrames.DataFrame` as +a container in the JuMP macros. + +## License + +DataFrames.jl is licensed under the [MIT license](https://github.com/JuliaData/DataFrames.jl/blob/main/LICENSE.md). + +## Installation + +Install DataFrames using `Pkg.add`: + +```julia +import Pkg +Pkg.add("DataFrames") +``` + +## Use with JuMP + +Activate the extension by loading both JuMP and DataFrames: + +```jldoctest ext_data_frames +julia> using JuMP, DataFrames +``` + +Then, pass `container = DataFrames.DataFrame` in the [`@variable`](@ref), +[`@constraint`](@ref), or [`@expression`](@ref) macros: + +```jldoctest ext_data_frames +julia> model = Model(); + +julia> @variable( + model, + x[i = 2:4, j = ["a", "b"]] >= i, + container = DataFrames.DataFrame, + ) +6×3 DataFrame + Row │ i j value + │ Int64 String GenericV… +─────┼────────────────────────── + 1 │ 2 a x[2,a] + 2 │ 3 a x[3,a] + 3 │ 4 a x[4,a] + 4 │ 2 b x[2,b] + 5 │ 3 b x[3,b] + 6 │ 4 b x[4,b] +``` + +Here `x` is a `DataFrames.DataFrame` object, so operations use the +DataFrames syntax: + +```jldoctest ext_data_frames +julia> x[x.j .== "a", [:i, :value]] +3×2 DataFrame + Row │ i value + │ Int64 GenericV… +─────┼────────────────── + 1 │ 2 x[2,a] + 2 │ 3 x[3,a] + 3 │ 4 x[4,a] + +julia> DataFrames.unstack(x, :i, :j, :value) +3×3 DataFrame + Row │ i a b + │ Int64 GenericV…? GenericV…? 
+─────┼─────────────────────────────── + 1 │ 2 x[2,a] x[2,b] + 2 │ 3 x[3,a] x[3,b] + 3 │ 4 x[4,a] x[4,b] +``` + +You can use `container = DataFrames.DataFrame` in the [`@expression`](@ref) +macro: + +```jldoctest ext_data_frames +julia> @expression( + model, + expr[j = ["a", "b"]], + sum(x[x.j .== j, :value]), + container = DataFrames.DataFrame, + ) +2×2 DataFrame + Row │ j value + │ String AffExpr +─────┼────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] + 2 │ b x[2,b] + x[3,b] + x[4,b] +``` + +and in [`@constraint`](@ref): + +```jldoctest ext_data_frames +julia> @constraint( + model, + [j = ["a", "b"]], + sum(x[x.j .== j, :value]) <= 1, + container = DataFrames.DataFrame, + ) +2×2 DataFrame + Row │ j value + │ String Constrai… +─────┼────────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] ≤ 1 + 2 │ b x[2,b] + x[3,b] + x[4,b] ≤ 1 +``` + +### DataFrame-native syntax + +While you can use indexing in JuMP's `@expression` and `@constraint` macros, it +may be more convenient to use DataFrames.jl split-apply-combine framework. 
For +example, `expr` can be equivalently written as: + +```jldoctest ext_data_frames +julia> expr2 = model[:expr2] = DataFrames.combine( + DataFrames.groupby(x, :j), + :value => sum => :value, + ) +2×2 DataFrame + Row │ j value + │ String AffExpr +─────┼────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] + 2 │ b x[2,b] + x[3,b] + x[4,b] +``` + +and the constraint could be written as + +```jldoctest ext_data_frames +julia> df_constraint(v) = @constraint(model, sum(v) <= 1); + +julia> DataFrames.combine( + DataFrames.groupby(x, :j), + :value => df_constraint => :value, + ) +2×2 DataFrame + Row │ j value + │ String Constrai… +─────┼────────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] ≤ 1 + 2 │ b x[2,b] + x[3,b] + x[4,b] ≤ 1 +``` + +## Documentation + +See the [DataFrames.jl documentation](https://dataframes.juliadata.org/stable/) +for more details on the syntax and features of `DataFrames.DataFrame`. diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl index 3916ac2cb34..28c01c52cc1 100644 --- a/docs/src/tutorials/linear/multi.jl +++ b/docs/src/tutorials/linear/multi.jl @@ -23,11 +23,10 @@ using JuMP import DataFrames import HiGHS import SQLite +import SQLite: DBInterface import Tables import Test -const DBInterface = SQLite.DBInterface - # ## Formulation # The multi-commondity flow problem is a simple extension of @@ -119,58 +118,82 @@ products = model = Model(HiGHS.Optimizer) set_silent(model) -@variable(model, x[origins, destinations, products] >= 0) +@variable( + model, + x[origin in origins, destination in destinations, product in products] >= 0, + container = DataFrames.DataFrame, +) # One approach when working with databases is to extract all of the data into a -# Julia datastructure. For example, let's pull the cost table into a DataFrame -# and then construct our objective by iterating over the rows of the DataFrame: +# Julia datastructure. 
# For example, let's pull the cost table into a DataFrame:

cost = DataFrames.DataFrame(DBInterface.execute(db, "SELECT * FROM cost"))

# and then join the decision variables:

"""
    natural_join(left, right)

Inner-join the tables `left` and `right` on every column name that appears in
both of them.
"""
function natural_join(left, right)
    shared_columns = names(left) ∩ names(right)
    return DataFrames.innerjoin(left, right; on = shared_columns)
end

cost_x = natural_join(cost, x)

# We've defined a new function, `natural_join`, to simplify the process of
# joining two DataFrames. This function acts like the `NATURAL JOIN` statement in
# SQL.

# Our objective is the inner product of two columns:

@objective(model, Max, cost_x.cost' * cost_x.value);

# The supply constraint is more complicated. A useful utility is a function that
# sums the `.value` column after grouping on a set of columns:

"""
    sum_value_by(df, cols)

Group `df` by `cols` and return one row per group, in which the `:value` column
holds the sum of that group's `:value` entries.
"""
function sum_value_by(df, cols)
    return DataFrames.combine(
        DataFrames.groupby(df, cols),
        :value => sum => :value,
    )
end
+# Here it is in action: + +sum_value_by(x, [:origin, :product]) + +# The constraint that the supply must be less than or equal to a capacity can +# now be written as: + +supply = natural_join( + DBInterface.execute(db, "SELECT * FROM supply") |> DataFrames.DataFrame, + sum_value_by(x, [:origin, :product]), +) +@constraint(model, supply.value .<= supply.supply); + +# The demand constraint can be written similarly: + +demand = natural_join( + DBInterface.execute(db, "SELECT * FROM demand") |> DataFrames.DataFrame, + sum_value_by(x, [:destination, :product]), +) +@constraint(model, demand.value .== demand.demand); # The SQLite queries can be arbitrarily complex. For example, here's a query # which builds every possible origin-destination pair: -od_pairs = DBInterface.execute( - db, - """ - SELECT a.location as 'origin', - b.location as 'destination' - FROM locations a - INNER JOIN locations b - ON a.type = 'origin' AND b.type = 'destination' - """, -) +od_pairs = + DBInterface.execute( + db, + """ + SELECT a.location as 'origin', + b.location as 'destination' + FROM locations a + INNER JOIN locations b + ON a.type = 'origin' AND b.type = 'destination' + """, + ) |> DataFrames.DataFrame # With a constraint that we cannot send more than 625 units between each pair: -for r in Tables.rows(od_pairs) - @constraint(model, sum(x[r.origin, r.destination, :]) <= 625) -end +od = natural_join(od_pairs, sum_value_by(x, [:origin, :destination])) +@constraint(model, od.value .<= 625); # ## Solution @@ -181,12 +204,7 @@ Test.@test is_solved_and_feasible(model) Test.@test objective_value(model) == 225_700.0 #src solution_summary(model) -# and print the solution: +# and obtain the solution: -begin - println(" ", join(products, ' ')) - for o in origins, d in destinations - v = lpad.([round(Int, value(x[o, d, p])) for p in products], 5) - println(o, " ", d, " ", join(replace.(v, " 0" => " . 
# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

module JuMPDataFramesExt

import DataFrames
import JuMP

"""
    JuMP.Containers.container(
        f::Function,
        indices,
        ::Type{DataFrames.DataFrame},
        names::AbstractVector,
    )

Build a `DataFrames.DataFrame` with one row for each element of `indices`.

Each element of `indices` is converted to a `NamedTuple` whose keys are `names`,
so every entry of `names` becomes a column holding the corresponding index
component. An additional column, `value`, holds `f(arg...)` for each element
`arg` of `indices`.

Throws an `ArgumentError` if `:value` appears in `names`, because that index
column would otherwise be silently overwritten by the `value` column.
"""
function JuMP.Containers.container(
    f::Function,
    indices,
    ::Type{DataFrames.DataFrame},
    names::AbstractVector,
)
    if :value in names
        throw(
            ArgumentError(
                "Unable to build a `DataFrames.DataFrame` container: the " *
                "index name `value` conflicts with the `value` column " *
                "that stores the elements. Rename the index.",
            ),
        )
    end
    # Materialize the indices once so that the index columns and the `value`
    # column are built from the same rows in the same order.
    rows = vec(collect(indices))
    df = DataFrames.DataFrame(NamedTuple{tuple(names...)}(arg) for arg in rows)
    df.value = [f(arg...) for arg in rows]
    return df
end

end #module
# Tests for the `container = DataFrame` option of the JuMP macros.
#
# Every test function uses the `test_data_frames_` prefix.
module TestContainersDataFrames

using Test

using DataFrames
using JuMP

# A single named index yields one index column plus the `value` column.
function test_data_frames_vector()
    model = Model()
    @variable(model, x[i = 2:4], container = DataFrame)
    @test x isa DataFrame
    @test size(x) == (3, 2)
    @test names(x) == ["i", "value"]
    return
end

# Two independent indices yield one row per (i, j) combination.
function test_data_frames_matrix()
    model = Model()
    @variable(model, x[i = 2:4, j = ["a", "b"]], container = DataFrame)
    @test x isa DataFrame
    @test size(x) == (6, 3)
    @test names(x) == ["i", "j", "value"]
    @test sum(x[x.j .== "a", :value]) isa AffExpr
    return
end

# The range of `j` depends on `i`.
function test_data_frames_triangle()
    model = Model()
    @variable(model, x[i = 2:4, j in i:4], container = DataFrame)
    @test x isa DataFrame
    @test size(x) == (6, 3)
    @test names(x) == ["i", "j", "value"]
    return
end

# A condition after `;` filters the index combinations.
function test_data_frames_sparse()
    model = Model()
    @variable(model, x[i in 1:4, j in 1:4; isodd(i + j)], container = DataFrame)
    @test x isa DataFrame
    @test size(x) == (8, 3)
    @test x.i == [1, 1, 2, 2, 3, 3, 4, 4]
    @test x.j == [2, 4, 1, 3, 2, 4, 1, 3]
    @test names(x) == ["i", "j", "value"]
    return
end

# `@expression` with a DataFrame container matches the equivalent
# groupby/combine computation.
function test_data_frames_expression()
    model = Model()
    B = ["a", "b"]
    @variable(model, x[i = 2:4, j = B], container = DataFrame)
    @expression(
        model,
        expr[j = B],
        sum(x[x.j .== j, :value]),
        container = DataFrame,
    )
    @test expr isa DataFrame
    @test expr.j == ["a", "b"]
    expr2 = DataFrames.combine(
        DataFrames.groupby(x, :j),
        :value => sum => :value,
    )
    @test expr == expr2
    return
end

# Unnamed indices get column names that start with "##".
function test_data_frames_missing_names()
    model = Model()
    x = @variable(model, [1:3, 1:2], container = DataFrame)
    @test all(startswith.(names(x), ["##", "##", "value"]))
    x = @variable(model, [i in 1:3, 1:2], container = DataFrame)
    @test all(startswith.(names(x), ["i", "##", "value"]))
    x = @variable(model, [1:3, j in 1:2], container = DataFrame)
    @test all(startswith.(names(x), ["##", "j", "value"]))
    return
end

end