From f75d8082b58a40c59c1fb042cf87a9fcf64df3d7 Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 10 Sep 2024 14:13:57 +1200 Subject: [PATCH 1/7] Add DataFrames.jl extension --- Project.toml | 10 +++-- ext/JuMPDataFramesExt.jl | 23 +++++++++++ ext/test_DataFrames.jl | 83 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 ext/JuMPDataFramesExt.jl create mode 100644 ext/test_DataFrames.jl diff --git a/Project.toml b/Project.toml index fd93219a7f6..12323a0df88 100644 --- a/Project.toml +++ b/Project.toml @@ -9,32 +9,36 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" [extensions] +JuMPDataFramesExt = "DataFrames" JuMPDimensionalDataExt = "DimensionalData" [compat] +DataFrames = "1" DimensionalData = "0.24, 0.25, 0.26.2, 0.27" LinearAlgebra = "<0.0.1, 1.6" MacroTools = "0.5" MathOptInterface = "1.25.2" MutableArithmetics = "1.1" OrderedCollections = "1" -Printf = "<0.0.1, 1.6" PrecompileTools = "1" +Printf = "<0.0.1, 1.6" SparseArrays = "<0.0.1, 1.6" Test = "<0.0.1, 1.6" julia = "1.6" [extras] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DimensionalData", "Test"] +test = ["DataFrames", "DimensionalData", "Test"] diff --git a/ext/JuMPDataFramesExt.jl b/ext/JuMPDataFramesExt.jl new file mode 100644 index 00000000000..c64f7fba9a8 --- /dev/null +++ b/ext/JuMPDataFramesExt.jl @@ -0,0 
+1,23 @@ +# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +module JuMPDataFramesExt + +import DataFrames +import JuMP + +function JuMP.Containers.container( + f::Function, + indices, + ::Type{DataFrames.DataFrame}, + names::AbstractVector, +) + rows = vec(collect(indices)) + df = DataFrames.DataFrame(NamedTuple{tuple(names...)}(arg) for arg in rows) + df.value = [f(arg...) for arg in rows] + return df +end + +end #module diff --git a/ext/test_DataFrames.jl b/ext/test_DataFrames.jl new file mode 100644 index 00000000000..80aa83ceb1b --- /dev/null +++ b/ext/test_DataFrames.jl @@ -0,0 +1,83 @@ +# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +module TestContainersDataFrames + +using Test + +using DataFrames +using JuMP + +function test_dimension_data_vector() + model = Model() + @variable(model, x[i = 2:4], container = DataFrame) + @test x isa DataFrame + @test size(x) == (3, 2) + @test names(x) == ["i", "value"] + return +end + +function test_dimension_data_matrix() + model = Model() + @variable(model, x[i = 2:4, j = ["a", "b"]], container = DataFrame) + @test x isa DataFrame + @test size(x) == (6, 3) + @test names(x) == ["i", "j", "value"] + @test sum(x[x.j .== "a", :value]) isa AffExpr + return +end + +function test_dimension_data_triangle() + model = Model() + @variable(model, x[i = 2:4, j in i:4], container = DataFrame) + @test x isa DataFrame + @test size(x) == (6, 3) + @test names(x) == ["i", "j", "value"] + return +end + +function test_dimension_data_sparse() + model = Model() + @variable(model, x[i in 1:4, j in 1:4; isodd(i + j)], container = DataFrame) + @test x isa DataFrame + @test size(x) == (8, 3) + @test x.i == [1, 1, 2, 2, 3, 3, 4, 4] + @test x.j == [2, 4, 1, 3, 2, 4, 1, 3] + @test names(x) == ["i", "j", "value"] + return +end + +function test_dataframes_expression() + model = Model() + B = ["a", "b"] + @variable(model, x[i = 2:4, j = B], container = DataFrame) + @expression( + model, + expr[j = B], + sum(x[x.j .== j, :value]), + container = DataFrame, + ) + @test expr isa DataFrame + @test expr.j == ["a", "b"] + expr2 = DataFrames.combine( + DataFrames.groupby(x, :j), + :value => sum => :value, + ) + @test expr == expr2 + return +end + +function test_data_frames_missing_names() + model = Model() + x = @variable(model, [1:3, 1:2], container = DataFrame) + @test all(startswith.(names(x), ["##", "##", "value"])) + x = @variable(model, [i in 1:3, 1:2], container = DataFrame) + @test all(startswith.(names(x), ["i", "##", "value"])) + x = @variable(model, [1:3, j in 1:2], container = DataFrame) + @test all(startswith.(names(x), ["##", "j", "value"])) + return +end + +end From 
08ab62979ab5fd23faf859967620cf35204146d2 Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 10 Sep 2024 15:58:08 +1200 Subject: [PATCH 2/7] Update multi tutorial --- docs/src/tutorials/linear/multi.jl | 88 ++++++++++++++++++------------ 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl index 3916ac2cb34..b22d884de84 100644 --- a/docs/src/tutorials/linear/multi.jl +++ b/docs/src/tutorials/linear/multi.jl @@ -3,7 +3,7 @@ # v.2.0. If a copy of the MPL was not distributed with this file, You can #src # obtain one at https://mozilla.org/MPL/2.0/. #src -# # The multi-commodity flow problem +# # Working with SQLite and DataFrames # **This tutorial was originally contributed by Louis Luangkesorn.** @@ -23,10 +23,10 @@ using JuMP import DataFrames import HiGHS import SQLite +import SQLite: DBInterface import Tables import Test -const DBInterface = SQLite.DBInterface # ## Formulation @@ -119,38 +119,62 @@ products = model = Model(HiGHS.Optimizer) set_silent(model) -@variable(model, x[origins, destinations, products] >= 0) +@variable( + model, + x[origin in origins, destination in destinations, product in products] >= 0, + container = DataFrames.DataFrame, +) # One approach when working with databases is to extract all of the data into a -# Julia datastructure. For example, let's pull the cost table into a DataFrame -# and then construct our objective by iterating over the rows of the DataFrame: +# Julia datastructure. 
For example, let's pull the cost table into a DataFrame: cost = DBInterface.execute(db, "SELECT * FROM cost") |> DataFrames.DataFrame -@objective( - model, - Max, - sum(r.cost * x[r.origin, r.destination, r.product] for r in eachrow(cost)), -); -# If we don't want to use a DataFrame, we can use a `Tables.rowtable` instead: +# and then join the decision variables: -supply = DBInterface.execute(db, "SELECT * FROM supply") |> Tables.rowtable -for r in supply - @constraint(model, sum(x[r.origin, :, r.product]) <= r.supply) +function natural_join(left, right) + on_names = intersect(names(left), names(right)) + return DataFrames.innerjoin(left, right; on = on_names) end -# Another approach is to execute the query, and then to iterate through the rows -# of the query using `Tables.rows`: +cost_x = natural_join(cost, x) + +# We've defined a new function, `natural_join`, to simplify the process of +# joining two DataFrames. This fuction acts like the `NATURAL JOIN` statment in +# SQL. + +# Our objective is the inner product of two columns: -demand = DBInterface.execute(db, "SELECT * FROM demand") -for r in Tables.rows(demand) - @constraint(model, sum(x[:, r.destination, r.product]) == r.demand) +@objective(model, Max, cost_x.cost' * cost_x.value); + +# The supply constraint is more complicated. A useful utility is a function that +# sums the `.value` column after grouping on a set of columns: + +function sum_value_by(df, cols) + gdf = DataFrames.groupby(df, cols) + return DataFrames.combine(gdf, :value => sum => :value) end -# !!! warning -# Iterating through the rows of a query result works by incrementing a -# cursor inside the database. As a consequence, you cannot call -# `Tables.rows` twice on the same query result. 
+# Here it is in action: + +sum_value_by(x, [:origin, :product]) + +# The constraint that the supply must be less than or equal to a capacity can +# now be written as: + +supply = natural_join( + DBInterface.execute(db, "SELECT * FROM supply") |> DataFrames.DataFrame, + sum_value_by(x, [:origin, :product]), +) +@constraint(model, supply.value .<= supply.supply); + +# The demand constraint can be written similarly: + +demand = natural_join( + DBInterface.execute(db, "SELECT * FROM demand") |> DataFrames.DataFrame, + sum_value_by(x, [:destination, :product]), +) +@constraint(model, demand.value .== demand.demand); # The SQLite queries can be arbitrarily complex. For example, here's a query # which builds every possible origin-destination pair: @@ -164,13 +188,12 @@ od_pairs = DBInterface.execute( INNER JOIN locations b ON a.type = 'origin' AND b.type = 'destination' """, -) +) |> DataFrames.DataFrame # With a constraint that we cannot send more than 625 units between each pair: -for r in Tables.rows(od_pairs) - @constraint(model, sum(x[r.origin, r.destination, :]) <= 625) -end +od = natural_join(od_pairs, sum_value_by(x, [:origin, :destination])) +@constraint(model, od.value .<= 625); # ## Solution @@ -181,12 +204,7 @@ Test.@test is_solved_and_feasible(model) Test.@test objective_value(model) == 225_700.0 #src solution_summary(model) -# and print the solution: +# and obtain the solution: -begin - println(" ", join(products, ' ')) - for o in origins, d in destinations - v = lpad.([round(Int, value(x[o, d, p])) for p in products], 5) - println(o, " ", d, " ", join(replace.(v, " 0" => " . 
"), " ")) - end -end +x.value = value.(x.value) +x[x.value .> 0, :] From 5065efc48c8515af0e07fc6a2770e1879e88c2a7 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 10 Sep 2024 16:44:12 +1200 Subject: [PATCH 3/7] Update docs/src/tutorials/linear/multi.jl --- docs/src/tutorials/linear/multi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl index b22d884de84..a935f0017a9 100644 --- a/docs/src/tutorials/linear/multi.jl +++ b/docs/src/tutorials/linear/multi.jl @@ -3,7 +3,7 @@ # v.2.0. If a copy of the MPL was not distributed with this file, You can #src # obtain one at https://mozilla.org/MPL/2.0/. #src -# # Working with SQLite and DataFrames +# # The multi-commodity flow problem # **This tutorial was originally contributed by Louis Luangkesorn.** From c980a0ed5a35a77f748bc1f0b2295c00062dfc65 Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 10 Sep 2024 17:26:16 +1200 Subject: [PATCH 4/7] Fix formattig --- docs/src/tutorials/linear/multi.jl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl index a935f0017a9..07510655763 100644 --- a/docs/src/tutorials/linear/multi.jl +++ b/docs/src/tutorials/linear/multi.jl @@ -27,7 +27,6 @@ import SQLite: DBInterface import Tables import Test - # ## Formulation # The multi-commondity flow problem is a simple extension of @@ -179,16 +178,17 @@ demand = natural_join( # The SQLite queries can be arbitrarily complex. 
For example, here's a query # which builds every possible origin-destination pair: -od_pairs = DBInterface.execute( - db, - """ - SELECT a.location as 'origin', - b.location as 'destination' - FROM locations a - INNER JOIN locations b - ON a.type = 'origin' AND b.type = 'destination' - """, -) |> DataFrames.DataFrame +od_pairs = + DBInterface.execute( + db, + """ + SELECT a.location as 'origin', + b.location as 'destination' + FROM locations a + INNER JOIN locations b + ON a.type = 'origin' AND b.type = 'destination' + """, + ) |> DataFrames.DataFrame # With a constraint that we cannot send more than 625 units between each pair: @@ -207,4 +207,4 @@ solution_summary(model) # and obtain the solution: x.value = value.(x.value) -x[x.value .> 0, :] +x[x.value.>0, :] From 601c697a02e1856d3361afc94f504bcaa034a246 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 10 Sep 2024 21:09:01 +1200 Subject: [PATCH 5/7] Update docs/src/tutorials/linear/multi.jl --- docs/src/tutorials/linear/multi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorials/linear/multi.jl b/docs/src/tutorials/linear/multi.jl index 07510655763..28c01c52cc1 100644 --- a/docs/src/tutorials/linear/multi.jl +++ b/docs/src/tutorials/linear/multi.jl @@ -139,7 +139,7 @@ end cost_x = natural_join(cost, x) # We've defined a new function, `natural_join`, to simplify the process of -# joining two DataFrames. This fuction acts like the `NATURAL JOIN` statment in +# joining two DataFrames. This function acts like the `NATURAL JOIN` statement in # SQL. 
# Our objective is the inner product of two columns: From 1a709728d31fa92af19d86b3569f00182dea57b2 Mon Sep 17 00:00:00 2001 From: odow Date: Wed, 11 Sep 2024 10:31:07 +1200 Subject: [PATCH 6/7] Update --- docs/make.jl | 1 + docs/src/extensions/DataFrames.md | 153 ++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 docs/src/extensions/DataFrames.md diff --git a/docs/make.jl b/docs/make.jl index 54ea801b60b..19bcb291f17 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -195,6 +195,7 @@ for (solver, data) in TOML.parsefile(joinpath(@__DIR__, "packages.toml")) end push!( _LIST_OF_EXTENSIONS, + "JuliaData/DataFrames.jl" => "extensions/DataFrames.md", "rafaqz/DimensionalData.jl" => "extensions/DimensionalData.md", ) diff --git a/docs/src/extensions/DataFrames.md b/docs/src/extensions/DataFrames.md new file mode 100644 index 00000000000..bbadd70d758 --- /dev/null +++ b/docs/src/extensions/DataFrames.md @@ -0,0 +1,153 @@ +# DataFrames.jl + +[DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) provides tools for +working with in-memory tabular data in Julia. + +!!! compat + Using the DataFrames extension with JuMP requires Julia v1.9 or later. + +The DataFrames extension in JuMP lets you construct a `DataFrames.DataFrame` as +a container in the JuMP macros. + +## License + +DataFrames.jl is licensed under the [MIT license](https://github.com/JuliaData/DataFrames.jl/blob/main/LICENSE.md). 
+ +## Installation + +Install DataFrames using `Pkg.add`: + +```julia +import Pkg +Pkg.add("DataFrames") +``` + +## Use with JuMP + +Activate the extension by loading both JuMP and DataFrames: + +```jldoctest ext_data_frames +julia> using JuMP, DataFrames +``` + +Then, pass `container = DataFrames.DataFrame` in the [`@variable`](@ref), +[`@constraint`](@ref), or [`@expression`](@ref) macros: + +```jldoctest ext_data_frames +julia> model = Model(); + +julia> @variable( + model, + x[i = 2:4, j = ["a", "b"]] >= i, + container = DataFrames.DataFrame, + ) +6×3 DataFrame + Row │ i j value + │ Int64 String GenericV… +─────┼────────────────────────── + 1 │ 2 a x[2,a] + 2 │ 3 a x[3,a] + 3 │ 4 a x[4,a] + 4 │ 2 b x[2,b] + 5 │ 3 b x[3,b] + 6 │ 4 b x[4,b] +``` + +Here `x` is a `DataFrames.DataFrame`, so operations use the +DataFrames syntax: + +```jldoctest ext_data_frames +julia> x[x.j .== "a", [:i, :value]] +3×2 DataFrame + Row │ i value + │ Int64 GenericV… +─────┼────────────────── + 1 │ 2 x[2,a] + 2 │ 3 x[3,a] + 3 │ 4 x[4,a] + +julia> DataFrames.unstack(x, :i, :j, :value) +3×3 DataFrame + Row │ i a b + │ Int64 GenericV…? GenericV…? 
+─────┼─────────────────────────────── + 1 │ 2 x[2,a] x[2,b] + 2 │ 3 x[3,a] x[3,b] + 3 │ 4 x[4,a] x[4,b] +``` + +You can use `container = DataFrames.DataFrame` in the [`@expression`](@ref) +macro: + +```jldoctest ext_data_frames +julia> @expression( + model, + expr[j = ["a", "b"]], + sum(x[x.j .== j, :value]), + container = DataFrames.DataFrame, + ) +2×2 DataFrame + Row │ j value + │ String AffExpr +─────┼────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] + 2 │ b x[2,b] + x[3,b] + x[4,b] +``` + +and in [`@constraint`](@ref): + +```jldoctest ext_data_frames +julia> @constraint( + model, + [j = ["a", "b"]], + sum(x[x.j .== j, :value]) <= 1, + container = DataFrames.DataFrame, + ) +2×2 DataFrame + Row │ j value + │ String Constrai… +─────┼────────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] ≤ 1 + 2 │ b x[2,b] + x[3,b] + x[4,b] ≤ 1 +``` + +### DataFrame-native syntax + +While you can use indexing in JuMP's `@expression` and `@constraint` macros, it +may be more convienent to use DataFrames.jl split-apply-combine framework. 
For +example, `expr` can be equivalently written as: + +```jldoctest ext_data_frames +julia> expr2 = model[:expr2] = DataFrames.combine( + DataFrames.groupby(x, :j), + :value => sum => :value, + ) +2×2 DataFrame + Row │ j value + │ String AffExpr +─────┼────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] + 2 │ b x[2,b] + x[3,b] + x[4,b] +``` + +and the constraint could be written as + +```jldoctest ext_data_frames +julia> df_constraint(v) = @constraint(model, sum(v) <= 1); + +julia> DataFrames.combine( + DataFrames.groupby(x, :j), + :value => df_constraint => :value, + ) +2×2 DataFrame + Row │ j value + │ String Constrai… +─────┼────────────────────────────────────── + 1 │ a x[2,a] + x[3,a] + x[4,a] ≤ 1 + 2 │ b x[2,b] + x[3,b] + x[4,b] ≤ 1 +``` + +## Documentation + +See the [DataFrames.jl documentation](https://dataframes.juliadata.org/stable/) +for more details on the syntax and features of `DataFrames.DataFrame`. From 890ede8af9f58fdd82fdbbde4783807e33708d76 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Wed, 11 Sep 2024 11:40:40 +1200 Subject: [PATCH 7/7] Update docs/src/extensions/DataFrames.md --- docs/src/extensions/DataFrames.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/extensions/DataFrames.md b/docs/src/extensions/DataFrames.md index bbadd70d758..eec366d8424 100644 --- a/docs/src/extensions/DataFrames.md +++ b/docs/src/extensions/DataFrames.md @@ -114,7 +114,7 @@ julia> @constraint( ### DataFrame-native syntax While you can use indexing in JuMP's `@expression` and `@constraint` macros, it -may be more convienent to use DataFrames.jl split-apply-combine framework. For +may be more convenient to use DataFrames.jl split-apply-combine framework. For example, `expr` can be equivalently written as: ```jldoctest ext_data_frames