diff --git a/Project.toml b/Project.toml index f04e1dd..b8414a8 100644 --- a/Project.toml +++ b/Project.toml @@ -15,10 +15,12 @@ KLU = "ef3ab10e-7fda-4108-b977-705223b18434" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" +SCIP = "82193955-e24f-5292-bf16-6f2c5261a85f" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" @@ -30,7 +32,6 @@ HDF5 = "0.16" HiGHS = "1" JLD2 = "0.4" JSON = "0.21" -julia = "1" JuMP = "1" KLU = "0.4" MathOptInterface = "1" @@ -39,3 +40,4 @@ PyCall = "1" Requires = "1" Statistics = "1" TimerOutputs = "0.5" +julia = "1" diff --git a/src/Cuts/tableau/gmi_dual.jl b/src/Cuts/tableau/gmi_dual.jl index 96406b7..2c375c4 100644 --- a/src/Cuts/tableau/gmi_dual.jl +++ b/src/Cuts/tableau/gmi_dual.jl @@ -442,34 +442,69 @@ function _dualgmi_features(h5_filename, extractor) end function _dualgmi_generate(train_h5, model) - data = ProblemData(model) - data_s, transforms = convert_to_standard_form(data) - all_cuts = nothing - visited = Set() - for h5_filename in train_h5 - h5 = H5File(h5_filename) - cut_basis_vars = h5.get_array("cuts_basis_vars") - cut_basis_sizes = h5.get_array("cuts_basis_sizes") - cut_rows = h5.get_array("cuts_rows") - h5.close() - current_basis = nothing - for (r, row) in enumerate(cut_rows) - if r == 1 || cut_basis_vars[r, :] != cut_basis_vars[r-1, :] - vbb, vnn, cbb, cnn = cut_basis_sizes[r, :] - current_basis = Basis(; - var_basic = cut_basis_vars[r, 1:vbb], - var_nonbasic = cut_basis_vars[r, vbb+1:vbb+vnn], - constr_basic = cut_basis_vars[r, vbb+vnn+1:vbb+vnn+cbb], - constr_nonbasic = cut_basis_vars[r, vbb+vnn+cbb+1:vbb+vnn+cbb+cnn], - ) + @timeit "Read problem data" begin + data = ProblemData(model) + end + @timeit "Convert to standard form" begin + data_s, transforms = convert_to_standard_form(data) + end + + @timeit "Collect cuts from H5 files" begin + cut_basis_vars = nothing + cut_basis_sizes = nothing + cut_rows = nothing + for h5_filename in train_h5 + h5 = H5File(h5_filename) + cut_basis_vars_sample = h5.get_array("cuts_basis_vars") + cut_basis_sizes_sample = h5.get_array("cuts_basis_sizes") + cut_rows_sample = h5.get_array("cuts_rows") + if cut_basis_vars === nothing + cut_basis_vars = cut_basis_vars_sample + cut_basis_sizes = cut_basis_sizes_sample + cut_rows = cut_rows_sample + else + cut_basis_vars = [cut_basis_vars; cut_basis_vars_sample] + cut_basis_sizes = [cut_basis_sizes; cut_basis_sizes_sample] + cut_rows = [cut_rows; cut_rows_sample] + end + h5.close() + end + ncuts, nvars = size(cut_basis_vars) + end + + @timeit "Group cuts by tableau basis" begin + vars_to_unique_basis_offset = Dict() + unique_basis_vars = Matrix{Int}(undef, 0, nvars) + unique_basis_sizes = Matrix{Int}(undef, 0, 4) + unique_basis_rows = Dict{Int,Set{Int}}() + for i in 1:ncuts + vars = cut_basis_vars[i, :] + sizes = cut_basis_sizes[i, :] + row = cut_rows[i] + if vars ∉ keys(vars_to_unique_basis_offset) + offset = size(unique_basis_vars)[1] + 1 + vars_to_unique_basis_offset[vars] = offset + unique_basis_vars = [unique_basis_vars; vars'] + unique_basis_sizes = [unique_basis_sizes; sizes'] + unique_basis_rows[offset] = Set() end + offset = vars_to_unique_basis_offset[vars] + push!(unique_basis_rows[offset], row) + end + end - # Detect and skip duplicated cuts - cut_id = (row, cut_basis_vars[r, :]) - cut_id ∉ visited || continue - push!(visited, cut_id) + @timeit "Compute tableaus and cuts" begin + all_cuts = nothing + for (offset, rows) in unique_basis_rows + vbb, vnn, cbb, cnn = unique_basis_sizes[offset, :] + current_basis = Basis(; + var_basic = unique_basis_vars[offset, 1:vbb], + var_nonbasic = unique_basis_vars[offset, vbb+1:vbb+vnn], + constr_basic = unique_basis_vars[offset, vbb+vnn+1:vbb+vnn+cbb], + constr_nonbasic = unique_basis_vars[offset, vbb+vnn+cbb+1:vbb+vnn+cbb+cnn], + ) - tableau = compute_tableau(data_s, current_basis, rows = [row]) + tableau = compute_tableau(data_s, current_basis; rows=collect(rows)) cuts_s = compute_gmi(data_s, tableau) cuts = backwards(transforms, cuts_s) @@ -509,38 +544,51 @@ end function KnnDualGmiComponent_before_mip(data::_KnnDualGmiData, test_h5, model, _) - x = _dualgmi_features(test_h5, data.extractor) - x = reshape(x, 1, length(x)) - neigh_dist, neigh_ind = data.model.kneighbors(x, return_distance = true) - neigh_ind = neigh_ind .+ 1 - N = length(neigh_dist) - - if data.strategy == "near" - selected = collect(1:(data.k)) - elseif data.strategy == "far" - selected = collect((N - data.k + 1) : N) - elseif data.strategy == "rand" - selected = shuffle(collect(1:N))[1:(data.k)] - else - error("unknown strategy: $(data.strategy)") + reset_timer!() + + @timeit "Extract features" begin + x = _dualgmi_features(test_h5, data.extractor) + x = reshape(x, 1, length(x)) end - @info "Dual GMI: Selected neighbors ($(data.strategy)):" - neigh_dist = neigh_dist[selected] - neigh_ind = neigh_ind[selected] - for i in 1:data.k - h5_filename = data.train_h5[neigh_ind[i]] - dist = neigh_dist[i] - @info " $(h5_filename) dist=$(dist)" + @timeit "Find neighbors" begin + neigh_dist, neigh_ind = data.model.kneighbors(x, return_distance = true) + neigh_ind = neigh_ind .+ 1 + N = length(neigh_dist) + + if data.strategy == "near" + selected = collect(1:(data.k)) + elseif data.strategy == "far" + selected = collect((N - data.k + 1) : N) + elseif data.strategy == "rand" + selected = shuffle(collect(1:N))[1:(data.k)] + else + error("unknown strategy: $(data.strategy)") + end + + @info "Dual GMI: Selected neighbors ($(data.strategy)):" + neigh_dist = neigh_dist[selected] + neigh_ind = neigh_ind[selected] + for i in 1:data.k + h5_filename = data.train_h5[neigh_ind[i]] + dist = neigh_dist[i] + @info " $(h5_filename) dist=$(dist)" + end end @info "Dual GMI: Generating cuts..." - time_generate = @elapsed begin - cuts = _dualgmi_generate(data.train_h5[neigh_ind], model) + @timeit "Generate cuts" begin + time_generate = @elapsed begin + cuts = _dualgmi_generate(data.train_h5[neigh_ind], model) + end + @info "Dual GMI: Generated $(length(cuts.lb)) unique cuts in $(time_generate) seconds" end - @info "Dual GMI: Generated $(length(cuts.lb)) unique cuts in $(time_generate) seconds" - _dualgmi_set_callback(model, cuts) + @timeit "Set callback" begin + _dualgmi_set_callback(model, cuts) + end + + print_timer() stats = Dict() stats["KnnDualGmi: k"] = data.k @@ -567,10 +615,11 @@ function __init_gmi_dual__() KnnDualGmiComponent_fit(self.data, train_h5) end function before_mip(self, test_h5, model, stats) - return KnnDualGmiComponent_before_mip(self.data, test_h5, model.inner, stats) + return @time KnnDualGmiComponent_before_mip(self.data, test_h5, model.inner, stats) end end copy!(KnnDualGmiComponent, Class2) end export collect_gmi_dual, expert_gmi_dual, ExpertDualGmiComponent, KnnDualGmiComponent + diff --git a/src/MIPLearn.jl b/src/MIPLearn.jl index 5034474..702e06b 100644 --- a/src/MIPLearn.jl +++ b/src/MIPLearn.jl @@ -6,6 +6,8 @@ module MIPLearn using PyCall using SparseArrays +using PrecompileTools: @setup_workload, @compile_workload + include("collectors.jl") include("components.jl") @@ -32,4 +34,51 @@ end include("BB/BB.jl") include("Cuts/Cuts.jl") +# Precompilation +# ============================================================================= + +function __precompile_cuts__() + function build_model(mps_filename) + model = read_from_file(mps_filename) + set_optimizer(model, SCIP.Optimizer) + return JumpModel(model) + end + BASEDIR = dirname(@__FILE__) + mps_filename = "$BASEDIR/../test/fixtures/bell5.mps.gz" + h5_filename = "$BASEDIR/../test/fixtures/bell5.h5" + collect_gmi_dual( + mps_filename; + optimizer=HiGHS.Optimizer, + max_rounds = 10, + max_cuts_per_round = 500, + ) + knn = KnnDualGmiComponent( + extractor = H5FieldsExtractor(instance_fields = ["static_var_obj_coeffs"]), + k = 2, + ) + knn.fit([h5_filename, h5_filename]) + solver = LearningSolver( + components = [ + ExpertPrimalComponent(action = SetWarmStart()), + knn, + ], + skip_lp = true, + ) + solver.optimize(mps_filename, build_model) +end + +@setup_workload begin + using SCIP + using HiGHS + using MIPLearn.Cuts + using PrecompileTools: @setup_workload, @compile_workload + + __init__() + Cuts.__init__() + + @compile_workload begin + __precompile_cuts__() + end +end + end # module diff --git a/test/fixtures/bell5.h5 b/test/fixtures/bell5.h5 index 541b0a6..a52c61d 100644 Binary files a/test/fixtures/bell5.h5 and b/test/fixtures/bell5.h5 differ diff --git a/test/src/Cuts/tableau/test_gmi_dual.jl b/test/src/Cuts/tableau/test_gmi_dual.jl index 8452be8..2b9bcf2 100644 --- a/test/src/Cuts/tableau/test_gmi_dual.jl +++ b/test/src/Cuts/tableau/test_gmi_dual.jl @@ -4,6 +4,7 @@ using SCIP using HiGHS +using MIPLearn.Cuts function test_cuts_tableau_gmi_dual_collect() mps_filename = "$BASEDIR/../fixtures/bell5.mps.gz" @@ -31,15 +32,15 @@ function test_cuts_tableau_gmi_dual_usage() mps_filename = "$BASEDIR/../fixtures/bell5.mps.gz" h5_filename = "$BASEDIR/../fixtures/bell5.h5" - # rm(h5_filename, force=true) + rm(h5_filename, force=true) - # # Run basic collector - # bc = BasicCollector(write_mps = false, skip_lp = true) - # bc.collect([mps_filename], build_model) + # Run basic collector + bc = BasicCollector(write_mps = false, skip_lp = true) + bc.collect([mps_filename], build_model) - # # Run dual GMI collector - # @info "Running dual GMI collector..." - # collect_gmi_dual(mps_filename, optimizer = HiGHS.Optimizer) + # Run dual GMI collector + @info "Running dual GMI collector..." + collect_gmi_dual(mps_filename, optimizer = HiGHS.Optimizer) # # Test expert component # solver = LearningSolver( @@ -65,6 +66,5 @@ function test_cuts_tableau_gmi_dual_usage() skip_lp = true, ) solver.optimize(mps_filename, build_model) - return end