# File: MIPLearn.jl/src/Cuts/tableau/gmi_dual.jl
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2023, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
using Printf
using JuMP
using HiGHS
using Random
using DataStructures
using Statistics
import ..H5FieldsExtractor
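# The Python-facing components below are created as empty PyObjects and
# filled in by __init_gmi_dual__ at module initialization, following the
# usual PyCall pattern (Python objects cannot be built at precompile time).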
global ExpertDualGmiComponent = PyNULL()
global KnnDualGmiComponent = PyNULL()
Base.@kwdef mutable struct _KnnDualGmiData
k = nothing
extractor = nothing
train_h5 = nothing
model = nothing
strategy = nothing
end
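# Runs up to `max_rounds` rounds of GMI cut generation on the LP relaxation.
# Each round converts the model to standard form, computes GMI cuts from the
# selected tableau rows, adds all accumulated cuts to the original model,
# keeps only the cuts with a nonzero shadow price, and folds those cuts into
# the objective through their dual values. The cuts, together with the bases
# that generated them, are stored back into the instance H5 file.
#
# Usage sketch (argument values are illustrative; HiGHS is imported above):
#   stats = collect_gmi_dual(
#       "instance.mps.gz";
#       optimizer = HiGHS.Optimizer,
#       max_rounds = 10,
#   )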
function collect_gmi_dual(
mps_filename;
optimizer,
max_rounds = 10,
max_cuts_per_round = 1_000_000,
time_limit = 3_600,
)
reset_timer!()
initial_time = time()
@timeit "Read H5" begin
h5_filename = replace(mps_filename, ".mps.gz" => ".h5")
h5 = H5File(h5_filename, "r")
sol_opt_dict = Dict(
zip(
h5.get_array("static_var_names"),
convert(Array{Float64}, h5.get_array("mip_var_values")),
),
)
obj_mip = h5.get_scalar("mip_obj_value")
h5.file.close()
end
# Define relative MIP gap
gap(v) = 100 * abs(obj_mip - v) / abs(obj_mip)
@timeit "Initialize" begin
stats_obj = []
stats_gap = []
stats_ncuts = []
original_basis = nothing
all_cuts = nothing
all_cuts_bases = nothing
all_cuts_rows = nothing
last_round_obj = nothing
end
@timeit "Read problem" begin
model = read_from_file(mps_filename)
set_optimizer(model, optimizer)
obj_original = objective_function(model)
end
for round = 1:max_rounds
@info "Round $(round)..."
@timeit "Convert model to standard form" begin
# Extract problem data
data = ProblemData(model)
# Construct optimal solution vector (with correct variable sequence)
sol_opt = [sol_opt_dict[n] for n in data.var_names]
# Assert optimal solution is feasible for the original problem
assert_leq(data.constr_lb, data.constr_lhs * sol_opt)
assert_leq(data.constr_lhs * sol_opt, data.constr_ub)
# Convert to standard form
data_s, transforms = convert_to_standard_form(data)
model_s = to_model(data_s)
set_optimizer(model_s, optimizer)
relax_integrality(model_s)
# Convert optimal solution to standard form
sol_opt_s = forward(transforms, sol_opt)
# Assert converted solution is feasible for standard form problem
assert_eq(data_s.constr_lhs * sol_opt_s, data_s.constr_lb)
end
@timeit "Optimize standard model" begin
@info "Optimizing standard model..."
optimize!(model_s)
obj = objective_value(model_s)
if round == 1
push!(stats_obj, obj)
push!(stats_gap, gap(obj))
push!(stats_ncuts, 0)
else
if obj ≈ last_round_obj
@info "No improvement in obj value. Aborting."
break
end
end
if termination_status(model_s) != MOI.OPTIMAL
error("Non-optimal termination status")
end
last_round_obj = obj
end
@timeit "Select tableau rows" begin
basis = get_basis(model_s)
if round == 1
original_basis = basis
end
sol_frac = get_x(model_s)
selected_rows =
select_gmi_rows(data_s, basis, sol_frac, max_rows = max_cuts_per_round)
end
@timeit "Compute tableau rows" begin
tableau = compute_tableau(data_s, basis, x = sol_frac, rows = selected_rows)
# Assert tableau rows have been computed correctly
assert_eq(tableau.lhs * sol_frac, tableau.rhs, atol=1e-3)
assert_eq(tableau.lhs * sol_opt_s, tableau.rhs, atol=1e-3)
end
@timeit "Compute GMI cuts" begin
cuts_s = compute_gmi(data_s, tableau)
# Assert cuts have been generated correctly
assert_cuts_off(cuts_s, sol_frac)
assert_does_not_cut_off(cuts_s, sol_opt_s)
# Abort if no cuts are left
if length(cuts_s.lb) == 0
@info "No cuts generated. Aborting."
break
else
@info "Generated $(length(cuts_s.lb)) cuts"
end
end
@timeit "Add GMI cuts to original model" begin
@timeit "Convert to original form" begin
cuts = backwards(transforms, cuts_s)
end
@timeit "Prepare bv" begin
bv = repeat([basis], length(selected_rows))
end
@timeit "Append matrices" begin
if round == 1
all_cuts = cuts
all_cuts_bases = bv
all_cuts_rows = selected_rows
else
all_cuts.lhs = [all_cuts.lhs; cuts.lhs]
all_cuts.lb = [all_cuts.lb; cuts.lb]
all_cuts.ub = [all_cuts.ub; cuts.ub]
all_cuts_bases = [all_cuts_bases; bv]
all_cuts_rows = [all_cuts_rows; selected_rows]
end
end
@timeit "Add to model" begin
@info "Adding $(length(all_cuts.lb)) constraints to original model"
constrs, gmi_exps = add_constraint_set_dual_v2(model, all_cuts)
end
end
@timeit "Optimize original model" begin
set_objective_function(model, obj_original)
undo_relax = relax_integrality(model)
@info "Optimizing original model (constr)..."
optimize!(model)
obj = objective_value(model)
push!(stats_obj, obj)
push!(stats_gap, gap(obj))
sp = [shadow_price(c) for c in constrs]
undo_relax()
useful = [abs(sp[i]) > 1e-6 for (i, _) in enumerate(constrs)]
keep = findall(useful)
end
@timeit "Filter out useless cuts" begin
@info "Keeping $(length(keep)) useful cuts"
all_cuts.lhs = all_cuts.lhs[keep, :]
all_cuts.lb = all_cuts.lb[keep]
all_cuts.ub = all_cuts.ub[keep]
all_cuts_bases = all_cuts_bases[keep, :]
all_cuts_rows = all_cuts_rows[keep, :]
push!(stats_ncuts, length(all_cuts_rows))
if isempty(keep)
break
end
end
@timeit "Update obj function of original model" begin
delete.(model, constrs)
set_objective_function(
model,
obj_original -
sum(sp[i] * gmi_exps[i] for (i, c) in enumerate(constrs) if useful[i]),
)
end
elapsed_time = time() - initial_time
if elapsed_time > time_limit
@info "Time limit exceeded. Stopping."
break
end
end
@timeit "Store cuts in H5 file" begin
if all_cuts !== nothing
ncuts = length(all_cuts_rows)
total =
length(original_basis.var_basic) +
length(original_basis.var_nonbasic) +
length(original_basis.constr_basic) +
length(original_basis.constr_nonbasic)
all_cuts_basis_sizes = Array{Int64,2}(undef, ncuts, 4)
all_cuts_basis_vars = Array{Int64,2}(undef, ncuts, total)
for i = 1:ncuts
vb = all_cuts_bases[i].var_basic
vn = all_cuts_bases[i].var_nonbasic
cb = all_cuts_bases[i].constr_basic
cn = all_cuts_bases[i].constr_nonbasic
all_cuts_basis_sizes[i, :] = [length(vb) length(vn) length(cb) length(cn)]
all_cuts_basis_vars[i, :] = [vb' vn' cb' cn']
end
@info "Storing $(length(all_cuts.ub)) GMI cuts..."
h5 = H5File(h5_filename, "r+")
h5.put_sparse("cuts_lhs", all_cuts.lhs)
h5.put_array("cuts_lb", all_cuts.lb)
h5.put_array("cuts_ub", all_cuts.ub)
h5.put_array("cuts_basis_vars", all_cuts_basis_vars)
h5.put_array("cuts_basis_sizes", all_cuts_basis_sizes)
h5.put_array("cuts_rows", all_cuts_rows)
h5.file.close()
end
end
to = TimerOutputs.get_defaulttimer()
stats_time = TimerOutputs.tottime(to) / 1e9
print_timer()
return OrderedDict(
"instance" => mps_filename,
"max_rounds" => max_rounds,
"rounds" => length(stats_obj) - 1,
"obj_mip" => obj_mip,
"stats_obj" => stats_obj,
"stats_gap" => stats_gap,
"stats_ncuts" => stats_ncuts,
"stats_time" => stats_time,
)
end
# TODO:
# blp-ic98
# neos-3627168-kasai
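# Relax-and-cut scheme in the style of Fischetti & Salvagnin (2011), as the
# function name suggests: GMI cuts are collected in a pool and dualized into
# the objective with Lagrangian multipliers, which are updated either by
# subgradient steps or, periodically, by solving a larger LP that contains
# the most violated pool cuts. The `variant` keyword controls how often the
# tableau is read and how often the large LP is solved.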
function collect_gmi_FisSal2011(
mps_filename;
interval_print_sec = 1,
max_cuts_per_round = 1_000_000,
max_pool_size_mb = 1024,
optimizer,
silent_solver = true,
time_limit = 300,
variant = :miplearn,
verify_cuts = true,
)
variant in [:subg, :hybr, :fast, :faster, :miplearn] || error("unknown variant: $variant")
if variant == :subg
max_rounds = 10_000
interval_large_lp = 10_000
interval_read_tableau = 10
elseif variant == :hybr
max_rounds = 10_000
interval_large_lp = 1_000
interval_read_tableau = 10
elseif variant == :fast
max_rounds = 1_000
interval_large_lp = 100
interval_read_tableau = 1
elseif variant == :faster
max_rounds = 500
interval_large_lp = 50
interval_read_tableau = 1
elseif variant == :miplearn
max_rounds = 1_000_000
interval_large_lp = 100
interval_read_tableau = 1
end
gapcl_best_patience = 2 * interval_large_lp + 5
reset_timer!()
initial_time = time()
@timeit "Read H5" begin
h5_filename = replace(mps_filename, ".mps.gz" => ".h5")
h5 = H5File(h5_filename, "r")
sol_opt_dict = Dict(
zip(
h5.get_array("static_var_names"),
convert(Array{Float64}, h5.get_array("mip_var_values")),
),
)
obj_mip = h5.get_scalar("mip_obj_value")
h5.file.close()
end
@timeit "Initialize" begin
count_backtrack = 0
count_deterioration = 0
gapcl_best = 0
gapcl_best_history = CircularBuffer{Float64}(gapcl_best_patience)
gapcl_curr = 0
last_print_time = 0
multipliers_best = Float64[]
multipliers_curr = Float64[]
obj_best = nothing
obj_curr = nothing
obj_hist = CircularBuffer{Float64}(100)
obj_initial = nothing
pool = nothing
pool_cut_age = nothing
pool_cut_hashes = Set{UInt64}()
pool_size_mb = 0
tableau_density::Float32 = 0.05
basis_cache = nothing
λ, Δ = 0, 0
μ = 10
basis_vars_to_id = Dict()
basis_id_to_vars = Dict{Int, Vector{Int}}()
basis_id_to_sizes = Dict{Int, Vector{Int}}()
next_basis_id = 1
cut_basis_id = Int[]
cut_row = Int[]
end
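# Percentage of the LP-to-MIP gap closed by bound v: 0 at the initial LP
# bound, 100 at the MIP optimal value.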
gapcl(v) = 100 * (v - obj_initial) / (obj_mip - obj_initial)
@timeit "Read problem" begin
model = read_from_file(mps_filename)
set_optimizer(model, optimizer)
end
@timeit "Convert model to standard form" begin
# Extract problem data
data = ProblemData(model)
# Construct optimal solution vector (with correct variable sequence)
sol_opt = [sol_opt_dict[n] for n in data.var_names]
# Assert optimal solution is feasible for the original problem
assert_leq(data.constr_lb, data.constr_lhs * sol_opt)
assert_leq(data.constr_lhs * sol_opt, data.constr_ub)
for (var_idx, var_type) in enumerate(data.var_types)
if var_type in ['B', 'I']
assert_int(sol_opt[var_idx])
end
end
# Convert to standard form
data_s, transforms = convert_to_standard_form(data)
model_s = to_model(data_s)
if silent_solver
set_silent(model_s)
end
vars_s = all_variables(model_s)
orig_obj_s = objective_function(model_s)
set_optimizer(model_s, optimizer)
relax_integrality(model_s)
# Convert optimal solution to standard form
sol_opt_s = forward(transforms, sol_opt)
# Assert converted solution is feasible for standard form problem
for (var_idx, var_type) in enumerate(data_s.var_types)
if var_type in ['B', 'I']
assert_int(sol_opt_s[var_idx])
end
end
assert_eq(data_s.constr_lhs * sol_opt_s, data_s.constr_lb)
end
@info "Standard form model has $(length(data_s.var_lb)) vars, $(length(data_s.constr_lb)) constrs"
for round = 1:max_rounds
log_prefix = ' '
log_should_print = false
is_last_iteration = false
if round == max_rounds
is_last_iteration = true
end
elapsed_time = time() - initial_time
if elapsed_time > time_limit
@info "Time limit exceeded. Stopping after current iteration."
is_last_iteration = true
end
if round > 1
@timeit "Build Lagrangian term" begin
@timeit "mul" begin
active_idx = findall(multipliers_curr .> 1e-6)
v = sparse(pool.lhs[:, active_idx] * multipliers_curr[active_idx])
end
@timeit "dot" begin
lagr_term = AffExpr(dot(multipliers_curr, pool.lb))
end
@timeit "add_to_expression!" begin
for offset in 1:nnz(v)
var_idx = v.nzind[offset]
add_to_expression!(
lagr_term,
vars_s[var_idx],
- v.nzval[offset],
)
end
end
end
@timeit "Update objective" begin
set_objective_function(
model_s,
orig_obj_s + lagr_term,
)
end
end
@timeit "Optimize LP (lagrangian)" begin
basis_cache === nothing || set_basis(model_s, basis_cache)
set_silent(model_s)
optimize!(model_s)
basis_cache = get_basis(model_s)
status = termination_status(model_s)
if status == MOI.DUAL_INFEASIBLE
@warn "LP is unbounded (dual infeasible). Resetting to best known multipliers."
copy!(multipliers_curr, multipliers_best)
obj_curr = obj_best
continue
elseif status != MOI.OPTIMAL
error("Non-optimal termination status: $status")
end
sol_frac = get_x(model_s)
obj_curr = objective_value(model_s)
end
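# Step-size management: for the subgradient variants, halve μ after ten
# consecutive deteriorations (backtracking to the best multipliers) and
# rescale μ based on how much the best bound moved over the last 100
# iterations; the LP-based variants use a small fixed μ.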
@timeit "Update history and μ" begin
push!(obj_hist, obj_curr)
if obj_best === nothing || obj_curr > obj_best
log_prefix = '*'
obj_best = obj_curr
copy!(multipliers_best, multipliers_curr)
end
if round == 1
obj_initial = obj_curr
end
gapcl_curr = gapcl(obj_curr)
gapcl_best = gapcl(obj_best)
push!(gapcl_best_history, gapcl_best)
if variant in [:subg, :hybr]
Δ = obj_mip - obj_best
if obj_curr < obj_best - Δ
count_deterioration += 1
else
count_deterioration = 0
end
if count_deterioration >= 10
μ *= 0.5
copy!(multipliers_curr, multipliers_best)
count_deterioration = 0
count_backtrack += 1
elseif length(obj_hist) >= 100
obj_hist_avg = mean(obj_hist)
improv = obj_best - obj_hist[1]
if improv < 0.01 * Δ
if obj_best - obj_hist_avg < 0.001 * Δ
μ = 10 * μ
elseif obj_best - obj_hist_avg < 0.01 * Δ
μ = 2 * μ
else
μ = 0.5 * μ
end
end
end
elseif variant in [:fast, :faster, :miplearn]
μ = 0.01
else
error("not implemented")
end
end
if mod(round - 1, interval_read_tableau) == 0
@timeit "Get basis" begin
basis = get_basis(model_s)
end
@timeit "Select tableau rows" begin
selected_rows =
select_gmi_rows(data_s, basis, sol_frac, max_rows = max_cuts_per_round)
end
@timeit "Compute tableau rows" begin
tableau = compute_tableau(data_s, basis, x = sol_frac, rows = selected_rows, estimated_density=tableau_density * 1.05)
tableau_density = nnz(tableau.lhs) / length(tableau.lhs)
assert_eq(tableau.lhs * sol_frac, tableau.rhs, atol=1e-3)
assert_eq(tableau.lhs * sol_opt_s, tableau.rhs, atol=1e-3)
end
@timeit "Compute GMI cuts" begin
cuts_s = compute_gmi(data_s, tableau)
end
@timeit "Check cut validity" begin
assert_cuts_off(cuts_s, sol_frac)
assert_does_not_cut_off(cuts_s, sol_opt_s)
ncuts = length(cuts_s.lb)
end
@timeit "Add new cuts to the pool" begin
@timeit "Compute cut hashses" begin
unique_indices = Int[]
for i in 1:ncuts
cut_hash = cuts_s.hash[i]
if !(cut_hash in pool_cut_hashes)
push!(pool_cut_hashes, cut_hash)
push!(unique_indices, i)
end
end
end
@timeit "Append unique cuts" begin
@timeit "Track basis" begin
vb = basis.var_basic
vn = basis.var_nonbasic
cb = basis.constr_basic
cn = basis.constr_nonbasic
basis_vars = [vb; vn; cb; cn]
basis_sizes = [length(vb), length(vn), length(cb), length(cn)]
if basis_vars ∉ keys(basis_vars_to_id)
basis_id = next_basis_id
basis_vars_to_id[basis_vars] = basis_id
basis_id_to_vars[basis_id] = basis_vars
basis_id_to_sizes[basis_id] = basis_sizes
next_basis_id += 1
else
basis_id = basis_vars_to_id[basis_vars]
end
end
if round == 1
pool = ConstraintSet(
lhs = sparse(cuts_s.lhs[unique_indices, :]'),
lb = cuts_s.lb[unique_indices],
ub = cuts_s.ub[unique_indices],
hash = cuts_s.hash[unique_indices],
)
ncuts_unique = length(unique_indices)
multipliers_curr = zeros(ncuts_unique)
multipliers_best = zeros(ncuts_unique)
pool_cut_age = zeros(ncuts_unique)
for i in unique_indices
push!(cut_basis_id, basis_id)
push!(cut_row, selected_rows[i])
end
else
if !isempty(unique_indices)
@timeit "Append LHS" begin
# Transpose cuts matrix for better performance
new_cuts_lhs = sparse(cuts_s.lhs[unique_indices, :]')
# Resize existing matrix in-place to accommodate new columns
old_cols = pool.lhs.n
new_cols = new_cuts_lhs.n
total_cols = old_cols + new_cols
resize!(pool.lhs.colptr, total_cols + 1)
# Append new column pointers with offset
old_nnz = nnz(pool.lhs)
for i in 1:new_cols
pool.lhs.colptr[old_cols + i + 1] = old_nnz + new_cuts_lhs.colptr[i + 1]
end
# Expand rowval and nzval arrays
append!(pool.lhs.rowval, new_cuts_lhs.rowval)
append!(pool.lhs.nzval, new_cuts_lhs.nzval)
# Update matrix dimensions
pool.lhs = SparseMatrixCSC(pool.lhs.m, total_cols, pool.lhs.colptr, pool.lhs.rowval, pool.lhs.nzval)
end
@timeit "Append others" begin
ncuts_unique = length(unique_indices)
append!(pool.lb, cuts_s.lb[unique_indices])
append!(pool.ub, cuts_s.ub[unique_indices])
append!(pool.hash, cuts_s.hash[unique_indices])
append!(multipliers_curr, zeros(ncuts_unique))
append!(multipliers_best, zeros(ncuts_unique))
append!(pool_cut_age, zeros(ncuts_unique))
for i in unique_indices
push!(cut_basis_id, basis_id)
push!(cut_row, selected_rows[i])
end
end
end
end
end
end
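# Pool pruning: when the pool exceeds max_pool_size_mb, repeatedly drop the
# lowest-ranked 20% of cuts, ranking cuts with a positive best multiplier
# first and, within each group, younger (recently active) cuts ahead of
# older ones. Bases no longer referenced by any cut are forgotten as well.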
@timeit "Prune the pool" begin
pool_size_mb = Base.summarysize(pool) / 1024^2
while pool_size_mb >= max_pool_size_mb
@timeit "Identify cuts to remove" begin
scores = collect(zip(multipliers_best .> 1e-6, -pool_cut_age))
σ = sortperm(scores, rev=true)
pool_size = length(pool.ub)
n_keep = Int(floor(pool_size * 0.8))
idx_keep = σ[1:n_keep]
idx_remove = σ[(n_keep+1):end]
positive_multipliers_dropped = sum(multipliers_best[idx_remove] .> 1e-6)
@info "Dropping $(length(idx_remove)) cuts ($(positive_multipliers_dropped) with multipliers_best)"
end
@timeit "Update cut hashes" begin
for idx in idx_remove
cut_hash = pool.hash[idx]
delete!(pool_cut_hashes, cut_hash)
end
end
@timeit "Update cut pool" begin
pool.lhs = pool.lhs[:, idx_keep]
pool.ub = pool.ub[idx_keep]
pool.lb = pool.lb[idx_keep]
pool.hash = pool.hash[idx_keep]
multipliers_curr = multipliers_curr[idx_keep]
multipliers_best = multipliers_best[idx_keep]
pool_cut_age = pool_cut_age[idx_keep]
cut_basis_id = cut_basis_id[idx_keep]
cut_row = cut_row[idx_keep]
end
@timeit "Update known bases" begin
used_basis_ids = Set(cut_basis_id)
for basis_id in collect(keys(basis_id_to_vars))
if basis_id ∉ used_basis_ids
basis_vars = basis_id_to_vars[basis_id]
delete!(basis_vars_to_id, basis_vars)
delete!(basis_id_to_vars, basis_id)
delete!(basis_id_to_sizes, basis_id)
end
end
end
pool_size_mb = Base.summarysize(pool) / 1024^2
end
end
end
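# Exact multiplier refresh: every `interval_large_lp` rounds (and on the
# last iteration), repeatedly re-solve the LP, add the most violated pool
# cuts as explicit constraints until none remain violated, read the new
# multipliers off the shadow prices, and delete the constraints again.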
if mod(round - 1, interval_large_lp) == 0 || is_last_iteration
log_should_print = true
@timeit "Update multipliers (large LP)" begin
selected_idx = []
selected_contrs = []
while true
@timeit "Optimize LP (extended)" begin
set_silent(model_s)
set_objective_function(model_s, orig_obj_s)
optimize!(model_s)
status = termination_status(model_s)
if status != MOI.OPTIMAL
error("Non-optimal termination status: $status")
end
obj_curr = objective_value(model_s)
sol_frac = get_x(model_s)
end
@timeit "Computing cut violations" begin
violations = pool.lb - pool.lhs' * sol_frac
end
@timeit "Sorting cut violations" begin
σ = sortperm(violations, rev=true)
end
if violations[σ[1]] <= 1e-6
break
end
@timeit "Add constraints to the model" begin
ncuts = min(max(1, sum(violations .> 1e-6) ÷ 10), length(σ))
for i in 1:ncuts
if violations[σ[i]] <= 1e-6
break
end
cut_lhs = pool.lhs[:, σ[i]]
cut_lhs_value = 0.0
cut_lb = pool.lb[σ[i]]
cut_expr = AffExpr()
for offset in 1:nnz(cut_lhs)
var_idx = cut_lhs.nzind[offset]
add_to_expression!(
cut_expr,
vars_s[var_idx],
cut_lhs.nzval[offset],
)
cut_lhs_value += sol_frac[var_idx] * cut_lhs.nzval[offset]
end
cut_constr = @constraint(model_s, cut_expr >= cut_lb)
push!(selected_idx, σ[i])
push!(selected_contrs, cut_constr)
end
end
end
@timeit "Find dual values for all selected cuts" begin
multipliers_curr .= 0
pool_cut_age .+= 1
for (offset, idx) in enumerate(selected_idx)
multipliers_curr[idx] = -shadow_price(selected_contrs[offset])
if multipliers_curr[idx] > 1e-5
pool_cut_age[idx] = 0
end
end
end
@timeit "Update best" begin
if obj_curr > obj_best
log_prefix = '*'
obj_best = obj_curr
copy!(multipliers_best, multipliers_curr)
end
gapcl_curr = gapcl(obj_curr)
gapcl_best = gapcl(obj_best)
end
@timeit "Delete all cut constraints" begin
delete.(model_s, selected_contrs)
end
end
else
@timeit "Update multipliers (subgradient)" begin
subgrad = (pool.lb' - (sol_frac' * pool.lhs))'
subgrad_norm_sq = norm(subgrad)^2
if subgrad_norm_sq < 1e-10
λ = 0
else
λ = μ * (obj_mip - obj_curr) / subgrad_norm_sq
end
multipliers_curr = max.(0, multipliers_curr .+ λ * subgrad)
end
end
if round == 1
@printf(
" %8s %8s %10s %9s %9s %9s %9s %4s %8s %8s %8s\n",
"time",
"round",
"obj",
"cl_curr",
"cl_best",
"pool_cuts",
"pool_mb",
"bktk",
"Δ",
"μ",
"λ",
)
end
if time() - last_print_time > interval_print_sec
log_should_print = true
end
if is_last_iteration
log_should_print = true
end
if log_should_print
last_print_time = time()
@printf(
"%c %8.2f %8d %10.3e %9.2e %9.2e %9d %9.2f %4d %8.2e %8.2e %8.2e\n",
log_prefix,
time() - initial_time,
round,
obj_curr,
gapcl_curr,
gapcl_best,
length(pool.ub),
pool_size_mb,
count_backtrack,
Δ,
μ,
λ,
)
end
push!(gapcl_best_history, gapcl_best)
if length(gapcl_best_history) >= gapcl_best_patience
if gapcl_best <= gapcl_best_history[1]
@info "No gap closure improvement. Stopping."
break
end
end
if is_last_iteration
break
end
end
@info "Best gap closure: $(gapcl_best)"
@timeit "Keep only active cuts" begin
positive_idx = findall(multipliers_best .> 1e-6)
if length(positive_idx) == 0 && gapcl_best > 0
error("gap closure with zero cuts")
end
@timeit "Clean up cut pool" begin
pool.lhs = pool.lhs[:, positive_idx]
pool.lb = pool.lb[positive_idx]
pool.ub = pool.ub[positive_idx]
pool.hash = pool.hash[positive_idx]
multipliers_best = multipliers_best[positive_idx]
multipliers_curr = multipliers_curr[positive_idx]
cut_basis_id = cut_basis_id[positive_idx]
cut_row = cut_row[positive_idx]
end
@timeit "Clean up known bases" begin
used_basis_ids = Set(cut_basis_id)
for basis_id in collect(keys(basis_id_to_vars))
if basis_id ∉ used_basis_ids
basis_vars = basis_id_to_vars[basis_id]
delete!(basis_vars_to_id, basis_vars)
delete!(basis_id_to_vars, basis_id)
delete!(basis_id_to_sizes, basis_id)
end
end
end
@info "Keeping $(length(positive_idx)) cuts from $(length(used_basis_ids)) unique bases"
end
to = TimerOutputs.get_defaulttimer()
stats_time = TimerOutputs.tottime(to) / 1e9
print_timer()
if length(positive_idx) > 0
@timeit "Write cuts to H5" begin
if !isempty(cut_basis_id)
@timeit "Convert IDs to offsets" begin
id_to_offset = Dict{Int, Int}()
gmi_basis_vars = []
gmi_basis_sizes = []
for (offset, basis_id) in enumerate(sort(collect(keys(basis_id_to_vars))))
id_to_offset[basis_id] = offset
push!(gmi_basis_vars, basis_id_to_vars[basis_id])
push!(gmi_basis_sizes, basis_id_to_sizes[basis_id])
end
gmi_cut_basis = [id_to_offset[basis_id] for basis_id in cut_basis_id]
gmi_cut_row = cut_row
end
@timeit "Convert to matrices" begin
gmi_basis_vars_matrix = hcat(gmi_basis_vars...)'
gmi_basis_sizes_matrix = hcat(gmi_basis_sizes...)'
end
@timeit "Write H5" begin
h5 = H5File(h5_filename, "r+")
h5.put_array("gmi_basis_vars", gmi_basis_vars_matrix)
h5.put_array("gmi_basis_sizes", gmi_basis_sizes_matrix)
h5.put_array("gmi_cut_basis", gmi_cut_basis)
h5.put_array("gmi_cut_row", gmi_cut_row)
h5.file.close()
end
end
end
if verify_cuts
@timeit "Verify cuts in current model" begin
@info "Verifying cuts in current standard form model using pool..."
if !isempty(cut_basis_id)
@info "Adding $(length(pool.lb)) cuts from pool to current model"
pool.lhs = sparse(pool.lhs')
constrs = build_constraints(model_s, pool)
add_constraint.(model_s, constrs)
set_objective_function(model_s, orig_obj_s)
optimize!(model_s)
status = termination_status(model_s)
if status != MOI.OPTIMAL
error("Non-optimal termination status: $status")
end
obj_verify_s = objective_value(model_s)
gapcl_verify_s = gapcl(obj_verify_s)
@show gapcl_verify_s
@show gapcl_best
if abs(gapcl_best - gapcl_verify_s) > 0.01
error("Gap closures differ: $(gapcl_best)$(gapcl_verify_s)")
end
@info "Current model gap closure matches: $(gapcl_best)$(gapcl_verify_s)"
else
@warn "No cuts in pool to verify"
end
end
@timeit "Verify stored cuts" begin
@info "Verifying stored cuts..."
model_verify = read_from_file(mps_filename)
set_optimizer(model_verify, optimizer)
verification_cuts = _dualgmi_generate([h5_filename], model_verify; test_h5=h5_filename)
constrs = build_constraints(model_verify, verification_cuts)
add_constraint.(model_verify, constrs)
relax_integrality(model_verify)
optimize!(model_verify)
status = termination_status(model_verify)
if status != MOI.OPTIMAL
error("Non-optimal termination status: $status")
end
obj_verify = objective_value(model_verify)
gapcl_verify = gapcl(obj_verify)
@show gapcl_verify
@show gapcl_best
if abs(gapcl_best - gapcl_verify) > 0.01
error("Gap closures differ: $(gapcl_best)$(gapcl_verify)")
end
@info "Gap closure matches gapcl_best: $(gapcl_best)$(gapcl_verify)"
end
end
end
return OrderedDict(
"gapcl_best" => gapcl_best,
"gapcl_curr" => gapcl_curr,
"instance" => mps_filename,
"obj_final" => obj_curr,
"obj_initial" => obj_initial,
"obj_mip" => obj_mip,
"pool_size_mb" => pool_size_mb,
"pool_total" => length(pool.lb),
"time" => stats_time,
)
end
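# Adds every row of `cs` to `model` as an explicit constraint and, for each
# constraint, also returns an affine expression that is nonpositive whenever
# the constraint is satisfied (lb - expr, or expr - ub). The caller scales
# these expressions by the shadow prices and subtracts them from the
# objective after deleting the explicit constraints.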
function add_constraint_set_dual_v2(model::JuMP.Model, cs::ConstraintSet)
vars = all_variables(model)
nrows, ncols = size(cs.lhs)
@timeit "Transpose LHS" begin
lhs_t = spzeros(ncols, nrows)
ftranspose!(lhs_t, cs.lhs, x -> x)
lhs_t_rows = rowvals(lhs_t)
lhs_t_vals = nonzeros(lhs_t)
end
constrs = []
gmi_exps = []
for i = 1:nrows
c = nothing
gmi_exp = nothing
gmi_exp2 = nothing
@timeit "Build expr" begin
expr = AffExpr()
for k in nzrange(lhs_t, i)
add_to_expression!(expr, lhs_t_vals[k], vars[lhs_t_rows[k]])
end
end
@timeit "Add constraints" begin
if isinf(cs.ub[i])
c = @constraint(model, cs.lb[i] <= expr)
gmi_exp = cs.lb[i] - expr
elseif isinf(cs.lb[i])
c = @constraint(model, expr <= cs.ub[i])
gmi_exp = expr - cs.ub[i]
else
c = @constraint(model, cs.lb[i] <= expr <= cs.ub[i])
gmi_exp = cs.lb[i] - expr
gmi_exp2 = expr - cs.ub[i]
end
end
@timeit "Update structs" begin
push!(constrs, c)
push!(gmi_exps, gmi_exp)
if !isnothing(gmi_exp2)
push!(gmi_exps, gmi_exp2)
end
end
end
return constrs, gmi_exps
end
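# Extracts the instance feature vector used by the k-NN component from an
# H5 file, closing the file even if the extractor fails.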
function _dualgmi_features(h5_filename, extractor)
h5 = H5File(h5_filename, "r")
try
return extractor.get_instance_features(h5)
finally
h5.close()
end
end
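# Rewrites the per-cut basis arrays produced by collect_gmi_dual into a
# compact form: one row per unique basis ("gmi_basis_vars" and
# "gmi_basis_sizes"), plus, for each cut, the offset of its basis
# ("gmi_cut_basis") and its tableau row ("gmi_cut_row").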
function _dualgmi_compress_h5(h5_filename)
vars_to_basis_offset = Dict()
basis_vars = []
basis_sizes = []
cut_basis::Array{Int} = []
cut_row::Array{Int} = []
h5 = H5File(h5_filename, "r")
orig_cut_basis_vars = h5.get_array("cuts_basis_vars")
orig_cut_basis_sizes = h5.get_array("cuts_basis_sizes")
orig_cut_rows = h5.get_array("cuts_rows")
h5.close()
if orig_cut_basis_vars === nothing
@warn "orig_cut_basis_vars is null; skipping file"
return
end
ncuts, _ = size(orig_cut_basis_vars)
if ncuts == 0
return
end
for i in 1:ncuts
vars = orig_cut_basis_vars[i, :]
sizes = orig_cut_basis_sizes[i, :]
row = orig_cut_rows[i]
if vars ∉ keys(vars_to_basis_offset)
offset = size(basis_vars)[1] + 1
vars_to_basis_offset[vars] = offset
push!(basis_vars, vars)
push!(basis_sizes, sizes)
end
offset = vars_to_basis_offset[vars]
push!(cut_basis, offset)
push!(cut_row, row)
end
basis_vars = hcat(basis_vars...)'
basis_sizes = hcat(basis_sizes...)'
_, n_vars = size(basis_vars)
if n_vars == 0
@warn "n_vars is zero; skipping file"
return
end
h5 = H5File(h5_filename, "r+")
h5.put_array("gmi_basis_vars", basis_vars)
h5.put_array("gmi_basis_sizes", basis_sizes)
h5.put_array("gmi_cut_basis", cut_basis)
h5.put_array("gmi_cut_row", cut_row)
h5.file.close()
end
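# Regenerates GMI cuts for `model` from the bases and tableau rows stored in
# the training H5 files: bases are deduplicated across files, the tableau is
# recomputed for each unique basis, and the resulting cuts are mapped back
# to the original formulation. If `test_h5` provides an optimal solution,
# any tableau whose cuts would cut it off is skipped with a warning.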
function _dualgmi_generate(train_h5, model; test_h5=nothing)
@timeit "Read problem data" begin
data = ProblemData(model)
end
@timeit "Convert to standard form" begin
data_s, transforms = convert_to_standard_form(data)
end
@timeit "Read optimal solution from test H5" begin
sol_opt_dict = nothing
sol_opt = nothing
sol_opt_s = nothing
if test_h5 !== nothing
try
h5 = H5File(test_h5, "r")
var_names = h5.get_array("static_var_names")
var_values = h5.get_array("mip_var_values")
h5.close()
if var_names !== nothing && var_values !== nothing
sol_opt_dict = Dict(zip(var_names, convert(Array{Float64}, var_values)))
sol_opt = [sol_opt_dict[n] for n in data.var_names]
sol_opt_s = forward(transforms, sol_opt)
@info "Loaded optimal solution for cut validation"
end
catch e
@warn "Could not read optimal solution from test H5 file: $e"
end
end
end
@timeit "Collect cuts from H5 files" begin
basis_vars_to_basis_offset = Dict()
combined_basis_sizes = nothing
combined_basis_sizes_list = Any[]
combined_basis_vars = nothing
combined_basis_vars_list = Any[]
combined_cut_rows = Any[]
for h5_filename in train_h5
@timeit "get_array (new)" begin
h5 = H5File(h5_filename, "r")
gmi_basis_vars = h5.get_array("gmi_basis_vars")
if gmi_basis_vars === nothing
@warn "$(h5_filename) does not contain gmi_basis_vars; skipping"
continue
end
gmi_basis_sizes = h5.get_array("gmi_basis_sizes")
gmi_cut_basis = h5.get_array("gmi_cut_basis")
gmi_cut_row = h5.get_array("gmi_cut_row")
h5.close()
end
@timeit "combine basis" begin
nbasis, _ = size(gmi_basis_vars)
local_to_combined_offset = Dict()
for local_offset in 1:nbasis
vars = gmi_basis_vars[local_offset, :]
sizes = gmi_basis_sizes[local_offset, :]
if vars ∉ keys(basis_vars_to_basis_offset)
combined_offset = length(combined_basis_vars_list) + 1
basis_vars_to_basis_offset[vars] = combined_offset
push!(combined_basis_vars_list, vars)
push!(combined_basis_sizes_list, sizes)
push!(combined_cut_rows, Set{Int}())
end
combined_offset = basis_vars_to_basis_offset[vars]
local_to_combined_offset[local_offset] = combined_offset
end
end
@timeit "combine rows" begin
ncuts = length(gmi_cut_row)
for i in 1:ncuts
local_offset = gmi_cut_basis[i]
combined_offset = local_to_combined_offset[local_offset]
row = gmi_cut_row[i]
push!(combined_cut_rows[combined_offset], row)
end
end
@timeit "convert lists to matrices" begin
combined_basis_vars = hcat(combined_basis_vars_list...)'
combined_basis_sizes = hcat(combined_basis_sizes_list...)'
end
end
end
@timeit "Compute tableaus and cuts" begin
all_cuts = nothing
nbasis = length(combined_cut_rows)
for offset in 1:nbasis
rows = combined_cut_rows[offset]
try
vbb, vnn, cbb, cnn = combined_basis_sizes[offset, :]
current_basis = Basis(;
var_basic = combined_basis_vars[offset, 1:vbb],
var_nonbasic = combined_basis_vars[offset, vbb+1:vbb+vnn],
constr_basic = combined_basis_vars[offset, vbb+vnn+1:vbb+vnn+cbb],
constr_nonbasic = combined_basis_vars[offset, vbb+vnn+cbb+1:vbb+vnn+cbb+cnn],
)
tableau = compute_tableau(data_s, current_basis; rows=collect(rows))
cuts_s = compute_gmi(data_s, tableau)
cuts = backwards(transforms, cuts_s)
if sol_opt_s !== nothing && sol_opt !== nothing
assert_does_not_cut_off(cuts_s, sol_opt_s)
assert_does_not_cut_off(cuts, sol_opt)
end
if all_cuts === nothing
all_cuts = cuts
else
all_cuts.lhs = [all_cuts.lhs; cuts.lhs]
all_cuts.lb = [all_cuts.lb; cuts.lb]
all_cuts.ub = [all_cuts.ub; cuts.ub]
end
catch e
if isa(e, AssertionError)
@warn "Numerical error detected. Skipping cuts from current tableau."
continue
else
rethrow(e)
end
end
end
end
return all_cuts
end
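# Installs a user-cut callback that submits the entire cut set once, on the
# first invocation, then disables itself by clearing `all_cuts`.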
function _dualgmi_set_callback(model, all_cuts)
function cut_callback(cb_data)
if all_cuts !== nothing
constrs = build_constraints(model, all_cuts)
@info "Enforcing $(length(constrs)) cuts..."
for c in constrs
MOI.submit(model, MOI.UserCut(cb_data), c)
end
all_cuts = nothing
end
end
set_attribute(model, MOI.UserCutCallback(), cut_callback)
end
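# Fits a scikit-learn NearestNeighbors model on the feature vectors of all
# training instances. A minimal sketch of the intended call sequence (file
# names are illustrative):
#   data = _KnnDualGmiData(extractor = extractor, k = 3, strategy = "near")
#   KnnDualGmiComponent_fit(data, ["train1.h5", "train2.h5"])
#   KnnDualGmiComponent_before_mip(data, "test.h5", jump_model, nothing)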
function KnnDualGmiComponent_fit(data::_KnnDualGmiData, train_h5)
x = hcat([_dualgmi_features(filename, data.extractor) for filename in train_h5]...)'
model = pyimport("sklearn.neighbors").NearestNeighbors(n_neighbors = length(train_h5))
model.fit(x)
data.model = model
data.train_h5 = train_h5
end
function KnnDualGmiComponent_before_mip(data::_KnnDualGmiData, test_h5, model, _)
reset_timer!()
@timeit "Extract features" begin
x = _dualgmi_features(test_h5, data.extractor)
x = reshape(x, 1, length(x))
end
@timeit "Find neighbors" begin
neigh_dist, neigh_ind = data.model.kneighbors(x, return_distance = true)
neigh_ind = neigh_ind .+ 1 # kneighbors returns 0-based indices; shift to Julia's 1-based indexing
N = length(neigh_dist)
k = min(N, data.k)
if data.strategy == "near"
selected = collect(1:k)
elseif data.strategy == "far"
selected = collect((N - k + 1) : N)
elseif data.strategy == "rand"
selected = shuffle(collect(1:N))[1:k]
else
error("unknown strategy: $(data.strategy)")
end
@info "Dual GMI: Selected neighbors ($(data.strategy)):"
neigh_dist = neigh_dist[selected]
neigh_ind = neigh_ind[selected]
for i in 1:k
h5_filename = data.train_h5[neigh_ind[i]]
dist = neigh_dist[i]
@info " $(h5_filename) dist=$(dist)"
end
end
@info "Dual GMI: Generating cuts..."
@timeit "Generate cuts" begin
time_generate = @elapsed begin
cuts = _dualgmi_generate(data.train_h5[neigh_ind], model)
end
@info "Dual GMI: Generated $(length(cuts.lb)) unique cuts in $(time_generate) seconds"
end
@timeit "Set callback" begin
_dualgmi_set_callback(model, cuts)
end
print_timer()
stats = Dict()
stats["KnnDualGmi: k"] = k
stats["KnnDualGmi: strategy"] = data.strategy
stats["KnnDualGmi: cuts"] = length(cuts.lb)
stats["KnnDualGmi: time generate"] = time_generate
return stats
end
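# Builds the Python-facing component classes with PyCall's @pydef and copies
# them into the module-level placeholders declared at the top of this file.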
function __init_gmi_dual__()
@pydef mutable struct KnnDualGmiComponentPy
function __init__(self; extractor, k = 3, strategy = "near")
self.data = _KnnDualGmiData(; extractor, k, strategy)
end
function fit(self, train_h5)
KnnDualGmiComponent_fit(self.data, train_h5)
end
function before_mip(self, test_h5, model, stats)
return @time KnnDualGmiComponent_before_mip(self.data, test_h5, model.inner, stats)
end
end
copy!(KnnDualGmiComponent, KnnDualGmiComponentPy)
@pydef mutable struct ExpertDualGmiComponentPy
function __init__(self)
self.inner = KnnDualGmiComponentPy(
extractor=H5FieldsExtractor(instance_fields=["static_var_obj_coeffs"]),
k=1,
)
end
function fit(self, train_h5)
end
function before_mip(self, test_h5, model, stats)
self.inner.fit([test_h5])
return self.inner.before_mip(test_h5, model, stats)
end
end
copy!(ExpertDualGmiComponent, ExpertDualGmiComponentPy)
end
export collect_gmi_dual, expert_gmi_dual, ExpertDualGmiComponent, KnnDualGmiComponent, collect_gmi_FisSal2011