Move components into submodule

Date:   2020-02-05 12:50:36 -06:00
Parent: 52b476f0a3
Commit: 85b804610f
14 changed files with 25 additions and 23 deletions

@@ -0,0 +1,66 @@
import Base.Threads.@threads
using TinyBnB, CPLEXW, Printf

instance_name = ARGS[1]
output_filename = ARGS[2]
node_limit = parse(Int, ARGS[3])

mip = open_mip(instance_name)
n_vars = CPXgetnumcols(mip.cplex_env[1], mip.cplex_lp[1])

# Accumulators for the up/down pseudocost rates observed during strong branching.
pseudocost_count_up = [0 for i in 1:n_vars]
pseudocost_count_down = [0 for i in 1:n_vars]
pseudocost_sum_up = [0. for i in 1:n_vars]
pseudocost_sum_down = [0. for i in 1:n_vars]

# Full strong branching rule that additionally records, for the selected
# variable, the per-unit objective degradation in each branching direction.
function full_strong_branching_track(node::Node, progress::Progress)::TinyBnB.Variable
    N = length(node.fractional_variables)
    scores = Array{Float64}(undef, N)
    rates_up = Array{Float64}(undef, N)
    rates_down = Array{Float64}(undef, N)
    @threads for v in 1:N
        fix_vars!(node.mip, node.branch_variables, node.branch_values)
        obj_up, obj_down = TinyBnB.probe(node.mip, node.fractional_variables[v])
        unfix_vars!(node.mip, node.branch_variables)
        delta_up = obj_up - node.obj
        delta_down = obj_down - node.obj
        frac_up = ceil(node.fractional_values[v]) - node.fractional_values[v]
        frac_down = node.fractional_values[v] - floor(node.fractional_values[v])
        rates_up[v] = delta_up / frac_up
        rates_down[v] = delta_down / frac_down
        scores[v] = delta_up * delta_down
    end
    max_score, max_offset = findmax(scores)
    selected_var = node.fractional_variables[max_offset]
    if abs(rates_up[max_offset]) < 1e6
        pseudocost_count_up[selected_var.index] += 1
        pseudocost_sum_up[selected_var.index] += rates_up[max_offset]
    end
    if abs(rates_down[max_offset]) < 1e6
        pseudocost_count_down[selected_var.index] += 1
        pseudocost_sum_down[selected_var.index] += rates_down[max_offset]
    end
    return selected_var
end

branch_and_bound(mip,
                 node_limit = node_limit,
                 branch_rule = full_strong_branching_track,
                 node_rule = best_bound,
                 print_interval = 100)

# Priority of a variable: product of its average up and down pseudocost rates,
# or zero if either direction was never observed.
priority = [(pseudocost_count_up[v] == 0 || pseudocost_count_down[v] == 0) ? 0 :
            (pseudocost_sum_up[v] / pseudocost_count_up[v]) *
            (pseudocost_sum_down[v] / pseudocost_count_down[v])
            for v in 1:n_vars];

open(output_filename, "w") do file
    for var in mip.binary_variables
        write(file, @sprintf("%s,%.0f\n", name(mip, var), priority[var.index]))
    end
end
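
For reference, the script is invoked as `julia branching.jl <instance.lp> <output.csv> <node_limit>` (the three positional arguments parsed at the top), and it emits one `name,priority` line per binary variable. A minimal sketch of reading that output back, assuming only the CSV format produced by the `@sprintf` call above:

def read_priorities(filename):
    # Parse the "name,priority" lines written by branching.jl.
    with open(filename) as f:
        return {name: int(priority)
                for name, priority in (line.strip().split(",") for line in f)}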

@@ -0,0 +1,140 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from .component import Component
from ..extractors import Extractor

from sklearn.neighbors import KNeighborsRegressor
import numpy as np
from tqdm.auto import tqdm
from joblib import Parallel, delayed


class BranchPriorityComponent(Component):
    def __init__(self,
                 node_limit=1_000,
                ):
        self.pending_instances = []
        self.x_train = {}
        self.y_train = {}
        self.predictors = {}
        self.node_limit = node_limit

    def before_solve(self, solver, instance, model):
        assert solver.is_persistent, "BranchPriorityComponent requires a persistent solver"
        from gurobipy import GRB
        var_split = Extractor.split_variables(instance, model)
        for category in var_split.keys():
            if category not in self.predictors.keys():
                continue
            var_index_pairs = var_split[category]
            for (var, index) in var_index_pairs:
                x = self._build_x(instance, var, index)
                y = self.predictors[category].predict([x])[0][0]
                gvar = solver.internal_solver._pyomo_var_to_solver_var_map[var[index]]
                gvar.setAttr(GRB.Attr.BranchPriority, int(round(y)))

    def after_solve(self, solver, instance, model):
        self.pending_instances += [instance]

    def fit(self, solver, n_jobs=1):
        def _process(instance):
            import subprocess, tempfile, os

            # Write the instance to an LP file
            lp_file = tempfile.NamedTemporaryFile(suffix=".lp")
            model = instance.to_model()
            model.write(lp_file.name)

            # Run Julia script
            src_dirname = os.path.dirname(os.path.realpath(__file__))
            priority_file = tempfile.NamedTemporaryFile(mode="r")
            subprocess.run(["julia",
                            "%s/branching.jl" % src_dirname,
                            lp_file.name,
                            priority_file.name,
                            str(self.node_limit),
                           ],
                           check=True,
                          )

            # Parse output
            tokens = [line.strip().split(",") for line in priority_file.readlines()]
            lp_varname_to_priority = {t[0]: int(t[1]) for t in tokens}

            # Map priorities back to Pyomo variables
            from pyomo.core import Var
            from pyomo.core.base.label import TextLabeler
            labeler = TextLabeler()
            symbol_map = list(model.solutions.symbol_map.values())[0]

            # Build x_train and y_train
            comp = BranchPriorityComponent()
            for var in model.component_objects(Var):
                for index in var:
                    category = instance.get_variable_category(var, index)
                    if category is None:
                        continue
                    lp_varname = symbol_map.getSymbol(var[index], labeler)
                    var_priority = lp_varname_to_priority[lp_varname]
                    x = self._build_x(instance, var, index)
                    y = np.array([var_priority])
                    if category not in comp.x_train.keys():
                        comp.x_train[category] = np.array([x])
                        comp.y_train[category] = np.array([y])
                    else:
                        comp.x_train[category] = np.vstack([comp.x_train[category], x])
                        comp.y_train[category] = np.vstack([comp.y_train[category], y])
            return comp

        subcomponents = Parallel(n_jobs=n_jobs)(
            delayed(_process)(instance)
            for instance in tqdm(self.pending_instances, desc="Branch priority")
        )
        self.merge(subcomponents)
        self.pending_instances.clear()

        # Retrain ML predictors
        for category in self.x_train.keys():
            x_train = self.x_train[category]
            y_train = self.y_train[category]
            self.predictors[category] = KNeighborsRegressor(n_neighbors=1)
            self.predictors[category].fit(x_train, y_train)

    def _build_x(self, instance, var, index):
        instance_features = instance.get_instance_features()
        var_features = instance.get_variable_features(var, index)
        return np.hstack([instance_features, var_features])

    def merge(self, other_components):
        keys = set(self.x_train.keys())
        for comp in other_components:
            self.pending_instances += comp.pending_instances
            keys = keys.union(set(comp.x_train.keys()))

        # Merge x_train and y_train
        for key in keys:
            x_train_submatrices = [comp.x_train[key]
                                   for comp in other_components
                                   if key in comp.x_train.keys()]
            y_train_submatrices = [comp.y_train[key]
                                   for comp in other_components
                                   if key in comp.y_train.keys()]
            if key in self.x_train.keys():
                x_train_submatrices += [self.x_train[key]]
                y_train_submatrices += [self.y_train[key]]
            self.x_train[key] = np.vstack(x_train_submatrices)
            self.y_train[key] = np.vstack(y_train_submatrices)

        # Merge trained ML predictors
        for comp in other_components:
            for key in comp.predictors.keys():
                if key not in self.predictors.keys():
                    self.predictors[key] = comp.predictors[key]
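
A hedged usage sketch, mirroring the tests further below; `instances` stands for any list of user-defined MIPLearn instances:

from miplearn import BranchPriorityComponent, LearningSolver

solver = LearningSolver(components={"branch-priority": BranchPriorityComponent(node_limit=1_000)})
solver.parallel_solve(instances, n_jobs=2)  # after_solve queues each instance
solver.fit()  # runs branching.jl on each queued instance, then trains the k-NN predictors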

@@ -0,0 +1,27 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from abc import ABC, abstractmethod
class Component(ABC):
    """
    A Component is an object which adds functionality to a LearningSolver.
    """

    @abstractmethod
    def before_solve(self, solver, instance, model):
        pass

    @abstractmethod
    def after_solve(self, solver, instance, model):
        pass

    @abstractmethod
    def merge(self, other):
        pass

    @abstractmethod
    def fit(self, solver):
        pass
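
As an illustration only (the class name is hypothetical), a minimal concrete component implementing this interface might look like:

class InstanceCounterComponent(Component):
    """Illustrative no-op component that counts solved instances."""
    def __init__(self):
        self.n_solved = 0

    def before_solve(self, solver, instance, model):
        pass

    def after_solve(self, solver, instance, model):
        self.n_solved += 1

    def merge(self, other):
        for comp in other:
            self.n_solved += comp.n_solved

    def fit(self, solver):
        pass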

@@ -0,0 +1,49 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from miplearn import BranchPriorityComponent, LearningSolver
from miplearn.problems.knapsack import KnapsackInstance
import numpy as np
import tempfile
def _get_instances():
    return [
        KnapsackInstance(
            weights=[23., 26., 20., 18.],
            prices=[505., 352., 458., 220.],
            capacity=67.,
        ),
    ] * 2


def test_branching():
    instances = _get_instances()
    component = BranchPriorityComponent()
    for instance in instances:
        component.after_solve(None, instance, None)
    component.fit(None)
    for key in ["default"]:
        assert key in component.x_train.keys()
        assert key in component.y_train.keys()
        assert component.x_train[key].shape == (8, 4)
        assert component.y_train[key].shape == (8, 1)


def test_branch_priority_save_load():
    state_file = tempfile.NamedTemporaryFile(mode="r")
    solver = LearningSolver(components={"branch-priority": BranchPriorityComponent()})
    solver.parallel_solve(_get_instances(), n_jobs=2)
    solver.fit()
    comp = solver.components["branch-priority"]
    assert comp.x_train["default"].shape == (8, 4)
    assert comp.y_train["default"].shape == (8, 1)
    assert "default" in comp.predictors.keys()
    solver.save_state(state_file.name)

    solver = LearningSolver(components={"branch-priority": BranchPriorityComponent()})
    solver.load_state(state_file.name)
    comp = solver.components["branch-priority"]
    assert comp.x_train["default"].shape == (8, 4)
    assert comp.y_train["default"].shape == (8, 1)
    assert "default" in comp.predictors.keys()

@@ -0,0 +1,37 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from miplearn import WarmStartComponent, LearningSolver
from miplearn.problems.knapsack import KnapsackInstance
import numpy as np
import tempfile
def _get_instances():
    return [
        KnapsackInstance(
            weights=[23., 26., 20., 18.],
            prices=[505., 352., 458., 220.],
            capacity=67.,
        ),
    ] * 2


def test_warm_start_save_load():
    state_file = tempfile.NamedTemporaryFile(mode="r")
    solver = LearningSolver(components={"warm-start": WarmStartComponent()})
    solver.parallel_solve(_get_instances(), n_jobs=2)
    solver.fit()
    comp = solver.components["warm-start"]
    assert comp.x_train["default"].shape == (8, 6)
    assert comp.y_train["default"].shape == (8, 2)
    assert "default" in comp.predictors.keys()
    solver.save_state(state_file.name)

    solver = LearningSolver(components={"warm-start": WarmStartComponent()})
    solver.load_state(state_file.name)
    comp = solver.components["warm-start"]
    assert comp.x_train["default"].shape == (8, 6)
    assert comp.y_train["default"].shape == (8, 2)
    assert "default" in comp.predictors.keys()

@@ -0,0 +1,88 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from miplearn import KnnWarmStartPredictor
from sklearn.metrics import accuracy_score, precision_score
import numpy as np
def test_knn_with_consensus():
    x_train = np.array([
        [0.0, 0.0],
        [0.1, 0.0],
        [0.0, 0.1],
        [1.0, 1.0],
    ])
    y_train = np.array([
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
    ])
    ws = KnnWarmStartPredictor(k=3, thr_clip=[0.75, 0.75])
    ws.fit(x_train, y_train)
    x_test = np.array([[0.0, 0.0]])
    y_test = np.array([[0, 1]])
    assert (ws.predict(x_test) == y_test).all()


def test_knn_without_consensus():
    x_train = np.array([
        [0.0, 0.0],
        [0.1, 0.1],
        [0.9, 0.9],
        [1.0, 1.0],
    ])
    y_train = np.array([
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
    ])
    ws = KnnWarmStartPredictor(k=4, thr_clip=[0.75, 0.75])
    ws.fit(x_train, y_train)
    x_test = np.array([[0.5, 0.5]])
    y_test = np.array([[0, 0]])
    assert (ws.predict(x_test) == y_test).all()


def test_knn_always_true():
    x_train = np.array([
        [0.0, 0.0],
        [0.1, 0.1],
        [0.9, 0.9],
        [1.0, 1.0],
    ])
    y_train = np.array([
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
    ])
    ws = KnnWarmStartPredictor(k=4, thr_clip=[0.75, 0.75])
    ws.fit(x_train, y_train)
    x_test = np.array([[0.5, 0.5]])
    y_test = np.array([[1, 0]])
    assert (ws.predict(x_test) == y_test).all()


def test_knn_always_false():
    x_train = np.array([
        [0.0, 0.0],
        [0.1, 0.1],
        [0.9, 0.9],
        [1.0, 1.0],
    ])
    y_train = np.array([
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
    ])
    ws = KnnWarmStartPredictor(k=4, thr_clip=[0.75, 0.75])
    ws.fit(x_train, y_train)
    x_test = np.array([[0.5, 0.5]])
    y_test = np.array([[0, 1]])
    assert (ws.predict(x_test) == y_test).all()

@@ -0,0 +1,64 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from miplearn import LogisticWarmStartPredictor
from sklearn.metrics import accuracy_score, precision_score
import numpy as np
def _generate_dataset(ground_truth, n_samples=10_000):
    x_train = np.random.rand(n_samples, 5)
    x_test = np.random.rand(n_samples, 5)
    y_train = ground_truth(x_train)
    y_test = ground_truth(x_test)
    return x_train, y_train, x_test, y_test


def _is_sum_greater_than_two(x):
    y = (np.sum(x, axis=1) > 2.0).astype(int)
    return np.vstack([y, 1 - y]).transpose()


def _always_zero(x):
    y = np.zeros((1, x.shape[0]))
    return np.vstack([y, 1 - y]).transpose()


def _random_values(x):
    y = np.random.randint(2, size=x.shape[0])
    return np.vstack([y, 1 - y]).transpose()


def test_logistic_ws_with_balanced_labels():
    x_train, y_train, x_test, y_test = _generate_dataset(_is_sum_greater_than_two)
    ws = LogisticWarmStartPredictor()
    ws.fit(x_train, y_train)
    y_pred = ws.predict(x_test)
    assert accuracy_score(y_test[:, 0], y_pred[:, 0]) > 0.99
    assert accuracy_score(y_test[:, 1], y_pred[:, 1]) > 0.99


def test_logistic_ws_with_unbalanced_labels():
    x_train, y_train, x_test, y_test = _generate_dataset(_always_zero)
    ws = LogisticWarmStartPredictor()
    ws.fit(x_train, y_train)
    y_pred = ws.predict(x_test)
    assert accuracy_score(y_test[:, 0], y_pred[:, 0]) == 1.0
    assert accuracy_score(y_test[:, 1], y_pred[:, 1]) == 1.0


def test_logistic_ws_with_unpredictable_labels():
    x_train, y_train, x_test, y_test = _generate_dataset(_random_values)
    ws = LogisticWarmStartPredictor()
    ws.fit(x_train, y_train)
    y_pred = ws.predict(x_test)
    assert np.sum(y_pred) == 0


def test_logistic_ws_with_small_sample_size():
    x_train, y_train, x_test, y_test = _generate_dataset(_random_values, n_samples=3)
    ws = LogisticWarmStartPredictor()
    ws.fit(x_train, y_train)
    y_pred = ws.predict(x_test)
    assert np.sum(y_pred) == 0

@@ -0,0 +1,222 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from .component import Component
from ..extractors import *
from abc import ABC, abstractmethod
from copy import deepcopy
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from tqdm.auto import tqdm
import logging
logger = logging.getLogger(__name__)
class WarmStartPredictor(ABC):
    def __init__(self, thr_clip=[0.50, 0.50]):
        self.models = [None, None]
        self.thr_clip = thr_clip

    def fit(self, x_train, y_train):
        assert isinstance(x_train, np.ndarray)
        assert isinstance(y_train, np.ndarray)
        y_train = y_train.astype(int)
        assert y_train.shape[0] == x_train.shape[0]
        assert y_train.shape[1] == 2
        for i in [0, 1]:
            self.models[i] = self._fit(x_train, y_train[:, i], i)

    def predict(self, x_test):
        assert isinstance(x_test, np.ndarray)
        y_pred = np.zeros((x_test.shape[0], 2))
        for i in [0, 1]:
            if isinstance(self.models[i], int):
                y_pred[:, i] = self.models[i]
            else:
                y = self.models[i].predict_proba(x_test)[:, 1]
                y[y < self.thr_clip[i]] = 0.
                y[y > 0.] = 1.
                y_pred[:, i] = y
        return y_pred.astype(int)

    @abstractmethod
    def _fit(self, x_train, y_train, label):
        pass


class LogisticWarmStartPredictor(WarmStartPredictor):
    def __init__(self,
                 min_samples=100,
                 thr_fix=[0.99, 0.99],
                 thr_balance=[0.80, 0.80],
                 thr_alpha=[0.50, 0.50],
                ):
        super().__init__()
        self.min_samples = min_samples
        self.thr_fix = thr_fix
        self.thr_balance = thr_balance
        self.thr_alpha = thr_alpha

    def _fit(self, x_train, y_train, label):
        y_train_avg = np.average(y_train)

        # If number of samples is too small, don't predict anything.
        if x_train.shape[0] < self.min_samples:
            return 0

        # If vast majority of observations are true, always return true.
        if y_train_avg > self.thr_fix[label]:
            return 1

        # If dataset is not balanced enough, don't predict anything.
        if y_train_avg < (1 - self.thr_balance[label]) or y_train_avg > self.thr_balance[label]:
            return 0

        reg = make_pipeline(StandardScaler(), LogisticRegression())
        reg_score = np.mean(cross_val_score(reg, x_train, y_train, cv=5))
        dummy_score = max(y_train_avg, 1 - y_train_avg)
        reg_thr = 1. * self.thr_alpha[label] + dummy_score * (1 - self.thr_alpha[label])

        # If cross-validation score is too low, don't predict anything.
        if reg_score < reg_thr:
            return 0

        reg.fit(x_train, y_train.astype(int))
        return reg


class KnnWarmStartPredictor(WarmStartPredictor):
    def __init__(self,
                 k=50,
                 min_samples=1,
                 thr_clip=[0.80, 0.80],
                 thr_fix=[1.0, 1.0],
                ):
        super().__init__(thr_clip=thr_clip)
        self.k = k
        self.thr_fix = thr_fix
        self.min_samples = min_samples

    def _fit(self, x_train, y_train, label):
        y_train_avg = np.average(y_train)

        # If number of training samples is too small, don't predict anything.
        if x_train.shape[0] < self.min_samples:
            logger.debug("Too few samples; return 0")
            return 0

        # If vast majority of observations are true, always return true.
        if y_train_avg >= self.thr_fix[label]:
            logger.debug("Consensus reached; return 1")
            return 1

        # If vast majority of observations are false, always return false.
        if y_train_avg <= (1 - self.thr_fix[label]):
            logger.debug("Consensus reached; return 0")
            return 0

        logger.debug("Training classifier...")
        k = min(self.k, x_train.shape[0])
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(x_train, y_train)
        return knn


class WarmStartComponent(Component):
    def __init__(self,
                 predictor_prototype=KnnWarmStartPredictor(),
                 mode="exact",
                ):
        self.mode = mode
        self.x_train = {}
        self.y_train = {}
        self.predictors = {}
        self.predictor_prototype = predictor_prototype
        self.is_warm_start_available = False

    def before_solve(self, solver, instance, model):
        # Build x_test
        x_test = CombinedExtractor([UserFeaturesExtractor(),
                                    SolutionExtractor(),
                                   ]).extract([instance], [model])

        # Update self.x_train
        self.x_train = Extractor.merge([self.x_train, x_test],
                                       vertical=True)

        # Predict solutions
        count_total, count_fixed = 0, 0
        var_split = Extractor.split_variables(instance, model)
        for category in var_split.keys():
            var_index_pairs = var_split[category]
            if category not in self.predictors.keys():
                continue
            ws = self.predictors[category].predict(x_test[category])
            assert ws.shape == (len(var_index_pairs), 2)
            for i in range(len(var_index_pairs)):
                var, index = var_index_pairs[i]
                count_total += 1
                if self.mode == "heuristic":
                    if ws[i, 0] > 0.5:
                        var[index].fix(0)
                        count_fixed += 1
                        if solver.is_persistent:
                            solver.internal_solver.update_var(var[index])
                    elif ws[i, 1] > 0.5:
                        var[index].fix(1)
                        count_fixed += 1
                        if solver.is_persistent:
                            solver.internal_solver.update_var(var[index])
                else:
                    var[index].value = None
                    if ws[i, 0] > 0.5:
                        count_fixed += 1
                        var[index].value = 0
                        self.is_warm_start_available = True
                    elif ws[i, 1] > 0.5:
                        count_fixed += 1
                        var[index].value = 1
                        self.is_warm_start_available = True
        logger.info("Setting values for %d variables (out of %d)" % (count_fixed, count_total))

    def after_solve(self, solver, instance, model):
        y_test = SolutionExtractor().extract([instance], [model])
        self.y_train = Extractor.merge([self.y_train, y_test], vertical=True)

    def fit(self, solver, n_jobs=1):
        for category in tqdm(self.x_train.keys(), desc="Warm start"):
            x_train = self.x_train[category]
            y_train = self.y_train[category]
            self.predictors[category] = deepcopy(self.predictor_prototype)
            self.predictors[category].fit(x_train, y_train)

    def merge(self, other_components):
        # Merge x_train and y_train
        keys = set(self.x_train.keys())
        for comp in other_components:
            keys = keys.union(set(comp.x_train.keys()))
        for key in keys:
            x_train_submatrices = [comp.x_train[key]
                                   for comp in other_components
                                   if key in comp.x_train.keys()]
            y_train_submatrices = [comp.y_train[key]
                                   for comp in other_components
                                   if key in comp.y_train.keys()]
            if key in self.x_train.keys():
                x_train_submatrices += [self.x_train[key]]
                y_train_submatrices += [self.y_train[key]]
            self.x_train[key] = np.vstack(x_train_submatrices)
            self.y_train[key] = np.vstack(y_train_submatrices)

        # Merge trained predictors
        for comp in other_components:
            for key in comp.predictors.keys():
                if key not in self.predictors.keys():
                    self.predictors[key] = comp.predictors[key]
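
A hedged usage sketch, following the tests above; `train_instances` and `new_instance` are hypothetical, and the single-instance `solver.solve` call assumes the usual LearningSolver interface:

from miplearn import LearningSolver, WarmStartComponent

# mode="exact" (default) only suggests MIP start values; mode="heuristic"
# fixes the predicted variables outright, as in before_solve above.
solver = LearningSolver(components={"warm-start": WarmStartComponent()})
solver.parallel_solve(train_instances, n_jobs=2)
solver.fit()
solver.solve(new_instance)  # predictions are applied in before_solve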