mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 09:28:51 -06:00
Move components into submodule
This commit is contained in:
0
miplearn/components/__init__.py
Normal file
0
miplearn/components/__init__.py
Normal file
66
miplearn/components/branching.jl
Normal file
66
miplearn/components/branching.jl
Normal file
@@ -0,0 +1,66 @@
|
||||
import Base.Threads.@threads
|
||||
using TinyBnB, CPLEXW, Printf
|
||||
|
||||
instance_name = ARGS[1]
|
||||
output_filename = ARGS[2]
|
||||
node_limit = parse(Int, ARGS[3])
|
||||
|
||||
mip = open_mip(instance_name)
|
||||
n_vars = CPXgetnumcols(mip.cplex_env[1], mip.cplex_lp[1])
|
||||
|
||||
pseudocost_count_up = [0 for i in 1:n_vars]
|
||||
pseudocost_count_down = [0 for i in 1:n_vars]
|
||||
pseudocost_sum_up = [0. for i in 1:n_vars]
|
||||
pseudocost_sum_down = [0. for i in 1:n_vars]
|
||||
|
||||
function full_strong_branching_track(node::Node, progress::Progress)::TinyBnB.Variable
|
||||
N = length(node.fractional_variables)
|
||||
scores = Array{Float64}(undef, N)
|
||||
rates_up = Array{Float64}(undef, N)
|
||||
rates_down = Array{Float64}(undef, N)
|
||||
|
||||
@threads for v in 1:N
|
||||
fix_vars!(node.mip, node.branch_variables, node.branch_values)
|
||||
obj_up, obj_down = TinyBnB.probe(node.mip, node.fractional_variables[v])
|
||||
unfix_vars!(node.mip, node.branch_variables)
|
||||
delta_up = obj_up - node.obj
|
||||
delta_down = obj_down - node.obj
|
||||
frac_up = ceil(node.fractional_values[v]) - node.fractional_values[v]
|
||||
frac_down = node.fractional_values[v] - floor(node.fractional_values[v])
|
||||
rates_up[v] = delta_up / frac_up
|
||||
rates_down[v] = delta_down / frac_down
|
||||
scores[v] = delta_up * delta_down
|
||||
end
|
||||
|
||||
max_score, max_offset = findmax(scores)
|
||||
selected_var = node.fractional_variables[max_offset]
|
||||
|
||||
if abs(rates_up[max_offset]) < 1e6
|
||||
pseudocost_count_up[selected_var.index] += 1
|
||||
pseudocost_sum_up[selected_var.index] += rates_up[max_offset]
|
||||
end
|
||||
|
||||
if abs(rates_down[max_offset]) < 1e6
|
||||
pseudocost_count_down[selected_var.index] += 1
|
||||
pseudocost_sum_down[selected_var.index] += rates_down[max_offset]
|
||||
end
|
||||
|
||||
return selected_var
|
||||
end
|
||||
|
||||
branch_and_bound(mip,
|
||||
node_limit = node_limit,
|
||||
branch_rule = full_strong_branching_track,
|
||||
node_rule = best_bound,
|
||||
print_interval = 100)
|
||||
|
||||
priority = [(pseudocost_count_up[v] == 0 || pseudocost_count_down[v] == 0) ? 0 :
|
||||
(pseudocost_sum_up[v] / pseudocost_count_up[v]) *
|
||||
(pseudocost_sum_down[v] / pseudocost_count_down[v])
|
||||
for v in 1:n_vars];
|
||||
|
||||
open(output_filename, "w") do file
|
||||
for var in mip.binary_variables
|
||||
write(file, @sprintf("%s,%.0f\n", name(mip, var), priority[var.index]))
|
||||
end
|
||||
end
|
||||
140
miplearn/components/branching.py
Normal file
140
miplearn/components/branching.py
Normal file
@@ -0,0 +1,140 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from .component import Component
|
||||
from ..extractors import Extractor
|
||||
from abc import ABC, abstractmethod
|
||||
from sklearn.neighbors import KNeighborsRegressor
|
||||
import numpy as np
|
||||
from tqdm.auto import tqdm
|
||||
from joblib import Parallel, delayed
|
||||
import multiprocessing
|
||||
|
||||
class BranchPriorityComponent(Component):
|
||||
def __init__(self,
|
||||
node_limit=1_000,
|
||||
):
|
||||
self.pending_instances = []
|
||||
self.x_train = {}
|
||||
self.y_train = {}
|
||||
self.predictors = {}
|
||||
self.node_limit = node_limit
|
||||
|
||||
def before_solve(self, solver, instance, model):
|
||||
assert solver.is_persistent, "BranchPriorityComponent requires a persistent solver"
|
||||
from gurobipy import GRB
|
||||
var_split = Extractor.split_variables(instance, model)
|
||||
for category in var_split.keys():
|
||||
if category not in self.predictors.keys():
|
||||
continue
|
||||
var_index_pairs = var_split[category]
|
||||
for (i, (var, index)) in enumerate(var_index_pairs):
|
||||
x = self._build_x(instance, var, index)
|
||||
y = self.predictors[category].predict([x])[0][0]
|
||||
gvar = solver.internal_solver._pyomo_var_to_solver_var_map[var[index]]
|
||||
gvar.setAttr(GRB.Attr.BranchPriority, int(round(y)))
|
||||
|
||||
|
||||
def after_solve(self, solver, instance, model):
|
||||
self.pending_instances += [instance]
|
||||
|
||||
def fit(self, solver, n_jobs=1):
|
||||
def _process(instance):
|
||||
# Create LP file
|
||||
import subprocess, tempfile, os, sys
|
||||
lp_file = tempfile.NamedTemporaryFile(suffix=".lp")
|
||||
priority_file = tempfile.NamedTemporaryFile()
|
||||
model = instance.to_model()
|
||||
model.write(lp_file.name)
|
||||
|
||||
# Run Julia script
|
||||
src_dirname = os.path.dirname(os.path.realpath(__file__))
|
||||
priority_file = tempfile.NamedTemporaryFile(mode="r")
|
||||
subprocess.run(["julia",
|
||||
"%s/branching.jl" % src_dirname,
|
||||
lp_file.name,
|
||||
priority_file.name,
|
||||
str(self.node_limit),
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
|
||||
# Parse output
|
||||
tokens = [line.strip().split(",") for line in priority_file.readlines()]
|
||||
lp_varname_to_priority = {t[0]: int(t[1]) for t in tokens}
|
||||
|
||||
# Map priorities back to Pyomo variables
|
||||
pyomo_var_to_priority = {}
|
||||
from pyomo.core import Var
|
||||
from pyomo.core.base.label import TextLabeler
|
||||
labeler = TextLabeler()
|
||||
symbol_map = list(model.solutions.symbol_map.values())[0]
|
||||
|
||||
# Build x_train and y_train
|
||||
comp = BranchPriorityComponent()
|
||||
for var in model.component_objects(Var):
|
||||
for index in var:
|
||||
category = instance.get_variable_category(var, index)
|
||||
if category is None:
|
||||
continue
|
||||
lp_varname = symbol_map.getSymbol(var[index], labeler)
|
||||
var_priority = lp_varname_to_priority[lp_varname]
|
||||
x = self._build_x(instance, var, index)
|
||||
y = np.array([var_priority])
|
||||
|
||||
if category not in comp.x_train.keys():
|
||||
comp.x_train[category] = np.array([x])
|
||||
comp.y_train[category] = np.array([y])
|
||||
else:
|
||||
comp.x_train[category] = np.vstack([comp.x_train[category], x])
|
||||
comp.y_train[category] = np.vstack([comp.y_train[category], y])
|
||||
|
||||
return comp
|
||||
|
||||
|
||||
subcomponents = Parallel(n_jobs=n_jobs)(
|
||||
delayed(_process)(instance)
|
||||
for instance in tqdm(self.pending_instances, desc="Branch priority")
|
||||
)
|
||||
self.merge(subcomponents)
|
||||
self.pending_instances.clear()
|
||||
|
||||
# Retrain ML predictors
|
||||
for category in self.x_train.keys():
|
||||
x_train = self.x_train[category]
|
||||
y_train = self.y_train[category]
|
||||
self.predictors[category] = KNeighborsRegressor(n_neighbors=1)
|
||||
self.predictors[category].fit(x_train, y_train)
|
||||
|
||||
|
||||
def _build_x(self, instance, var, index):
|
||||
instance_features = instance.get_instance_features()
|
||||
var_features = instance.get_variable_features(var, index)
|
||||
return np.hstack([instance_features, var_features])
|
||||
|
||||
def merge(self, other_components):
|
||||
keys = set(self.x_train.keys())
|
||||
for comp in other_components:
|
||||
self.pending_instances += comp.pending_instances
|
||||
keys = keys.union(set(comp.x_train.keys()))
|
||||
|
||||
# Merge x_train and y_train
|
||||
for key in keys:
|
||||
x_train_submatrices = [comp.x_train[key]
|
||||
for comp in other_components
|
||||
if key in comp.x_train.keys()]
|
||||
y_train_submatrices = [comp.y_train[key]
|
||||
for comp in other_components
|
||||
if key in comp.y_train.keys()]
|
||||
if key in self.x_train.keys():
|
||||
x_train_submatrices += [self.x_train[key]]
|
||||
y_train_submatrices += [self.y_train[key]]
|
||||
self.x_train[key] = np.vstack(x_train_submatrices)
|
||||
self.y_train[key] = np.vstack(y_train_submatrices)
|
||||
|
||||
# Merge trained ML predictors
|
||||
for comp in other_components:
|
||||
for key in comp.predictors.keys():
|
||||
if key not in self.predictors.keys():
|
||||
self.predictors[key] = comp.predictors[key]
|
||||
27
miplearn/components/component.py
Normal file
27
miplearn/components/component.py
Normal file
@@ -0,0 +1,27 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class Component(ABC):
|
||||
"""
|
||||
A Component is an object which adds functionality to a LearningSolver.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def before_solve(self, solver, instance, model):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def after_solve(self, solver, instance, model):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def merge(self, other):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, solver):
|
||||
pass
|
||||
0
miplearn/components/tests/__init__.py
Normal file
0
miplearn/components/tests/__init__.py
Normal file
49
miplearn/components/tests/test_branching.py
Normal file
49
miplearn/components/tests/test_branching.py
Normal file
@@ -0,0 +1,49 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from miplearn import BranchPriorityComponent, LearningSolver
|
||||
from miplearn.problems.knapsack import KnapsackInstance
|
||||
import numpy as np
|
||||
import tempfile
|
||||
|
||||
def _get_instances():
|
||||
return [
|
||||
KnapsackInstance(
|
||||
weights=[23., 26., 20., 18.],
|
||||
prices=[505., 352., 458., 220.],
|
||||
capacity=67.,
|
||||
),
|
||||
] * 2
|
||||
|
||||
|
||||
def test_branching():
|
||||
instances = _get_instances()
|
||||
component = BranchPriorityComponent()
|
||||
for instance in instances:
|
||||
component.after_solve(None, instance, None)
|
||||
component.fit(None)
|
||||
for key in ["default"]:
|
||||
assert key in component.x_train.keys()
|
||||
assert key in component.y_train.keys()
|
||||
assert component.x_train[key].shape == (8, 4)
|
||||
assert component.y_train[key].shape == (8, 1)
|
||||
|
||||
|
||||
def test_branch_priority_save_load():
|
||||
state_file = tempfile.NamedTemporaryFile(mode="r")
|
||||
solver = LearningSolver(components={"branch-priority": BranchPriorityComponent()})
|
||||
solver.parallel_solve(_get_instances(), n_jobs=2)
|
||||
solver.fit()
|
||||
comp = solver.components["branch-priority"]
|
||||
assert comp.x_train["default"].shape == (8, 4)
|
||||
assert comp.y_train["default"].shape == (8, 1)
|
||||
assert "default" in comp.predictors.keys()
|
||||
solver.save_state(state_file.name)
|
||||
|
||||
solver = LearningSolver(components={"branch-priority": BranchPriorityComponent()})
|
||||
solver.load_state(state_file.name)
|
||||
comp = solver.components["branch-priority"]
|
||||
assert comp.x_train["default"].shape == (8, 4)
|
||||
assert comp.y_train["default"].shape == (8, 1)
|
||||
assert "default" in comp.predictors.keys()
|
||||
37
miplearn/components/tests/test_warmstart.py
Normal file
37
miplearn/components/tests/test_warmstart.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from miplearn import WarmStartComponent, LearningSolver
|
||||
from miplearn.problems.knapsack import KnapsackInstance
|
||||
import numpy as np
|
||||
import tempfile
|
||||
|
||||
|
||||
def _get_instances():
|
||||
return [
|
||||
KnapsackInstance(
|
||||
weights=[23., 26., 20., 18.],
|
||||
prices=[505., 352., 458., 220.],
|
||||
capacity=67.,
|
||||
),
|
||||
] * 2
|
||||
|
||||
|
||||
def test_warm_start_save_load():
|
||||
state_file = tempfile.NamedTemporaryFile(mode="r")
|
||||
solver = LearningSolver(components={"warm-start": WarmStartComponent()})
|
||||
solver.parallel_solve(_get_instances(), n_jobs=2)
|
||||
solver.fit()
|
||||
comp = solver.components["warm-start"]
|
||||
assert comp.x_train["default"].shape == (8, 6)
|
||||
assert comp.y_train["default"].shape == (8, 2)
|
||||
assert "default" in comp.predictors.keys()
|
||||
solver.save_state(state_file.name)
|
||||
|
||||
solver = LearningSolver(components={"warm-start": WarmStartComponent()})
|
||||
solver.load_state(state_file.name)
|
||||
comp = solver.components["warm-start"]
|
||||
assert comp.x_train["default"].shape == (8, 6)
|
||||
assert comp.y_train["default"].shape == (8, 2)
|
||||
assert "default" in comp.predictors.keys()
|
||||
88
miplearn/components/tests/test_warmstart_knn.py
Normal file
88
miplearn/components/tests/test_warmstart_knn.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from miplearn import KnnWarmStartPredictor
|
||||
from sklearn.metrics import accuracy_score, precision_score
|
||||
import numpy as np
|
||||
|
||||
|
||||
def test_knn_with_consensus():
|
||||
x_train = np.array([
|
||||
[0.0, 0.0],
|
||||
[0.1, 0.0],
|
||||
[0.0, 0.1],
|
||||
[1.0, 1.0],
|
||||
])
|
||||
y_train = np.array([
|
||||
[0., 1.],
|
||||
[0., 1.],
|
||||
[0., 1.],
|
||||
[1., 0.],
|
||||
])
|
||||
ws = KnnWarmStartPredictor(k=3, thr_clip=[0.75, 0.75])
|
||||
ws.fit(x_train, y_train)
|
||||
|
||||
x_test = np.array([[0.0, 0.0]])
|
||||
y_test = np.array([[0, 1]])
|
||||
assert (ws.predict(x_test) == y_test).all()
|
||||
|
||||
def test_knn_without_consensus():
|
||||
x_train = np.array([
|
||||
[0.0, 0.0],
|
||||
[0.1, 0.1],
|
||||
[0.9, 0.9],
|
||||
[1.0, 1.0],
|
||||
])
|
||||
y_train = np.array([
|
||||
[0., 1.],
|
||||
[0., 1.],
|
||||
[1., 0.],
|
||||
[1., 0.],
|
||||
])
|
||||
ws = KnnWarmStartPredictor(k=4, thr_clip=[0.75, 0.75])
|
||||
ws.fit(x_train, y_train)
|
||||
|
||||
x_test = np.array([[0.5, 0.5]])
|
||||
y_test = np.array([[0, 0]])
|
||||
assert (ws.predict(x_test) == y_test).all()
|
||||
|
||||
def test_knn_always_true():
|
||||
x_train = np.array([
|
||||
[0.0, 0.0],
|
||||
[0.1, 0.1],
|
||||
[0.9, 0.9],
|
||||
[1.0, 1.0],
|
||||
])
|
||||
y_train = np.array([
|
||||
[1., 0.],
|
||||
[1., 0.],
|
||||
[1., 0.],
|
||||
[1., 0.],
|
||||
])
|
||||
ws = KnnWarmStartPredictor(k=4, thr_clip=[0.75, 0.75])
|
||||
ws.fit(x_train, y_train)
|
||||
|
||||
x_test = np.array([[0.5, 0.5]])
|
||||
y_test = np.array([[1, 0]])
|
||||
assert (ws.predict(x_test) == y_test).all()
|
||||
|
||||
def test_knn_always_false():
|
||||
x_train = np.array([
|
||||
[0.0, 0.0],
|
||||
[0.1, 0.1],
|
||||
[0.9, 0.9],
|
||||
[1.0, 1.0],
|
||||
])
|
||||
y_train = np.array([
|
||||
[0., 1.],
|
||||
[0., 1.],
|
||||
[0., 1.],
|
||||
[0., 1.],
|
||||
])
|
||||
ws = KnnWarmStartPredictor(k=4, thr_clip=[0.75, 0.75])
|
||||
ws.fit(x_train, y_train)
|
||||
|
||||
x_test = np.array([[0.5, 0.5]])
|
||||
y_test = np.array([[0, 1]])
|
||||
assert (ws.predict(x_test) == y_test).all()
|
||||
64
miplearn/components/tests/test_warmstart_logistic.py
Normal file
64
miplearn/components/tests/test_warmstart_logistic.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from miplearn import LogisticWarmStartPredictor
|
||||
from sklearn.metrics import accuracy_score, precision_score
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _generate_dataset(ground_truth, n_samples=10_000):
|
||||
x_train = np.random.rand(n_samples,5)
|
||||
x_test = np.random.rand(n_samples,5)
|
||||
y_train = ground_truth(x_train)
|
||||
y_test = ground_truth(x_test)
|
||||
return x_train, y_train, x_test, y_test
|
||||
|
||||
|
||||
def _is_sum_greater_than_two(x):
|
||||
y = (np.sum(x, axis=1) > 2.0).astype(int)
|
||||
return np.vstack([y, 1 - y]).transpose()
|
||||
|
||||
|
||||
def _always_zero(x):
|
||||
y = np.zeros((1, x.shape[0]))
|
||||
return np.vstack([y, 1 - y]).transpose()
|
||||
|
||||
|
||||
def _random_values(x):
|
||||
y = np.random.randint(2, size=x.shape[0])
|
||||
return np.vstack([y, 1 - y]).transpose()
|
||||
|
||||
|
||||
def test_logistic_ws_with_balanced_labels():
|
||||
x_train, y_train, x_test, y_test = _generate_dataset(_is_sum_greater_than_two)
|
||||
ws = LogisticWarmStartPredictor()
|
||||
ws.fit(x_train, y_train)
|
||||
y_pred = ws.predict(x_test)
|
||||
assert accuracy_score(y_test[:,0], y_pred[:,0]) > 0.99
|
||||
assert accuracy_score(y_test[:,1], y_pred[:,1]) > 0.99
|
||||
|
||||
|
||||
def test_logistic_ws_with_unbalanced_labels():
|
||||
x_train, y_train, x_test, y_test = _generate_dataset(_always_zero)
|
||||
ws = LogisticWarmStartPredictor()
|
||||
ws.fit(x_train, y_train)
|
||||
y_pred = ws.predict(x_test)
|
||||
assert accuracy_score(y_test[:,0], y_pred[:,0]) == 1.0
|
||||
assert accuracy_score(y_test[:,1], y_pred[:,1]) == 1.0
|
||||
|
||||
|
||||
def test_logistic_ws_with_unpredictable_labels():
|
||||
x_train, y_train, x_test, y_test = _generate_dataset(_random_values)
|
||||
ws = LogisticWarmStartPredictor()
|
||||
ws.fit(x_train, y_train)
|
||||
y_pred = ws.predict(x_test)
|
||||
assert np.sum(y_pred) == 0
|
||||
|
||||
|
||||
def test_logistic_ws_with_small_sample_size():
|
||||
x_train, y_train, x_test, y_test = _generate_dataset(_random_values, n_samples=3)
|
||||
ws = LogisticWarmStartPredictor()
|
||||
ws.fit(x_train, y_train)
|
||||
y_pred = ws.predict(x_test)
|
||||
assert np.sum(y_pred) == 0
|
||||
222
miplearn/components/warmstart.py
Normal file
222
miplearn/components/warmstart.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
|
||||
# Written by Alinson S. Xavier <axavier@anl.gov>
|
||||
|
||||
from .component import Component
|
||||
from ..extractors import *
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import cross_val_score
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from tqdm.auto import tqdm
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class WarmStartPredictor(ABC):
|
||||
def __init__(self, thr_clip=[0.50, 0.50]):
|
||||
self.models = [None, None]
|
||||
self.thr_clip = thr_clip
|
||||
|
||||
def fit(self, x_train, y_train):
|
||||
assert isinstance(x_train, np.ndarray)
|
||||
assert isinstance(y_train, np.ndarray)
|
||||
y_train = y_train.astype(int)
|
||||
assert y_train.shape[0] == x_train.shape[0]
|
||||
assert y_train.shape[1] == 2
|
||||
for i in [0,1]:
|
||||
self.models[i] = self._fit(x_train, y_train[:, i], i)
|
||||
|
||||
def predict(self, x_test):
|
||||
assert isinstance(x_test, np.ndarray)
|
||||
y_pred = np.zeros((x_test.shape[0], 2))
|
||||
for i in [0,1]:
|
||||
if isinstance(self.models[i], int):
|
||||
y_pred[:, i] = self.models[i]
|
||||
else:
|
||||
y = self.models[i].predict_proba(x_test)[:,1]
|
||||
y[y < self.thr_clip[i]] = 0.
|
||||
y[y > 0.] = 1.
|
||||
y_pred[:, i] = y
|
||||
return y_pred.astype(int)
|
||||
|
||||
@abstractmethod
|
||||
def _fit(self, x_train, y_train, label):
|
||||
pass
|
||||
|
||||
|
||||
class LogisticWarmStartPredictor(WarmStartPredictor):
|
||||
def __init__(self,
|
||||
min_samples=100,
|
||||
thr_fix=[0.99, 0.99],
|
||||
thr_balance=[0.80, 0.80],
|
||||
thr_alpha=[0.50, 0.50],
|
||||
):
|
||||
super().__init__()
|
||||
self.min_samples = min_samples
|
||||
self.thr_fix = thr_fix
|
||||
self.thr_balance = thr_balance
|
||||
self.thr_alpha = thr_alpha
|
||||
|
||||
def _fit(self, x_train, y_train, label):
|
||||
y_train_avg = np.average(y_train)
|
||||
|
||||
# If number of samples is too small, don't predict anything.
|
||||
if x_train.shape[0] < self.min_samples:
|
||||
return 0
|
||||
|
||||
# If vast majority of observations are true, always return true.
|
||||
if y_train_avg > self.thr_fix[label]:
|
||||
return 1
|
||||
|
||||
# If dataset is not balanced enough, don't predict anything.
|
||||
if y_train_avg < (1 - self.thr_balance[label]) or y_train_avg > self.thr_balance[label]:
|
||||
return 0
|
||||
|
||||
reg = make_pipeline(StandardScaler(), LogisticRegression())
|
||||
reg_score = np.mean(cross_val_score(reg, x_train, y_train, cv=5))
|
||||
dummy_score = max(y_train_avg, 1 - y_train_avg)
|
||||
reg_thr = 1. * self.thr_alpha[label] + dummy_score * (1 - self.thr_alpha[label])
|
||||
|
||||
# If cross-validation score is too low, don't predict anything.
|
||||
if reg_score < reg_thr:
|
||||
return 0
|
||||
|
||||
reg.fit(x_train, y_train.astype(int))
|
||||
return reg
|
||||
|
||||
|
||||
class KnnWarmStartPredictor(WarmStartPredictor):
|
||||
def __init__(self,
|
||||
k=50,
|
||||
min_samples=1,
|
||||
thr_clip=[0.80, 0.80],
|
||||
thr_fix=[1.0, 1.0],
|
||||
):
|
||||
super().__init__(thr_clip=thr_clip)
|
||||
self.k = k
|
||||
self.thr_fix = thr_fix
|
||||
self.min_samples = min_samples
|
||||
|
||||
def _fit(self, x_train, y_train, label):
|
||||
y_train_avg = np.average(y_train)
|
||||
|
||||
# If number of training samples is too small, don't predict anything.
|
||||
if x_train.shape[0] < self.min_samples:
|
||||
logger.debug("Too few samples; return 0")
|
||||
return 0
|
||||
|
||||
# If vast majority of observations are true, always return true.
|
||||
if y_train_avg >= self.thr_fix[label]:
|
||||
logger.debug("Consensus reached; return 1")
|
||||
return 1
|
||||
|
||||
# If vast majority of observations are false, always return false.
|
||||
if y_train_avg <= (1 - self.thr_fix[label]):
|
||||
logger.debug("Consensus reached; return 0")
|
||||
return 0
|
||||
|
||||
logger.debug("Training classifier...")
|
||||
k = min(self.k, x_train.shape[0])
|
||||
knn = KNeighborsClassifier(n_neighbors=k)
|
||||
knn.fit(x_train, y_train)
|
||||
return knn
|
||||
|
||||
|
||||
class WarmStartComponent(Component):
|
||||
def __init__(self,
|
||||
predictor_prototype=KnnWarmStartPredictor(),
|
||||
mode="exact",
|
||||
):
|
||||
self.mode = mode
|
||||
self.x_train = {}
|
||||
self.y_train = {}
|
||||
self.predictors = {}
|
||||
self.predictor_prototype = predictor_prototype
|
||||
self.is_warm_start_available = False
|
||||
|
||||
def before_solve(self, solver, instance, model):
|
||||
# Build x_test
|
||||
x_test = CombinedExtractor([UserFeaturesExtractor(),
|
||||
SolutionExtractor(),
|
||||
]).extract([instance], [model])
|
||||
|
||||
# Update self.x_train
|
||||
self.x_train = Extractor.merge([self.x_train, x_test],
|
||||
vertical=True)
|
||||
|
||||
# Predict solutions
|
||||
count_total, count_fixed = 0, 0
|
||||
var_split = Extractor.split_variables(instance, model)
|
||||
for category in var_split.keys():
|
||||
var_index_pairs = var_split[category]
|
||||
if category not in self.predictors.keys():
|
||||
continue
|
||||
ws = self.predictors[category].predict(x_test[category])
|
||||
assert ws.shape == (len(var_index_pairs), 2)
|
||||
for i in range(len(var_index_pairs)):
|
||||
var, index = var_index_pairs[i]
|
||||
count_total += 1
|
||||
if self.mode == "heuristic":
|
||||
if ws[i,0] > 0.5:
|
||||
var[index].fix(0)
|
||||
count_fixed += 1
|
||||
if solver.is_persistent:
|
||||
solver.internal_solver.update_var(var[index])
|
||||
elif ws[i,1] > 0.5:
|
||||
var[index].fix(1)
|
||||
count_fixed += 1
|
||||
if solver.is_persistent:
|
||||
solver.internal_solver.update_var(var[index])
|
||||
else:
|
||||
var[index].value = None
|
||||
if ws[i,0] > 0.5:
|
||||
count_fixed += 1
|
||||
var[index].value = 0
|
||||
self.is_warm_start_available = True
|
||||
elif ws[i,1] > 0.5:
|
||||
count_fixed += 1
|
||||
var[index].value = 1
|
||||
self.is_warm_start_available = True
|
||||
logger.info("Setting values for %d variables (out of %d)" % (count_fixed, count_total))
|
||||
|
||||
|
||||
def after_solve(self, solver, instance, model):
|
||||
y_test = SolutionExtractor().extract([instance], [model])
|
||||
self.y_train = Extractor.merge([self.y_train, y_test], vertical=True)
|
||||
|
||||
def fit(self, solver, n_jobs=1):
|
||||
for category in tqdm(self.x_train.keys(), desc="Warm start"):
|
||||
x_train = self.x_train[category]
|
||||
y_train = self.y_train[category]
|
||||
self.predictors[category] = deepcopy(self.predictor_prototype)
|
||||
self.predictors[category].fit(x_train, y_train)
|
||||
|
||||
def merge(self, other_components):
|
||||
# Merge x_train and y_train
|
||||
keys = set(self.x_train.keys())
|
||||
for comp in other_components:
|
||||
keys = keys.union(set(comp.x_train.keys()))
|
||||
for key in keys:
|
||||
x_train_submatrices = [comp.x_train[key]
|
||||
for comp in other_components
|
||||
if key in comp.x_train.keys()]
|
||||
y_train_submatrices = [comp.y_train[key]
|
||||
for comp in other_components
|
||||
if key in comp.y_train.keys()]
|
||||
if key in self.x_train.keys():
|
||||
x_train_submatrices += [self.x_train[key]]
|
||||
y_train_submatrices += [self.y_train[key]]
|
||||
self.x_train[key] = np.vstack(x_train_submatrices)
|
||||
self.y_train[key] = np.vstack(y_train_submatrices)
|
||||
|
||||
# Merge trained predictors
|
||||
for comp in other_components:
|
||||
for key in comp.predictors.keys():
|
||||
if key not in self.predictors.keys():
|
||||
self.predictors[key] = comp.predictors[key]
|
||||
|
||||
Reference in New Issue
Block a user