Implement LogisticWarmStartPredictor with tests

pull/1/head
Alinson S. Xavier 6 years ago
parent e13eba86b0
commit eb9a677136

@ -2,9 +2,8 @@
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. # Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov> # Written by Alinson S. Xavier <axavier@anl.gov>
# from .warmstart import WarmStartPredictor
from .transformers import PerVariableTransformer from .transformers import PerVariableTransformer
from .warmstart import WarmStartPredictor from .warmstart import LogisticWarmStartPredictor
import pyomo.environ as pe import pyomo.environ as pe
import numpy as np import numpy as np
@ -17,9 +16,11 @@ class LearningSolver:
def __init__(self, def __init__(self,
threads=4, threads=4,
parent_solver=pe.SolverFactory('cbc')): parent_solver=pe.SolverFactory('cbc'),
ws_predictor_factory=LogisticWarmStartPredictor):
self.parent_solver = parent_solver self.parent_solver = parent_solver
self.parent_solver.options["threads"] = threads self.parent_solver.options["threads"] = threads
self.ws_predictor_factory = ws_predictor_factory
self.x_train = {} self.x_train = {}
self.y_train = {} self.y_train = {}
self.ws_predictors = {} self.ws_predictors = {}
@ -75,7 +76,7 @@ class LearningSolver:
for category in x_train_dict.keys(): for category in x_train_dict.keys():
x_train = x_train_dict[category] x_train = x_train_dict[category]
y_train = y_train_dict[category] y_train = y_train_dict[category]
self.ws_predictors[category] = WarmStartPredictor() self.ws_predictors[category] = self.ws_predictor_factory()
self.ws_predictors[category].fit(x_train, y_train) self.ws_predictors[category].fit(x_train, y_train)
def _solve(self, model, tee=False): def _solve(self, model, tee=False):

@ -0,0 +1,64 @@
# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>
from miplearn.warmstart import LogisticWarmStartPredictor
from sklearn.metrics import accuracy_score, precision_score
import numpy as np
def _generate_dataset(ground_truth, n_samples=10_000):
    """Build a random train/test dataset labelled by ``ground_truth``.

    Two feature matrices of shape ``(n_samples, 5)`` are drawn with
    ``np.random.rand``; ``ground_truth`` is then applied to each of them
    to produce the corresponding labels.

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test)``.
    """
    shape = (n_samples, 5)
    # Both feature matrices are drawn before any label is computed, so a
    # ground truth that itself consumes random numbers sees the same
    # global RNG sequence regardless of how this helper is laid out.
    features_train = np.random.rand(*shape)
    features_test = np.random.rand(*shape)
    labels_train = ground_truth(features_train)
    labels_test = ground_truth(features_test)
    return features_train, labels_train, features_test, labels_test
def _is_sum_greater_than_two(x):
    """Label each row of ``x`` by whether its entries sum to more than 2.0.

    Returns:
        An integer array with one row per row of ``x`` and two columns:
        column 0 is 1 where the row sum exceeds 2.0 (0 otherwise) and
        column 1 is its complement.
    """
    row_sums = np.sum(x, axis=1)
    exceeds = (row_sums > 2.0).astype(int)
    return np.column_stack((exceeds, 1 - exceeds))
def _always_zero(x):
    """Produce constant labels: column 0 is all zeros, column 1 all ones.

    Only the number of rows of ``x`` is used; its values are ignored.

    Returns:
        A float array with one row per row of ``x`` and two columns.
    """
    n_rows = x.shape[0]
    zeros = np.zeros(n_rows)
    return np.column_stack((zeros, 1 - zeros))
def _random_values(x):
    """Produce labels drawn at random, independent of the values in ``x``.

    One integer in {0, 1} is drawn per row of ``x`` with
    ``np.random.randint``.

    Returns:
        An integer array with one row per row of ``x`` and two columns:
        column 0 holds the draws and column 1 their complement.
    """
    n_rows = x.shape[0]
    draws = np.random.randint(2, size=n_rows)
    return np.column_stack((draws, 1 - draws))
def test_logistic_ws_with_balanced_labels():
    """Labels from ``_is_sum_greater_than_two`` must be predicted with
    accuracy above 0.99 on both label columns."""
    x_train, y_train, x_test, y_test = _generate_dataset(_is_sum_greater_than_two)
    predictor = LogisticWarmStartPredictor()
    predictor.fit(x_train, y_train)
    predicted = predictor.predict(x_test)
    # Check each of the two label columns separately.
    for column in (0, 1):
        assert accuracy_score(y_test[:, column], predicted[:, column]) > 0.99
def test_logistic_ws_with_unbalanced_labels():
    """Constant labels from ``_always_zero`` must be reproduced exactly on
    both label columns."""
    x_train, y_train, x_test, y_test = _generate_dataset(_always_zero)
    predictor = LogisticWarmStartPredictor()
    predictor.fit(x_train, y_train)
    predicted = predictor.predict(x_test)
    # Check each of the two label columns separately.
    for column in (0, 1):
        assert accuracy_score(y_test[:, column], predicted[:, column]) == 1.0
def test_logistic_ws_with_unpredictable_labels():
    """With labels from ``_random_values`` the predictor must output zeros
    everywhere (the predictions sum to zero)."""
    x_train, y_train, x_test, _ = _generate_dataset(_random_values)
    predictor = LogisticWarmStartPredictor()
    predictor.fit(x_train, y_train)
    predicted = predictor.predict(x_test)
    assert predicted.sum() == 0
def test_logistic_ws_with_small_sample_size():
    """With only three training samples the predictor must output zeros
    everywhere (the predictions sum to zero)."""
    x_train, y_train, x_test, _ = _generate_dataset(_random_values, n_samples=3)
    predictor = LogisticWarmStartPredictor()
    predictor.fit(x_train, y_train)
    predicted = predictor.predict(x_test)
    assert predicted.sum() == 0

@ -2,42 +2,73 @@
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. # Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov> # Written by Alinson S. Xavier <axavier@anl.gov>
from abc import ABC, abstractmethod
import numpy as np import numpy as np
from sklearn.pipeline import make_pipeline from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
class WarmStartPredictor(ABC):
class WarmStartPredictor: def __init__(self):
def __init__(self, self.models = [None, None]
thr_fix_zero=0.05,
thr_fix_one=0.95,
thr_predict=0.95):
self.model = None
self.thr_predict = thr_predict
self.thr_fix_zero = thr_fix_zero
self.thr_fix_one = thr_fix_one
def fit(self, x_train, y_train): def fit(self, x_train, y_train):
assert isinstance(x_train, np.ndarray) assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray) assert isinstance(y_train, np.ndarray)
assert y_train.shape[1] == 2
assert y_train.shape[0] == x_train.shape[0] assert y_train.shape[0] == x_train.shape[0]
y_hat = np.average(y_train[:, 1]) assert y_train.shape[1] == 2
if y_hat < self.thr_fix_zero or y_hat > self.thr_fix_one: for i in [0,1]:
self.model = int(y_hat) self.models[i] = self._fit(x_train, y_train[:, i], i)
else:
self.model = make_pipeline(StandardScaler(), LogisticRegression())
self.model.fit(x_train, y_train[:, 1].astype(int))
def predict(self, x_test): def predict(self, x_test):
assert isinstance(x_test, np.ndarray) assert isinstance(x_test, np.ndarray)
if isinstance(self.model, int): y_pred = np.zeros((x_test.shape[0], 2), dtype=np.int)
p_test = np.array([[1 - self.model, self.model] for i in [0,1]:
for _ in range(x_test.shape[0])]) if isinstance(self.models[i], int):
else: y_pred[:, i] = self.models[i]
p_test = self.model.predict_proba(x_test) else:
p_test[p_test < self.thr_predict] = 0 y_pred[:, i] = self.models[i].predict(x_test)
p_test[p_test > 0] = 1 return y_pred
p_test = p_test.astype(int)
return p_test @abstractmethod
def _fit(self, x_train, y_train, label):
pass
class LogisticWarmStartPredictor(WarmStartPredictor):
    """Warm-start predictor backed by a scaled logistic regression.

    For each label column, ``_fit`` returns either a constant (``0`` or
    ``1``) or a fitted ``StandardScaler`` + ``LogisticRegression``
    pipeline, depending on the thresholds below.

    Args:
        min_samples: Minimum number of training rows; with fewer rows
            ``_fit`` returns the constant 0.
        thr_fix: Per-label threshold on the label average above which
            ``_fit`` returns the constant 1. Defaults to ``[0.99, 0.99]``.
        thr_balance: Per-label balance threshold ``t``; if the label
            average lies outside ``[1 - t, t]``, ``_fit`` returns the
            constant 0. Defaults to ``[0.95, 0.95]``.
        thr_score: Per-label minimum mean 5-fold cross-validation score
            required to keep the regression. Defaults to ``[0.95, 0.95]``.
    """

    def __init__(self,
                 min_samples=100,
                 thr_fix=None,
                 thr_balance=None,
                 thr_score=None):
        super().__init__()
        self.min_samples = min_samples
        # Build the default threshold lists per instance. List literals in
        # the signature would be shared by every instance created with
        # defaults, so mutating one predictor's thresholds would silently
        # change all the others.
        self.thr_fix = [0.99, 0.99] if thr_fix is None else thr_fix
        self.thr_balance = [0.95, 0.95] if thr_balance is None else thr_balance
        self.thr_score = [0.95, 0.95] if thr_score is None else thr_score

    def _fit(self, x_train, y_train, label):
        """Choose the model used for one label column.

        Args:
            x_train: Training features, one row per sample.
            y_train: Training labels for this column, one entry per sample.
            label: Index used to look up the per-label thresholds.

        Returns:
            ``0`` or ``1`` for a constant prediction, or a pipeline fitted
            on ``x_train`` and ``y_train`` cast to ``int``.
        """
        # If number of samples is too small, don't predict anything.
        # Checked first so the average is never taken over too few rows.
        if x_train.shape[0] < self.min_samples:
            return 0

        y_train_avg = np.average(y_train)

        # If vast majority of observations are true, always return true.
        if y_train_avg > self.thr_fix[label]:
            return 1

        # If dataset is not balanced enough, don't predict anything.
        thr_balance = self.thr_balance[label]
        if y_train_avg < (1 - thr_balance) or y_train_avg > thr_balance:
            return 0

        reg = make_pipeline(StandardScaler(), LogisticRegression())
        reg_score = np.mean(cross_val_score(reg, x_train, y_train, cv=5))

        # If cross-validation score is too low, don't predict anything.
        if reg_score < self.thr_score[label]:
            return 0

        reg.fit(x_train, y_train.astype(int))
        return reg
Loading…
Cancel
Save