Add ScikitLearnRegressor; move sklearn classes to their own file

master
Alinson S. Xavier 5 years ago
parent 820a6256c2
commit 59c734f2a1

@ -6,6 +6,9 @@ from .benchmark import BenchmarkRunner
from .classifiers import ( from .classifiers import (
Classifier, Classifier,
Regressor, Regressor,
)
from .classifiers.sklearn import (
ScikitLearnRegressor,
ScikitLearnClassifier, ScikitLearnClassifier,
) )
from .classifiers.adaptive import AdaptiveClassifier from .classifiers.adaptive import AdaptiveClassifier

@ -3,10 +3,9 @@
# Released under the modified BSD license. See COPYING.md for more details. # Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Optional, Any, cast from typing import Optional
import numpy as np import numpy as np
import sklearn
class Classifier(ABC): class Classifier(ABC):
@ -150,58 +149,3 @@ class Regressor(ABC):
Returns an unfitted copy of this regressor with the same hyperparameters. Returns an unfitted copy of this regressor with the same hyperparameters.
""" """
pass pass
class ScikitLearnClassifier(Classifier):
    """
    Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
    correct dimensions and types.

    Only two-class problems are supported: `y_train` must have exactly two columns,
    and the inner classifier is trained on the second column only.
    """

    def __init__(self, clf: Any) -> None:
        super().__init__()
        self.inner_clf = clf
        # Per-class probabilities returned for every sample when the most recent
        # call to `fit` found a class that contains all samples; None otherwise.
        self.constant: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Trains the inner classifier on `x_train`, using the second column of
        `y_train` as labels.

        If one of the two classes contains every training sample, the inner
        classifier is not trained at all; the column means of `y_train` are stored
        instead and later returned by `predict_proba` for every sample.

        Raises
        ------
        AssertionError
            If `y_train` does not have exactly two columns.
        """
        super().fit(x_train, y_train)
        # Unpacking also rejects arrays that are not two-dimensional.
        (_, n_classes) = y_train.shape
        assert n_classes == 2, (
            f"Scikit-learn classifiers must have exactly two classes. "
            f"{n_classes} classes were provided instead."
        )

        # Forget the constant stored by a previous call to fit. Without this reset,
        # refitting on data with both classes present would still produce the old
        # constant prediction.
        self.constant = None

        # When all samples belong to the same class, sklearn's predict_proba returns
        # an array with a single column. The following check avoids this strange
        # behavior.
        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
        if mean.max() == 1.0:
            self.constant = mean
            return

        self.inner_clf.fit(x_train, y_train[:, 1])

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Returns an array with one row per row of `x_test`.

        If the last call to `fit` stored a constant prediction, that constant is
        repeated for every row. Otherwise, the output of the inner classifier's
        `predict_proba` is validated and returned.
        """
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        if self.constant is not None:
            # np.tile keeps the result two-dimensional even when n_samples is zero,
            # unlike building the array from an (empty) Python list.
            return np.tile(self.constant, (n_samples, 1))
        sklearn_proba = self.inner_clf.predict_proba(x_test)
        if isinstance(sklearn_proba, list):
            # The inner classifier returned one (n_samples, 2) array per class; keep
            # only the second column of each and place them side by side.
            # NOTE(review): self.n_classes is presumably set by Classifier.fit;
            # confirm against the base class.
            assert len(sklearn_proba) == self.n_classes
            for pb in sklearn_proba:
                assert isinstance(pb, np.ndarray)
                assert pb.dtype in [np.float16, np.float32, np.float64]
                assert pb.shape == (n_samples, 2)
            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
            assert proba.shape == (n_samples, self.n_classes)
            return proba
        else:
            assert isinstance(sklearn_proba, np.ndarray)
            assert sklearn_proba.shape == (n_samples, 2)
            return sklearn_proba

    def clone(self) -> "ScikitLearnClassifier":
        """
        Returns a new wrapper around a scikit-learn clone of the inner classifier.
        Any stored constant prediction is not carried over.
        """
        return ScikitLearnClassifier(
            clf=sklearn.base.clone(self.inner_clf),
        )

@ -13,7 +13,8 @@ from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from miplearn.classifiers import Classifier, ScikitLearnClassifier from miplearn.classifiers import Classifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.counting import CountingClassifier from miplearn.classifiers.counting import CountingClassifier
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

@ -10,7 +10,8 @@ from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score from sklearn.model_selection import cross_val_score
from miplearn.classifiers import Classifier, ScikitLearnClassifier from miplearn.classifiers import Classifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

@ -0,0 +1,93 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, Any, cast
import numpy as np
import sklearn
from miplearn.classifiers import Classifier, Regressor
class ScikitLearnClassifier(Classifier):
    """
    Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
    correct dimensions and types.

    Only two-class problems are supported: `y_train` must have exactly two columns,
    and the inner classifier is trained on the second column only.
    """

    def __init__(self, clf: Any) -> None:
        super().__init__()
        self.inner_clf = clf
        # Per-class probabilities returned for every sample when the most recent
        # call to `fit` found a class that contains all samples; None otherwise.
        self.constant: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Trains the inner classifier on `x_train`, using the second column of
        `y_train` as labels.

        If one of the two classes contains every training sample, the inner
        classifier is not trained at all; the column means of `y_train` are stored
        instead and later returned by `predict_proba` for every sample.

        Raises
        ------
        AssertionError
            If `y_train` does not have exactly two columns.
        """
        super().fit(x_train, y_train)
        # Unpacking also rejects arrays that are not two-dimensional.
        (_, n_classes) = y_train.shape
        assert n_classes == 2, (
            f"Scikit-learn classifiers must have exactly two classes. "
            f"{n_classes} classes were provided instead."
        )

        # Forget the constant stored by a previous call to fit. Without this reset,
        # refitting on data with both classes present would still produce the old
        # constant prediction.
        self.constant = None

        # When all samples belong to the same class, sklearn's predict_proba returns
        # an array with a single column. The following check avoids this strange
        # behavior.
        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
        if mean.max() == 1.0:
            self.constant = mean
            return

        self.inner_clf.fit(x_train, y_train[:, 1])

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Returns an array with one row per row of `x_test`.

        If the last call to `fit` stored a constant prediction, that constant is
        repeated for every row. Otherwise, the output of the inner classifier's
        `predict_proba` is validated and returned.
        """
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        if self.constant is not None:
            # np.tile keeps the result two-dimensional even when n_samples is zero,
            # unlike building the array from an (empty) Python list.
            return np.tile(self.constant, (n_samples, 1))
        sklearn_proba = self.inner_clf.predict_proba(x_test)
        if isinstance(sklearn_proba, list):
            # The inner classifier returned one (n_samples, 2) array per class; keep
            # only the second column of each and place them side by side.
            # NOTE(review): self.n_classes is presumably set by Classifier.fit;
            # confirm against the base class.
            assert len(sklearn_proba) == self.n_classes
            for pb in sklearn_proba:
                assert isinstance(pb, np.ndarray)
                assert pb.dtype in [np.float16, np.float32, np.float64]
                assert pb.shape == (n_samples, 2)
            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
            assert proba.shape == (n_samples, self.n_classes)
            return proba
        else:
            assert isinstance(sklearn_proba, np.ndarray)
            assert sklearn_proba.shape == (n_samples, 2)
            return sklearn_proba

    def clone(self) -> "ScikitLearnClassifier":
        """
        Returns a new wrapper around a scikit-learn clone of the inner classifier.
        Any stored constant prediction is not carried over.
        """
        return ScikitLearnClassifier(
            clf=sklearn.base.clone(self.inner_clf),
        )
class ScikitLearnRegressor(Regressor):
    """
    Adapter that exposes a ScikitLearn regressor through the Regressor interface and
    checks that its predictions have the expected type and number of rows.
    """

    def __init__(self, reg: Any) -> None:
        super().__init__()
        # The wrapped scikit-learn estimator; all training and prediction is
        # delegated to it.
        self.inner_reg = reg

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """Runs the base-class fit, then trains the wrapped regressor on the data."""
        super().fit(x_train, y_train)
        self.inner_reg.fit(x_train, y_train)

    def predict(self, x_test: np.ndarray) -> np.ndarray:
        """
        Returns the wrapped regressor's predictions for `x_test`, after asserting
        that they form a numpy array with one row per row of `x_test`.
        """
        super().predict(x_test)
        expected_rows = x_test.shape[0]
        predictions = self.inner_reg.predict(x_test)
        assert isinstance(predictions, np.ndarray)
        assert predictions.shape[0] == expected_rows
        return predictions

    def clone(self) -> "ScikitLearnRegressor":
        """Returns a new wrapper around a scikit-learn clone of the inner regressor."""
        fresh_reg = sklearn.base.clone(self.inner_reg)
        return ScikitLearnRegressor(reg=fresh_reg)

@ -1,7 +1,6 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details. # Released under the modified BSD license. See COPYING.md for more details.
from typing import cast
from numpy.linalg import norm from numpy.linalg import norm
from sklearn.svm import SVC from sklearn.svm import SVC

@ -6,7 +6,7 @@ import numpy as np
from numpy.linalg import norm from numpy.linalg import norm
from sklearn.svm import SVC from sklearn.svm import SVC
from miplearn.classifiers import ScikitLearnClassifier from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.cv import CrossValidatedClassifier from miplearn.classifiers.cv import CrossValidatedClassifier
from tests.classifiers import _build_circle_training_data from tests.classifiers import _build_circle_training_data

@ -4,37 +4,30 @@
import numpy as np import numpy as np
from numpy.testing import assert_array_equal from numpy.testing import assert_array_equal
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier from sklearn.neighbors import KNeighborsClassifier
from miplearn import ScikitLearnClassifier from miplearn.classifiers.sklearn import ScikitLearnClassifier, ScikitLearnRegressor
def test_constant_prediction():
    """
    When every training sample carries the same label, predict_proba must return
    that label distribution for each sample.
    """
    features = np.array([[0.0, 1.0], [1.0, 0.0]])
    labels = np.array([[True, False], [True, False]])
    knn = KNeighborsClassifier(n_neighbors=1)
    clf = ScikitLearnClassifier(knn)
    clf.fit(features, labels)
    expected = np.array([[1.0, 0.0], [1.0, 0.0]])
    assert_array_equal(clf.predict_proba(features), expected)
def test_regressor():
    """
    Fits a wrapped LinearRegression on samples whose target equals the sum of the
    two features, then checks the predictions on two unseen points.
    """
    features = np.array([[0.0, 1.0], [1.0, 4.0], [2.0, 2.0]])
    targets = np.array([[1.0], [5.0], [4.0]])
    queries = np.array([[4.0, 4.0], [0.0, 0.0]])
    reg = ScikitLearnRegressor(LinearRegression())
    reg.fit(features, targets)
    # Round to two decimals so tiny floating-point deviations of the least-squares
    # fit do not break the exact comparison below.
    predicted = np.round(reg.predict(queries), 2)
    assert_array_equal(predicted, np.array([[8.0], [0.0]]))

Loading…
Cancel
Save