Add ScikitLearnRegressor; move sklearn classes to their own file

master
Alinson S. Xavier 5 years ago
parent 820a6256c2
commit 59c734f2a1

@@ -6,6 +6,9 @@ from .benchmark import BenchmarkRunner
 from .classifiers import (
     Classifier,
     Regressor,
-    ScikitLearnClassifier,
 )
+from .classifiers.sklearn import (
+    ScikitLearnRegressor,
+    ScikitLearnClassifier,
+)
 from .classifiers.adaptive import AdaptiveClassifier
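With these exports in place, the wrappers are importable both from the package root and from the new submodule. A minimal sketch of the two equivalent import paths (illustrative, not part of the commit):

    from miplearn import ScikitLearnClassifier, ScikitLearnRegressor
    # ...or, equivalently, directly from the new module:
    from miplearn.classifiers.sklearn import ScikitLearnClassifier, ScikitLearnRegressor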

@@ -3,10 +3,9 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 from abc import ABC, abstractmethod
-from typing import Optional, Any, cast
+from typing import Optional

 import numpy as np
-import sklearn


 class Classifier(ABC):
@@ -150,58 +149,3 @@ class Regressor(ABC):
         Returns an unfitted copy of this regressor with the same hyperparameters.
         """
         pass
-
-
-class ScikitLearnClassifier(Classifier):
-    """
-    Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
-    correct dimensions and types.
-    """
-
-    def __init__(self, clf: Any) -> None:
-        super().__init__()
-        self.inner_clf = clf
-        self.constant: Optional[np.ndarray] = None
-
-    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
-        super().fit(x_train, y_train)
-        (n_samples, n_classes) = y_train.shape
-        assert n_classes == 2, (
-            f"Scikit-learn classifiers must have exactly two classes. "
-            f"{n_classes} classes were provided instead."
-        )
-        # When all samples belong to the same class, sklearn's predict_proba returns
-        # an array with a single column. The following check avoid this strange
-        # behavior.
-        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
-        if mean.max() == 1.0:
-            self.constant = mean
-            return
-        self.inner_clf.fit(x_train, y_train[:, 1])
-
-    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
-        super().predict_proba(x_test)
-        n_samples = x_test.shape[0]
-        if self.constant is not None:
-            return np.array([self.constant for n in range(n_samples)])
-        sklearn_proba = self.inner_clf.predict_proba(x_test)
-        if isinstance(sklearn_proba, list):
-            assert len(sklearn_proba) == self.n_classes
-            for pb in sklearn_proba:
-                assert isinstance(pb, np.ndarray)
-                assert pb.dtype in [np.float16, np.float32, np.float64]
-                assert pb.shape == (n_samples, 2)
-            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
-            assert proba.shape == (n_samples, self.n_classes)
-            return proba
-        else:
-            assert isinstance(sklearn_proba, np.ndarray)
-            assert sklearn_proba.shape == (n_samples, 2)
-            return sklearn_proba
-
-    def clone(self) -> "ScikitLearnClassifier":
-        return ScikitLearnClassifier(
-            clf=sklearn.base.clone(self.inner_clf),
-        )

@@ -13,7 +13,8 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from miplearn.classifiers import Classifier, ScikitLearnClassifier
+from miplearn.classifiers import Classifier
+from miplearn.classifiers.sklearn import ScikitLearnClassifier
 from miplearn.classifiers.counting import CountingClassifier

 logger = logging.getLogger(__name__)

@@ -10,7 +10,8 @@ from sklearn.dummy import DummyClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score
-from miplearn.classifiers import Classifier, ScikitLearnClassifier
+from miplearn.classifiers import Classifier
+from miplearn.classifiers.sklearn import ScikitLearnClassifier

 logger = logging.getLogger(__name__)

@@ -0,0 +1,93 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from typing import Optional, Any, cast
+
+import numpy as np
+import sklearn
+
+from miplearn.classifiers import Classifier, Regressor
+
+
+class ScikitLearnClassifier(Classifier):
+    """
+    Wrapper for scikit-learn classifiers, which makes sure inputs and outputs have
+    the correct dimensions and types.
+    """
+
+    def __init__(self, clf: Any) -> None:
+        super().__init__()
+        self.inner_clf = clf
+        self.constant: Optional[np.ndarray] = None
+
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        (n_samples, n_classes) = y_train.shape
+        assert n_classes == 2, (
+            f"Scikit-learn classifiers must have exactly two classes. "
+            f"{n_classes} classes were provided instead."
+        )
+        # When all samples belong to the same class, sklearn's predict_proba
+        # returns an array with a single column. The following check avoids this
+        # strange behavior: if the labels are constant, store them and skip
+        # fitting the inner classifier altogether.
+        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
+        if mean.max() == 1.0:
+            self.constant = mean
+            return
+        # Fit on the second column only (the indicator of the positive class).
+        self.inner_clf.fit(x_train, y_train[:, 1])
+
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        n_samples = x_test.shape[0]
+        if self.constant is not None:
+            return np.array([self.constant for n in range(n_samples)])
+        sklearn_proba = self.inner_clf.predict_proba(x_test)
+        if isinstance(sklearn_proba, list):
+            # Multilabel estimators return one (n_samples, 2) array per label;
+            # keep only the positive-class column of each and stack them.
+            assert len(sklearn_proba) == self.n_classes
+            for pb in sklearn_proba:
+                assert isinstance(pb, np.ndarray)
+                assert pb.dtype in [np.float16, np.float32, np.float64]
+                assert pb.shape == (n_samples, 2)
+            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
+            assert proba.shape == (n_samples, self.n_classes)
+            return proba
+        else:
+            assert isinstance(sklearn_proba, np.ndarray)
+            assert sklearn_proba.shape == (n_samples, 2)
+            return sklearn_proba
+
+    def clone(self) -> "ScikitLearnClassifier":
+        return ScikitLearnClassifier(
+            clf=sklearn.base.clone(self.inner_clf),
+        )
+
+
+class ScikitLearnRegressor(Regressor):
+    """
+    Wrapper for scikit-learn regressors, which makes sure inputs and outputs have
+    the correct dimensions and types.
+    """
+
+    def __init__(self, reg: Any) -> None:
+        super().__init__()
+        self.inner_reg = reg
+
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        self.inner_reg.fit(x_train, y_train)
+
+    def predict(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict(x_test)
+        n_samples = x_test.shape[0]
+        sklearn_pred = self.inner_reg.predict(x_test)
+        assert isinstance(sklearn_pred, np.ndarray)
+        assert sklearn_pred.shape[0] == n_samples
+        return sklearn_pred
+
+    def clone(self) -> "ScikitLearnRegressor":
+        return ScikitLearnRegressor(
+            reg=sklearn.base.clone(self.inner_reg),
+        )
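For context, a short usage sketch of the two new wrappers (illustrative only: LogisticRegression and LinearRegression stand in for any scikit-learn estimator, and the base Classifier/Regressor classes are assumed to accept these shapes). The classifier expects binary labels one-hot encoded into exactly two columns, and predict_proba always returns an (n_samples, 2) array:

    import numpy as np
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from miplearn.classifiers.sklearn import ScikitLearnClassifier, ScikitLearnRegressor

    x = np.array([[0.0], [1.0], [2.0], [3.0]])
    # One-hot labels: column 0 is the negative class, column 1 the positive class.
    y = np.array([[True, False], [True, False], [False, True], [False, True]])
    clf = ScikitLearnClassifier(LogisticRegression())
    clf.fit(x, y)
    clf.predict_proba(x)  # shape (4, 2): [P(False), P(True)] for each sample

    reg = ScikitLearnRegressor(LinearRegression())
    reg.fit(x, np.array([[0.5], [1.5], [2.5], [3.5]]))
    reg.predict(x)  # shape (4, 1)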

@@ -1,7 +1,6 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
-from typing import cast
 from numpy.linalg import norm
 from sklearn.svm import SVC

@@ -6,7 +6,7 @@ import numpy as np
 from numpy.linalg import norm
 from sklearn.svm import SVC
-from miplearn.classifiers import ScikitLearnClassifier
+from miplearn.classifiers.sklearn import ScikitLearnClassifier
 from miplearn.classifiers.cv import CrossValidatedClassifier
 from tests.classifiers import _build_circle_training_data

@@ -4,37 +4,30 @@
 import numpy as np
 from numpy.testing import assert_array_equal
+from sklearn.linear_model import LinearRegression
 from sklearn.neighbors import KNeighborsClassifier
-from miplearn import ScikitLearnClassifier
+from miplearn.classifiers.sklearn import ScikitLearnClassifier, ScikitLearnRegressor


 def test_constant_prediction():
-    x_train = np.array(
-        [
-            [0.0, 1.0],
-            [1.0, 0.0],
-        ]
-    )
-    y_train = np.array(
-        [
-            [True, False],
-            [True, False],
-        ]
-    )
-    clf = ScikitLearnClassifier(
-        KNeighborsClassifier(
-            n_neighbors=1,
-        )
-    )
+    x_train = np.array([[0.0, 1.0], [1.0, 0.0]])
+    y_train = np.array([[True, False], [True, False]])
+    clf = ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=1))
     clf.fit(x_train, y_train)
     proba = clf.predict_proba(x_train)
     assert_array_equal(
         proba,
-        np.array(
-            [
-                [1.0, 0.0],
-                [1.0, 0.0],
-            ]
-        ),
+        np.array([[1.0, 0.0], [1.0, 0.0]]),
     )


+def test_regressor():
+    x_train = np.array([[0.0, 1.0], [1.0, 4.0], [2.0, 2.0]])
+    y_train = np.array([[1.0], [5.0], [4.0]])
+    x_test = np.array([[4.0, 4.0], [0.0, 0.0]])
+    reg = ScikitLearnRegressor(LinearRegression())
+    reg.fit(x_train, y_train)
+    y_test_actual = reg.predict(x_test)
+    y_test_expected = np.array([[8.0], [0.0]])
+    assert_array_equal(np.round(y_test_actual, 2), y_test_expected)
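The expected values in test_regressor follow directly from the training data: the three samples lie exactly on the plane y = x1 + x2 (0 + 1 = 1, 1 + 4 = 5, 2 + 2 = 4), so ordinary least squares recovers that plane and predicts 4 + 4 = 8.0 and 0 + 0 = 0.0 for the two test points, up to rounding.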
