mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 01:18:52 -06:00
Add ScikitLearnRegressor; move sklearn classes to their own file
This commit is contained in:
@@ -6,6 +6,9 @@ from .benchmark import BenchmarkRunner
|
|||||||
from .classifiers import (
|
from .classifiers import (
|
||||||
Classifier,
|
Classifier,
|
||||||
Regressor,
|
Regressor,
|
||||||
|
)
|
||||||
|
from .classifiers.sklearn import (
|
||||||
|
ScikitLearnRegressor,
|
||||||
ScikitLearnClassifier,
|
ScikitLearnClassifier,
|
||||||
)
|
)
|
||||||
from .classifiers.adaptive import AdaptiveClassifier
|
from .classifiers.adaptive import AdaptiveClassifier
|
||||||
|
|||||||
@@ -3,10 +3,9 @@
|
|||||||
# Released under the modified BSD license. See COPYING.md for more details.
|
# Released under the modified BSD license. See COPYING.md for more details.
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Optional, Any, cast
|
from typing import Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sklearn
|
|
||||||
|
|
||||||
|
|
||||||
class Classifier(ABC):
|
class Classifier(ABC):
|
||||||
@@ -150,58 +149,3 @@ class Regressor(ABC):
|
|||||||
Returns an unfitted copy of this regressor with the same hyperparameters.
|
Returns an unfitted copy of this regressor with the same hyperparameters.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ScikitLearnClassifier(Classifier):
    """
    Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
    correct dimensions and types.

    The wrapped estimator is trained on the second column of `y_train` only. When
    one column of `y_train` is entirely ones (i.e. every training sample carries the
    same label), the wrapped estimator is not trained at all; the column means are
    stored in `self.constant` and returned for every sample by `predict_proba`.
    """

    def __init__(self, clf: Any) -> None:
        """
        Parameters
        ----------
        clf
            The estimator to wrap. This class calls its `fit` and `predict_proba`
            methods, and `sklearn.base.clone` is applied to it by `clone`.
        """
        super().__init__()
        self.inner_clf = clf
        # Per-column mean of the labels seen by the last `fit`, set only when that
        # training set had a single label; None otherwise.
        self.constant: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Train the wrapped classifier.

        Parameters
        ----------
        x_train
            Training features, passed unchanged to the wrapped classifier.
        y_train
            Two-dimensional array of labels with exactly two columns.

        Raises
        ------
        AssertionError
            If `y_train` does not have exactly two columns.
        """
        super().fit(x_train, y_train)
        _, n_classes = y_train.shape
        assert n_classes == 2, (
            f"Scikit-learn classifiers must have exactly two classes. "
            f"{n_classes} classes were provided instead."
        )

        # Discard the constant left over from a previous call to fit. Without this,
        # refitting on data with mixed labels would keep returning the stale
        # constant from predict_proba and silently ignore the newly trained model.
        self.constant = None

        # When all samples belong to the same class, sklearn's predict_proba returns
        # an array with a single column. The following check avoids this strange
        # behavior.
        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
        if mean.max() == 1.0:
            self.constant = mean
            return

        self.inner_clf.fit(x_train, y_train[:, 1])

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Return one row of class probabilities per row of `x_test`.

        If the last `fit` saw a single label, the stored constant is repeated for
        every sample. Otherwise the wrapped classifier's `predict_proba` output is
        validated and returned.
        """
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        if self.constant is not None:
            # np.tile keeps the result two-dimensional even when x_test is empty.
            return np.tile(self.constant, (n_samples, 1))
        sklearn_proba = self.inner_clf.predict_proba(x_test)
        if isinstance(sklearn_proba, list):
            # NOTE(review): a list is presumably what multi-output estimators
            # return (one array per output) -- confirm. `self.n_classes` is not
            # assigned in this class; assumed to be set by Classifier.fit.
            assert len(sklearn_proba) == self.n_classes
            for pb in sklearn_proba:
                assert isinstance(pb, np.ndarray)
                assert pb.dtype in [np.float16, np.float32, np.float64]
                assert pb.shape == (n_samples, 2)
            # Keep only the second column of each array, side by side.
            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
            assert proba.shape == (n_samples, self.n_classes)
            return proba
        else:
            assert isinstance(sklearn_proba, np.ndarray)
            assert sklearn_proba.shape == (n_samples, 2)
            return sklearn_proba

    def clone(self) -> "ScikitLearnClassifier":
        """
        Return a new wrapper around a `sklearn.base.clone` copy of the wrapped
        classifier. The stored constant is not carried over.
        """
        return ScikitLearnClassifier(
            clf=sklearn.base.clone(self.inner_clf),
        )
|
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ from sklearn.neighbors import KNeighborsClassifier
|
|||||||
from sklearn.pipeline import make_pipeline
|
from sklearn.pipeline import make_pipeline
|
||||||
from sklearn.preprocessing import StandardScaler
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
from miplearn.classifiers import Classifier, ScikitLearnClassifier
|
from miplearn.classifiers import Classifier
|
||||||
|
from miplearn.classifiers.sklearn import ScikitLearnClassifier
|
||||||
from miplearn.classifiers.counting import CountingClassifier
|
from miplearn.classifiers.counting import CountingClassifier
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ from sklearn.dummy import DummyClassifier
|
|||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
from sklearn.model_selection import cross_val_score
|
from sklearn.model_selection import cross_val_score
|
||||||
|
|
||||||
from miplearn.classifiers import Classifier, ScikitLearnClassifier
|
from miplearn.classifiers import Classifier
|
||||||
|
from miplearn.classifiers.sklearn import ScikitLearnClassifier
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
93
miplearn/classifiers/sklearn.py
Normal file
93
miplearn/classifiers/sklearn.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||||
|
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||||
|
# Released under the modified BSD license. See COPYING.md for more details.
|
||||||
|
|
||||||
|
from typing import Optional, Any, cast
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sklearn
|
||||||
|
|
||||||
|
from miplearn.classifiers import Classifier, Regressor
|
||||||
|
|
||||||
|
|
||||||
|
class ScikitLearnClassifier(Classifier):
    """
    Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
    correct dimensions and types.

    The wrapped estimator is trained on the second column of `y_train` only. When
    one column of `y_train` is entirely ones (i.e. every training sample carries the
    same label), the wrapped estimator is not trained at all; the column means are
    stored in `self.constant` and returned for every sample by `predict_proba`.
    """

    def __init__(self, clf: Any) -> None:
        """
        Parameters
        ----------
        clf
            The estimator to wrap. This class calls its `fit` and `predict_proba`
            methods, and `sklearn.base.clone` is applied to it by `clone`.
        """
        super().__init__()
        self.inner_clf = clf
        # Per-column mean of the labels seen by the last `fit`, set only when that
        # training set had a single label; None otherwise.
        self.constant: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Train the wrapped classifier.

        Parameters
        ----------
        x_train
            Training features, passed unchanged to the wrapped classifier.
        y_train
            Two-dimensional array of labels with exactly two columns.

        Raises
        ------
        AssertionError
            If `y_train` does not have exactly two columns.
        """
        super().fit(x_train, y_train)
        _, n_classes = y_train.shape
        assert n_classes == 2, (
            f"Scikit-learn classifiers must have exactly two classes. "
            f"{n_classes} classes were provided instead."
        )

        # Discard the constant left over from a previous call to fit. Without this,
        # refitting on data with mixed labels would keep returning the stale
        # constant from predict_proba and silently ignore the newly trained model.
        self.constant = None

        # When all samples belong to the same class, sklearn's predict_proba returns
        # an array with a single column. The following check avoids this strange
        # behavior.
        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
        if mean.max() == 1.0:
            self.constant = mean
            return

        self.inner_clf.fit(x_train, y_train[:, 1])

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Return one row of class probabilities per row of `x_test`.

        If the last `fit` saw a single label, the stored constant is repeated for
        every sample. Otherwise the wrapped classifier's `predict_proba` output is
        validated and returned.
        """
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        if self.constant is not None:
            # np.tile keeps the result two-dimensional even when x_test is empty.
            return np.tile(self.constant, (n_samples, 1))
        sklearn_proba = self.inner_clf.predict_proba(x_test)
        if isinstance(sklearn_proba, list):
            # NOTE(review): a list is presumably what multi-output estimators
            # return (one array per output) -- confirm. `self.n_classes` is not
            # assigned in this class; assumed to be set by Classifier.fit.
            assert len(sklearn_proba) == self.n_classes
            for pb in sklearn_proba:
                assert isinstance(pb, np.ndarray)
                assert pb.dtype in [np.float16, np.float32, np.float64]
                assert pb.shape == (n_samples, 2)
            # Keep only the second column of each array, side by side.
            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
            assert proba.shape == (n_samples, self.n_classes)
            return proba
        else:
            assert isinstance(sklearn_proba, np.ndarray)
            assert sklearn_proba.shape == (n_samples, 2)
            return sklearn_proba

    def clone(self) -> "ScikitLearnClassifier":
        """
        Return a new wrapper around a `sklearn.base.clone` copy of the wrapped
        classifier. The stored constant is not carried over.
        """
        return ScikitLearnClassifier(
            clf=sklearn.base.clone(self.inner_clf),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ScikitLearnRegressor(Regressor):
    """
    Wrapper for ScikitLearn regressors, which makes sure inputs and outputs have the
    correct dimensions and types.

    Training and prediction are delegated to the wrapped estimator; the prediction
    is checked to be a numpy array with one row per test sample before it is
    returned.
    """

    def __init__(self, reg: Any) -> None:
        """
        Parameters
        ----------
        reg
            The estimator to wrap. This class calls its `fit` and `predict`
            methods, and `sklearn.base.clone` is applied to it by `clone`.
        """
        super().__init__()
        self.inner_reg = reg

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """Run the base-class `fit`, then train the wrapped regressor."""
        super().fit(x_train, y_train)
        self.inner_reg.fit(x_train, y_train)

    def predict(self, x_test: np.ndarray) -> np.ndarray:
        """
        Return the wrapped regressor's predictions for `x_test`.

        Raises
        ------
        AssertionError
            If the wrapped regressor does not return a numpy array, or if the
            number of rows returned differs from the number of rows in `x_test`.
        """
        super().predict(x_test)
        expected_rows = x_test.shape[0]
        prediction = self.inner_reg.predict(x_test)
        assert isinstance(prediction, np.ndarray)
        assert prediction.shape[0] == expected_rows
        return prediction

    def clone(self) -> "ScikitLearnRegressor":
        """Return a new wrapper around a `sklearn.base.clone` copy of the regressor."""
        fresh_reg = sklearn.base.clone(self.inner_reg)
        return ScikitLearnRegressor(reg=fresh_reg)
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||||
# Released under the modified BSD license. See COPYING.md for more details.
|
# Released under the modified BSD license. See COPYING.md for more details.
|
||||||
from typing import cast
|
|
||||||
|
|
||||||
from numpy.linalg import norm
|
from numpy.linalg import norm
|
||||||
from sklearn.svm import SVC
|
from sklearn.svm import SVC
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import numpy as np
|
|||||||
from numpy.linalg import norm
|
from numpy.linalg import norm
|
||||||
from sklearn.svm import SVC
|
from sklearn.svm import SVC
|
||||||
|
|
||||||
from miplearn.classifiers import ScikitLearnClassifier
|
from miplearn.classifiers.sklearn import ScikitLearnClassifier
|
||||||
from miplearn.classifiers.cv import CrossValidatedClassifier
|
from miplearn.classifiers.cv import CrossValidatedClassifier
|
||||||
from tests.classifiers import _build_circle_training_data
|
from tests.classifiers import _build_circle_training_data
|
||||||
|
|
||||||
|
|||||||
@@ -4,37 +4,30 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from numpy.testing import assert_array_equal
|
from numpy.testing import assert_array_equal
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
from sklearn.neighbors import KNeighborsClassifier
|
from sklearn.neighbors import KNeighborsClassifier
|
||||||
|
|
||||||
from miplearn import ScikitLearnClassifier
|
from miplearn.classifiers.sklearn import ScikitLearnClassifier, ScikitLearnRegressor
|
||||||
|
|
||||||
|
|
||||||
def test_constant_prediction():
    """
    Every training sample carries the same label, so the predicted probabilities
    for the training points must equal that label exactly.
    """
    features = np.array([[0.0, 1.0], [1.0, 0.0]])
    labels = np.array([[True, False], [True, False]])
    wrapped = ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=1))
    wrapped.fit(features, labels)
    expected = np.array([[1.0, 0.0], [1.0, 0.0]])
    assert_array_equal(wrapped.predict_proba(features), expected)
|
||||||
|
|
||||||
|
|
||||||
|
def test_regressor():
    """
    Fit a wrapped linear regression on data where each target equals the sum of
    the two features, then check the predictions on unseen points.
    """
    features = np.array([[0.0, 1.0], [1.0, 4.0], [2.0, 2.0]])
    targets = np.array([[1.0], [5.0], [4.0]])
    queries = np.array([[4.0, 4.0], [0.0, 0.0]])
    wrapped = ScikitLearnRegressor(LinearRegression())
    wrapped.fit(features, targets)
    predicted = wrapped.predict(queries)
    expected = np.array([[8.0], [0.0]])
    # Round before comparing so floating-point noise from the fit is ignored.
    assert_array_equal(np.round(predicted, 2), expected)
|
||||||
|
|||||||
Reference in New Issue
Block a user