Start refactoring of classifiers

master
Alinson S. Xavier 5 years ago
parent b87ef651e1
commit 8dba65dd9c

Makefile
@@ -43,6 +43,6 @@ reformat:
 test:
 	$(MYPY) -p miplearn
 	$(MYPY) -p tests
-	$(PYTEST) $(PYTEST_ARGS)
+	$(PYTEST) $(PYTEST_ARGS) tests/classifiers
 .PHONY: test test-watch docs install

docs/customization.md
@@ -152,25 +152,18 @@ dtype: float64

 ### Using customized ML classifiers and regressors

-By default, given a training set of instances, MIPLearn trains a fixed set of ML classifiers and regressors, then
-selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component
-should use through the `classifier` or `regressor` constructor parameters. The provided classifiers and regressors must
-follow the sklearn API. In particular, classifiers must provide the methods `fit`, `predict_proba` and `predict`,
-while regressors must provide the methods `fit` and `predict`.
-
-!!! danger
-    MIPLearn must be able to generate a copy of any custom ML classifiers and regressors through
-    the standard `copy.deepcopy` method. This currently makes it incompatible with Keras and TensorFlow
-    predictors. This is a known limitation, which will be addressed in a future version.
-
-The example below shows how to construct a `PrimalSolutionComponent` which internally uses
-sklearn's `KNeighborsClassifier`. Any other sklearn classifier or pipeline can be used.
+By default, given a training set of instances, MIPLearn trains a fixed set of ML classifiers and regressors, then selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component should use through the `classifier` or `regressor` constructor parameters. Scikit-learn classifiers and regressors are currently supported. A future version of the package will add compatibility with Keras models.
+
+The example below shows how to construct a `PrimalSolutionComponent` which internally uses scikit-learn's `KNeighborsClassifier`. Any other scikit-learn classifier or pipeline can be used. The classifier must be provided as a lambda function, because the component may need to create multiple copies of it, and it must be wrapped in `ScikitLearnClassifier`, which ensures that the proper data transformations are applied.

 ```python
-from miplearn import PrimalSolutionComponent
+from miplearn import PrimalSolutionComponent, ScikitLearnClassifier
 from sklearn.neighbors import KNeighborsClassifier

-comp = PrimalSolutionComponent(classifier=KNeighborsClassifier(n_neighbors=5))
+comp = PrimalSolutionComponent(
+    classifier=lambda: ScikitLearnClassifier(
+        KNeighborsClassifier(n_neighbors=5),
+    ),
+)
 comp.fit(train_instances)
 ```

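Since the documentation states that any scikit-learn classifier or pipeline can be used, a minimal sketch of the pipeline case may be helpful. This variant is not part of the commit; the `make_pipeline`/`SVC` choices are illustrative only, and `SVC` needs `probability=True` for `predict_proba` to work:

```python
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from miplearn import PrimalSolutionComponent, ScikitLearnClassifier

# Illustrative only: an entire scikit-learn pipeline is wrapped exactly
# like a plain classifier. The factory lambda lets the component create
# as many independent copies as it needs.
comp = PrimalSolutionComponent(
    classifier=lambda: ScikitLearnClassifier(
        make_pipeline(
            StandardScaler(),
            SVC(probability=True),  # probability=True enables predict_proba
        ),
    ),
)
comp.fit(train_instances)  # train_instances as in the documented example above
```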
miplearn/__init__.py
@@ -3,7 +3,11 @@
 # Released under the modified BSD license. See COPYING.md for more details.

 from .benchmark import BenchmarkRunner
-from .classifiers import Classifier, Regressor
+from .classifiers import (
+    Classifier,
+    Regressor,
+    ScikitLearnClassifier,
+)
 from .classifiers.adaptive import AdaptiveClassifier
 from .classifiers.threshold import MinPrecisionThreshold
 from .components.component import Component

miplearn/classifiers/__init__.py
@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.

 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Optional, Any

 import numpy as np
@@ -14,7 +14,7 @@ class Classifier(ABC):
     data.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         self.n_features: Optional[int] = None
         self.n_classes: Optional[int] = None
@@ -77,7 +77,7 @@ class Regressor(ABC):
     values of other variables.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         self.n_inputs: Optional[int] = None

     @abstractmethod
@@ -128,3 +128,38 @@ class Regressor(ABC):
         (n_samples, n_inputs_x) = x_test.shape
         assert n_inputs_x == self.n_inputs
         return np.ndarray([])
+
+
+class ScikitLearnClassifier(Classifier):
+    """
+    Wrapper for scikit-learn classifiers, which makes sure inputs and outputs have
+    the correct dimensions and types.
+    """
+
+    def __init__(self, clf: Any) -> None:
+        super().__init__()
+        self.inner_clf = clf
+
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        (n_samples, n_classes) = y_train.shape
+        assert n_classes == 2, "scikit-learn classifiers must have exactly two classes"
+        self.inner_clf.fit(x_train, y_train[:, 1])
+
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        n_samples = x_test.shape[0]
+        sklearn_proba = self.inner_clf.predict_proba(x_test)
+        if isinstance(sklearn_proba, list):
+            assert len(sklearn_proba) == self.n_classes
+            for pb in sklearn_proba:
+                assert isinstance(pb, np.ndarray)
+                assert pb.dtype in [np.float16, np.float32, np.float64]
+                assert pb.shape == (n_samples, 2)
+            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
+            assert proba.shape == (n_samples, self.n_classes)
+            return proba
+        else:
+            assert isinstance(sklearn_proba, np.ndarray)
+            assert sklearn_proba.shape == (n_samples, 2)
+            return sklearn_proba

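A short usage sketch of the wrapper introduced above, assuming the two-column one-hot label convention that its `fit` method asserts:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

from miplearn.classifiers import ScikitLearnClassifier

# Labels must be one-hot encoded with exactly two columns; the wrapper
# trains the inner classifier on the second column only.
x_train = np.array([[0.0], [0.1], [0.9], [1.0]])
y_train = np.array(
    [
        [True, False],
        [True, False],
        [False, True],
        [False, True],
    ]
)
clf = ScikitLearnClassifier(LogisticRegression())
clf.fit(x_train, y_train)
proba = clf.predict_proba(np.array([[0.05], [0.95]]))
assert proba.shape == (2, 2)  # one [P(False), P(True)] row per sample
```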
miplearn/classifiers/adaptive.py
@@ -29,9 +29,6 @@ class AdaptiveClassifier(Classifier):
         candidates: Dict[str, Any] = None,
         evaluator: ClassifierEvaluator = ClassifierEvaluator(),
     ) -> None:
-        """
-        Initializes the meta-classifier.
-        """
         if candidates is None:
             candidates = {
                 "knn(100)": {

miplearn/classifiers/counting.py
@@ -1,6 +1,7 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
+from typing import Optional, cast

 import numpy as np
@@ -9,20 +10,33 @@ from miplearn.classifiers import Classifier

 class CountingClassifier(Classifier):
     """
-    A classifier that generates constant predictions, based only on the
-    frequency of the training labels. For example, if y_train is [1.0, 0.0, 0.0]
-    this classifier always returns [0.66 0.33] for any x_test. It essentially
-    counts how many times each label appeared, hence the name.
+    A classifier that generates constant predictions, based only on the frequency of
+    the training labels. For example, suppose `y_train` is given by:
+
+    ```python
+    y_train = np.array([
+        [True, False],
+        [False, True],
+        [False, True],
+    ])
+    ```
+
+    Then `predict_proba` always returns `[0.33 0.66]` for every sample, regardless of
+    `x_train`. It essentially counts how many times each label appeared, hence the name.
     """

     def __init__(self) -> None:
-        self.mean = None
+        super().__init__()
+        self.mean: Optional[np.ndarray] = None

-    def fit(self, x_train, y_train):
-        self.mean = np.mean(y_train)
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        self.mean = cast(np.ndarray, np.mean(y_train, axis=0))

-    def predict_proba(self, x_test):
-        return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])])
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        n_samples = x_test.shape[0]
+        return np.array([self.mean for _ in range(n_samples)])

     def __repr__(self):
         return "CountingClassifier(mean=%s)" % self.mean

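A sketch of the behavior the new docstring describes, assuming the module lives at `miplearn.classifiers.counting` (consistent with the test file further below):

```python
import numpy as np

from miplearn.classifiers.counting import CountingClassifier  # path assumed

x_train = np.zeros((3, 10))  # features are ignored entirely
y_train = np.array(
    [
        [True, False],
        [False, True],
        [False, True],
    ]
)
clf = CountingClassifier()
clf.fit(x_train, y_train)  # stores the column means, roughly [0.33, 0.67]
print(clf.predict_proba(np.zeros((2, 10))))
# [[0.333... 0.666...]
#  [0.333... 0.666...]]
```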
miplearn/classifiers/cv.py
@@ -3,14 +3,14 @@
 # Released under the modified BSD license. See COPYING.md for more details.

 import logging
-from copy import deepcopy
+from typing import Optional, Callable, List

 import numpy as np
 from sklearn.dummy import DummyClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score

-from miplearn.classifiers import Classifier
+from miplearn.classifiers import Classifier, ScikitLearnClassifier

 logger = logging.getLogger(__name__)
@@ -18,35 +18,58 @@ logger = logging.getLogger(__name__)

 class CrossValidatedClassifier(Classifier):
     """
     A meta-classifier that, upon training, evaluates the performance of another
-    classifier on the training data set using k-fold cross validation, then
-    either adopts the other classifier if the cv-score is high enough, or
-    returns a constant label for every x_test otherwise.
-
-    The threshold is specified in comparison to a dummy classifier trained
-    on the same dataset. For example, a threshold of 0.0 indicates that any
-    classifier as good as the dummy predictor is acceptable. A threshold of 1.0
-    indicates that only classifiers with a perfect cross-validation score are
-    acceptable. Other numbers are a linear interpolation of these two extremes.
+    candidate classifier on the training data set, using k-fold cross validation,
+    then either adopts it, if its cv-score is high enough, or returns constant
+    predictions for every x_test otherwise.
+
+    Parameters
+    ----------
+    classifier: Callable[[], ScikitLearnClassifier]
+        A callable that constructs the candidate classifier.
+    threshold: float
+        Number from zero to one indicating how well the candidate classifier must
+        perform to be adopted. The threshold is specified in comparison to a dummy
+        classifier trained on the same dataset. For example, a threshold of 0.0
+        indicates that any classifier as good as the dummy predictor is acceptable. A
+        threshold of 1.0 indicates that only classifiers with perfect
+        cross-validation scores are acceptable. Other numbers are a linear
+        interpolation of these two extremes.
+    constant: Optional[List[bool]]
+        If the candidate classifier fails to meet the threshold, use a dummy classifier
+        which always returns this prediction instead. The list should have exactly as
+        many elements as the number of columns of `y_train` provided to `fit`.
+    cv: int
+        Number of folds.
+    scoring: str
+        Scoring function.
     """

     def __init__(
         self,
-        classifier=LogisticRegression(),
-        threshold=0.75,
-        constant=0.0,
-        cv=5,
-        scoring="accuracy",
+        classifier: Callable[[], ScikitLearnClassifier] = (
+            lambda: ScikitLearnClassifier(LogisticRegression())
+        ),
+        threshold: float = 0.75,
+        constant: Optional[List[bool]] = None,
+        cv: int = 5,
+        scoring: str = "accuracy",
     ):
-        """"""
         super().__init__()
-        self.classifier = None
-        self.classifier_prototype = classifier
-        self.constant = constant
+        if constant is None:
+            constant = [True, False]
+        self.n_classes = len(constant)
+        self.classifier: Optional[ScikitLearnClassifier] = None
+        self.classifier_factory = classifier
+        self.constant: List[bool] = constant
         self.threshold = threshold
         self.cv = cv
         self.scoring = scoring

-    def fit(self, x_train, y_train):
-        # super().fit(x_train, y_train)
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        (n_samples, n_classes) = y_train.shape
+        assert n_classes == self.n_classes

         # Calculate dummy score and absolute score threshold
         y_train_avg = np.average(y_train)
@@ -54,13 +77,20 @@ class CrossValidatedClassifier(Classifier):
         absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)

         # Calculate cross validation score and decide which classifier to use
-        clf = deepcopy(self.classifier_prototype)
+        clf = self.classifier_factory()
+        assert clf is not None
+        assert isinstance(clf, ScikitLearnClassifier), (
+            f"The provided classifier callable must return a ScikitLearnClassifier. "
+            f"Found {clf.__class__.__name__} instead. If this is a scikit-learn "
+            f"classifier, you must wrap it with ScikitLearnClassifier."
+        )
         cv_score = float(
             np.mean(
                 cross_val_score(
-                    clf,
+                    clf.inner_clf,
                     x_train,
-                    y_train,
+                    y_train[:, 1],
                     cv=self.cv,
                     scoring=self.scoring,
                 )
@@ -77,14 +107,19 @@ class CrossValidatedClassifier(Classifier):
                 "cv_score is below threshold (%.2f < %.2f); discarding"
                 % (cv_score, absolute_threshold)
             )
-            self.classifier = DummyClassifier(
-                strategy="constant",
-                constant=self.constant,
+            self.classifier = ScikitLearnClassifier(
+                DummyClassifier(
+                    strategy="constant",
+                    constant=self.constant[1],
+                )
             )

         # Train chosen classifier
+        assert self.classifier is not None
+        assert isinstance(self.classifier, ScikitLearnClassifier)
         self.classifier.fit(x_train, y_train)

-    def predict_proba(self, x_test):
-        # super().predict_proba(x_test)
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        assert self.classifier is not None
         return self.classifier.predict_proba(x_test)

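To make the threshold semantics concrete, here is the interpolation `fit` performs, worked through with an illustrative dummy score (the 0.60 is made up; in `fit` it is computed from `y_train`):

```python
# absolute_threshold interpolates between the dummy score (threshold=0.0)
# and a perfect score of 1.0 (threshold=1.0):
threshold = 0.75
dummy_score = 0.60  # illustrative value only
absolute_threshold = 1.0 * threshold + dummy_score * (1 - threshold)
print(absolute_threshold)  # 0.9: the candidate needs cv_score >= 0.9 to be adopted
```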
tests/classifiers/test_counting.py
@@ -12,7 +12,27 @@ E = 0.1

 def test_counting():
     clf = CountingClassifier()
-    clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
-    expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
-    actual_proba = clf.predict_proba(np.zeros((2, 25)))
-    assert norm(actual_proba - expected_proba) < E
+    n_features = 25
+    x_train = np.zeros((8, n_features))
+    y_train = np.array(
+        [
+            [True, False, False],
+            [True, False, False],
+            [False, True, False],
+            [True, False, False],
+            [False, True, False],
+            [False, True, False],
+            [False, True, False],
+            [False, False, True],
+        ]
+    )
+    x_test = np.zeros((2, n_features))
+    y_expected = np.array(
+        [
+            [3 / 8.0, 4 / 8.0, 1 / 8.0],
+            [3 / 8.0, 4 / 8.0, 1 / 8.0],
+        ]
+    )
+    clf.fit(x_train, y_train)
+    y_actual = clf.predict_proba(x_test)
+    assert norm(y_actual - y_expected) < E

tests/classifiers/test_cv.py
@@ -7,20 +7,37 @@ from numpy.linalg import norm
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC

+from miplearn.classifiers import ScikitLearnClassifier
 from miplearn.classifiers.cv import CrossValidatedClassifier

 E = 0.1


-def test_cv():
+def test_cv() -> None:
     # Training set: label is true if point is inside a 2D circle
-    x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
+    x_train = np.array(
+        [
+            [
+                x1,
+                x2,
+            ]
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)
+        ]
+    )
     x_train = StandardScaler().fit_transform(x_train)
     n_samples = x_train.shape[0]
     y_train = np.array(
         [
-            1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
+            [
+                False,
+                True,
+            ]
+            if x1 * x1 + x2 * x2 <= 100
+            else [
+                True,
+                False,
+            ]
             for x1 in range(-10, 11)
             for x2 in range(-10, 11)
         ]
@@ -29,24 +46,39 @@ def test_cv():

     # Support vector machines with linear kernels do not perform well on this
     # data set, so predictor should return the given constant.
     clf = CrossValidatedClassifier(
-        classifier=SVC(probability=True, random_state=42),
+        classifier=lambda: ScikitLearnClassifier(
+            SVC(
+                probability=True,
+                random_state=42,
+            )
+        ),
         threshold=0.90,
-        constant=0.0,
+        constant=[True, False],
         cv=30,
     )
     clf.fit(x_train, y_train)
     proba = clf.predict_proba(x_train)
+    assert isinstance(proba, np.ndarray)
+    assert proba.shape == (n_samples, 2)
     y_pred = (proba[:, 1] > 0.5).astype(float)
     assert norm(np.zeros(n_samples) - y_pred) < E

     # Support vector machines with quadratic kernels perform almost perfectly
     # on this data set, so predictor should return their prediction.
     clf = CrossValidatedClassifier(
-        classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
+        classifier=lambda: ScikitLearnClassifier(
+            SVC(
+                probability=True,
+                kernel="poly",
+                degree=2,
+                random_state=42,
+            )
+        ),
         threshold=0.90,
         cv=30,
     )
     clf.fit(x_train, y_train)
     proba = clf.predict_proba(x_train)
     y_pred = (proba[:, 1] > 0.5).astype(float)
-    assert norm(y_train - y_pred) < E
+    assert norm(y_train[:, 1] - y_pred) < E
