Start refactoring of classifiers

master
Alinson S. Xavier 5 years ago
parent b87ef651e1
commit 8dba65dd9c

@ -43,6 +43,6 @@ reformat:
test:
$(MYPY) -p miplearn
$(MYPY) -p tests
$(PYTEST) $(PYTEST_ARGS)
$(PYTEST) $(PYTEST_ARGS) tests/classifiers
.PHONY: test test-watch docs install

@ -152,25 +152,18 @@ dtype: float64
### Using customized ML classifiers and regressors
By default, given a training set of instantes, MIPLearn trains a fixed set of ML classifiers and regressors, then
selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component
should use through the `classifier` or `regressor` contructor parameters. The provided classifiers and regressors must
follow the sklearn API. In particular, classifiers must provide the methods `fit`, `predict_proba` and `predict`,
while regressors must provide the methods `fit` and `predict`
By default, given a training set of instantes, MIPLearn trains a fixed set of ML classifiers and regressors, then selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component should use through the `classifier` or `regressor` contructor parameters. Scikit-learn classifiers and regressors are currently supported. A future version of the package will add compatibility with Keras models.
!!! danger
MIPLearn must be able to generate a copy of any custom ML classifiers and regressors through
the standard `copy.deepcopy` method. This currently makes it incompatible with Keras and TensorFlow
predictors. This is a known limitation, which will be addressed in a future version.
The example below shows how to construct a `PrimalSolutionComponent` which internally uses
sklearn's `KNeighborsClassifiers`. Any other sklearn classifier or pipeline can be used.
The example below shows how to construct a `PrimalSolutionComponent` which internally uses scikit-learn's `KNeighborsClassifiers`. Any other scikit-learn classifier or pipeline can be used. The classifier needs to be provided as a lambda function because the component may need to create multiple copies of it. It needs to be wrapped in `ScikitLearnClassifier` to ensure that all the proper data transformations are applied.
```python
from miplearn import PrimalSolutionComponent
from miplearn import PrimalSolutionComponent, ScikitLearnClassifier
from sklearn.neighbors import KNeighborsClassifier
comp = PrimalSolutionComponent(classifier=KNeighborsClassifier(n_neighbors=5))
comp = PrimalSolutionComponent(
classifier=lambda: ScikitLearnClassifier(
KNeighborsClassifier(n_neighbors=5),
),
)
comp.fit(train_instances)
```

@ -3,7 +3,11 @@
# Released under the modified BSD license. See COPYING.md for more details.
from .benchmark import BenchmarkRunner
from .classifiers import Classifier, Regressor
from .classifiers import (
Classifier,
Regressor,
ScikitLearnClassifier,
)
from .classifiers.adaptive import AdaptiveClassifier
from .classifiers.threshold import MinPrecisionThreshold
from .components.component import Component

@ -3,7 +3,7 @@
# Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod
from typing import Optional
from typing import Optional, Any
import numpy as np
@ -14,7 +14,7 @@ class Classifier(ABC):
data.
"""
def __init__(self):
def __init__(self) -> None:
self.n_features: Optional[int] = None
self.n_classes: Optional[int] = None
@ -77,7 +77,7 @@ class Regressor(ABC):
values of other variables.
"""
def __init__(self):
def __init__(self) -> None:
self.n_inputs: Optional[int] = None
@abstractmethod
@ -128,3 +128,38 @@ class Regressor(ABC):
(n_samples, n_inputs_x) = x_test.shape
assert n_inputs_x == self.n_inputs
return np.ndarray([])
class ScikitLearnClassifier(Classifier):
"""
Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
correct dimensions and types.
"""
def __init__(self, clf: Any) -> None:
super().__init__()
self.inner_clf = clf
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
(n_samples, n_classes) = x_train.shape
assert n_classes == 2, "scikit-learn classifiers must have exactly two classes"
self.inner_clf.fit(x_train, y_train[:, 1])
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
n_samples = x_test.shape[0]
sklearn_proba = self.inner_clf.predict_proba(x_test)
if isinstance(sklearn_proba, list):
assert len(sklearn_proba) == self.n_classes
for pb in sklearn_proba:
assert isinstance(pb, np.ndarray)
assert pb.dtype in [np.float16, np.float32, np.float64]
assert pb.shape == (n_samples, 2)
proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
assert proba.shape == (n_samples, self.n_classes)
return proba
else:
assert isinstance(sklearn_proba, np.ndarray)
assert sklearn_proba.shape == (n_samples, 2)
return sklearn_proba

@ -29,9 +29,6 @@ class AdaptiveClassifier(Classifier):
candidates: Dict[str, Any] = None,
evaluator: ClassifierEvaluator = ClassifierEvaluator(),
) -> None:
"""
Initializes the meta-classifier.
"""
if candidates is None:
candidates = {
"knn(100)": {

@ -1,6 +1,7 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, cast
import numpy as np
@ -9,20 +10,33 @@ from miplearn.classifiers import Classifier
class CountingClassifier(Classifier):
"""
A classifier that generates constant predictions, based only on the
frequency of the training labels. For example, if y_train is [1.0, 0.0, 0.0]
this classifier always returns [0.66 0.33] for any x_test. It essentially
counts how many times each label appeared, hence the name.
A classifier that generates constant predictions, based only on the frequency of
the training labels. For example, suppose `y_train` is given by:
```python
y_train = np.array([
[True, False],
[False, True],
[False, True],
])
```
Then `predict_proba` always returns `[0.33 0.66]` for every sample, regardless of
`x_train`. It essentially counts how many times each label appeared, hence the name.
"""
def __init__(self) -> None:
self.mean = None
super().__init__()
self.mean: Optional[np.ndarray] = None
def fit(self, x_train, y_train):
self.mean = np.mean(y_train)
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
self.mean = cast(np.ndarray, np.mean(y_train, axis=0))
def predict_proba(self, x_test):
return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])])
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
n_samples = x_test.shape[0]
return np.array([self.mean for _ in range(n_samples)])
def __repr__(self):
return "CountingClassifier(mean=%s)" % self.mean

@ -3,14 +3,14 @@
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from copy import deepcopy
from typing import Optional, Callable, List
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from miplearn.classifiers import Classifier
from miplearn.classifiers import Classifier, ScikitLearnClassifier
logger = logging.getLogger(__name__)
@ -18,35 +18,58 @@ logger = logging.getLogger(__name__)
class CrossValidatedClassifier(Classifier):
"""
A meta-classifier that, upon training, evaluates the performance of another
classifier on the training data set using k-fold cross validation, then
either adopts the other classifier it if the cv-score is high enough, or
returns a constant label for every x_test otherwise.
candidate classifier on the training data set, using k-fold cross validation,
then either adopts it, if its cv-score is high enough, or returns constant
predictions for every x_test, otherwise.
The threshold is specified in comparison to a dummy classifier trained
on the same dataset. For example, a threshold of 0.0 indicates that any
classifier as good as the dummy predictor is acceptable. A threshold of 1.0
indicates that only classifier with a perfect cross-validation score are
acceptable. Other numbers are a linear interpolation of these two extremes.
Parameters
----------
classifier: Callable[[], ScikitLearnClassifier]
A callable that constructs the candidate classifier.
threshold: float
Number from zero to one indicating how well must the candidate classifier
perform to be adopted. The threshold is specified in comparison to a dummy
classifier trained on the same dataset. For example, a threshold of 0.0
indicates that any classifier as good as the dummy predictor is acceptable. A
threshold of 1.0 indicates that only classifiers with perfect
cross-validation scores are acceptable. Other numbers are a linear
interpolation of these two extremes.
constant: Optional[List[bool]]
If the candidate classifier fails to meet the threshold, use a dummy classifier
which always returns this prediction instead. The list should have exactly as
many elements as the number of columns of `x_train` provided to `fit`.
cv: int
Number of folds.
scoring: str
Scoring function.
"""
def __init__(
self,
classifier=LogisticRegression(),
threshold=0.75,
constant=0.0,
cv=5,
scoring="accuracy",
classifier: Callable[[], ScikitLearnClassifier] = (
lambda: ScikitLearnClassifier(LogisticRegression())
),
threshold: float = 0.75,
constant: Optional[List[bool]] = None,
cv: int = 5,
scoring: str = "accuracy",
):
""""""
super().__init__()
self.classifier = None
self.classifier_prototype = classifier
self.constant = constant
if constant is None:
constant = [True, False]
self.n_classes = len(constant)
self.classifier: Optional[ScikitLearnClassifier] = None
self.classifier_factory = classifier
self.constant: List[bool] = constant
self.threshold = threshold
self.cv = cv
self.scoring = scoring
def fit(self, x_train, y_train):
# super().fit(x_train, y_train)
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
(n_samples, n_classes) = x_train.shape
assert n_classes == self.n_classes
# Calculate dummy score and absolute score threshold
y_train_avg = np.average(y_train)
@ -54,13 +77,20 @@ class CrossValidatedClassifier(Classifier):
absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)
# Calculate cross validation score and decide which classifier to use
clf = deepcopy(self.classifier_prototype)
clf = self.classifier_factory()
assert clf is not None
assert isinstance(clf, ScikitLearnClassifier), (
f"The provided classifier callable must return a ScikitLearnClassifier. "
f"Found {clf.__class__.__name__} instead. If this is a scikit-learn "
f"classifier, you must wrap it with ScikitLearnClassifier."
)
cv_score = float(
np.mean(
cross_val_score(
clf,
clf.inner_clf,
x_train,
y_train,
y_train[:, 1],
cv=self.cv,
scoring=self.scoring,
)
@ -77,14 +107,19 @@ class CrossValidatedClassifier(Classifier):
"cv_score is below threshold (%.2f < %.2f); discarding"
% (cv_score, absolute_threshold)
)
self.classifier = DummyClassifier(
self.classifier = ScikitLearnClassifier(
DummyClassifier(
strategy="constant",
constant=self.constant,
constant=self.constant[1],
)
)
# Train chosen classifier
assert self.classifier is not None
assert isinstance(self.classifier, ScikitLearnClassifier)
self.classifier.fit(x_train, y_train)
def predict_proba(self, x_test):
# super().predict_proba(x_test)
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
assert self.classifier is not None
return self.classifier.predict_proba(x_test)

@ -12,7 +12,27 @@ E = 0.1
def test_counting():
clf = CountingClassifier()
clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
actual_proba = clf.predict_proba(np.zeros((2, 25)))
assert norm(actual_proba - expected_proba) < E
n_features = 25
x_train = np.zeros((8, n_features))
y_train = np.array(
[
[True, False, False],
[True, False, False],
[False, True, False],
[True, False, False],
[False, True, False],
[False, True, False],
[False, True, False],
[False, False, True],
]
)
x_test = np.zeros((2, n_features))
y_expected = np.array(
[
[3 / 8.0, 4 / 8.0, 1 / 8.0],
[3 / 8.0, 4 / 8.0, 1 / 8.0],
]
)
clf.fit(x_train, y_train)
y_actual = clf.predict_proba(x_test)
assert norm(y_actual - y_expected) < E

@ -7,20 +7,37 @@ from numpy.linalg import norm
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from miplearn.classifiers import ScikitLearnClassifier
from miplearn.classifiers.cv import CrossValidatedClassifier
E = 0.1
def test_cv():
def test_cv() -> None:
# Training set: label is true if point is inside a 2D circle
x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
x_train = np.array(
[
[
x1,
x2,
]
for x1 in range(-10, 11)
for x2 in range(-10, 11)
]
)
x_train = StandardScaler().fit_transform(x_train)
n_samples = x_train.shape[0]
y_train = np.array(
[
1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
[
False,
True,
]
if x1 * x1 + x2 * x2 <= 100
else [
True,
False,
]
for x1 in range(-10, 11)
for x2 in range(-10, 11)
]
@ -29,24 +46,39 @@ def test_cv():
# Support vector machines with linear kernels do not perform well on this
# data set, so predictor should return the given constant.
clf = CrossValidatedClassifier(
classifier=SVC(probability=True, random_state=42),
classifier=lambda: ScikitLearnClassifier(
SVC(
probability=True,
random_state=42,
)
),
threshold=0.90,
constant=0.0,
constant=[True, False],
cv=30,
)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)
assert isinstance(proba, np.ndarray)
assert proba.shape == (n_samples, 2)
y_pred = (proba[:, 1] > 0.5).astype(float)
assert norm(np.zeros(n_samples) - y_pred) < E
# Support vector machines with quadratic kernels perform almost perfectly
# on this data set, so predictor should return their prediction.
clf = CrossValidatedClassifier(
classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
classifier=lambda: ScikitLearnClassifier(
SVC(
probability=True,
kernel="poly",
degree=2,
random_state=42,
)
),
threshold=0.90,
cv=30,
)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)
y_pred = (proba[:, 1] > 0.5).astype(float)
assert norm(y_train - y_pred) < E
assert norm(y_train[:, 1] - y_pred) < E

Loading…
Cancel
Save