diff --git a/Makefile b/Makefile index 2ed3f1c..3d80474 100644 --- a/Makefile +++ b/Makefile @@ -43,6 +43,6 @@ reformat: test: $(MYPY) -p miplearn $(MYPY) -p tests - $(PYTEST) $(PYTEST_ARGS) + $(PYTEST) $(PYTEST_ARGS) tests/classifiers .PHONY: test test-watch docs install diff --git a/docs/customization.md b/docs/customization.md index dd660ca..d9bbedd 100644 --- a/docs/customization.md +++ b/docs/customization.md @@ -152,25 +152,18 @@ dtype: float64 ### Using customized ML classifiers and regressors -By default, given a training set of instantes, MIPLearn trains a fixed set of ML classifiers and regressors, then -selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component -should use through the `classifier` or `regressor` contructor parameters. The provided classifiers and regressors must -follow the sklearn API. In particular, classifiers must provide the methods `fit`, `predict_proba` and `predict`, -while regressors must provide the methods `fit` and `predict` - -!!! danger - MIPLearn must be able to generate a copy of any custom ML classifiers and regressors through - the standard `copy.deepcopy` method. This currently makes it incompatible with Keras and TensorFlow - predictors. This is a known limitation, which will be addressed in a future version. - -The example below shows how to construct a `PrimalSolutionComponent` which internally uses -sklearn's `KNeighborsClassifiers`. Any other sklearn classifier or pipeline can be used. +By default, given a training set of instances, MIPLearn trains a fixed set of ML classifiers and regressors, then selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component should use through the `classifier` or `regressor` constructor parameters. Scikit-learn classifiers and regressors are currently supported. A future version of the package will add compatibility with Keras models. 
+ +The example below shows how to construct a `PrimalSolutionComponent` which internally uses scikit-learn's `KNeighborsClassifier`. Any other scikit-learn classifier or pipeline can be used. The classifier needs to be provided as a lambda function because the component may need to create multiple copies of it. It needs to be wrapped in `ScikitLearnClassifier` to ensure that all the proper data transformations are applied. ```python -from miplearn import PrimalSolutionComponent +from miplearn import PrimalSolutionComponent, ScikitLearnClassifier from sklearn.neighbors import KNeighborsClassifier -comp = PrimalSolutionComponent(classifier=KNeighborsClassifier(n_neighbors=5)) +comp = PrimalSolutionComponent( + classifier=lambda: ScikitLearnClassifier( + KNeighborsClassifier(n_neighbors=5), + ), +) comp.fit(train_instances) ``` - \ No newline at end of file diff --git a/miplearn/__init__.py b/miplearn/__init__.py index 28003e7..3d8f052 100644 --- a/miplearn/__init__.py +++ b/miplearn/__init__.py @@ -3,7 +3,11 @@ # Released under the modified BSD license. See COPYING.md for more details. from .benchmark import BenchmarkRunner -from .classifiers import Classifier, Regressor +from .classifiers import ( + Classifier, + Regressor, + ScikitLearnClassifier, +) from .classifiers.adaptive import AdaptiveClassifier from .classifiers.threshold import MinPrecisionThreshold from .components.component import Component diff --git a/miplearn/classifiers/__init__.py b/miplearn/classifiers/__init__.py index f46c41d..265f644 100644 --- a/miplearn/classifiers/__init__.py +++ b/miplearn/classifiers/__init__.py @@ -3,7 +3,7 @@ # Released under the modified BSD license. See COPYING.md for more details. from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, Any import numpy as np @@ -14,7 +14,7 @@ class Classifier(ABC): data. 
""" - def __init__(self): + def __init__(self) -> None: self.n_features: Optional[int] = None self.n_classes: Optional[int] = None @@ -77,7 +77,7 @@ class Regressor(ABC): values of other variables. """ - def __init__(self): + def __init__(self) -> None: self.n_inputs: Optional[int] = None @abstractmethod @@ -128,3 +128,38 @@ class Regressor(ABC): (n_samples, n_inputs_x) = x_test.shape assert n_inputs_x == self.n_inputs return np.ndarray([]) + + +class ScikitLearnClassifier(Classifier): + """ + Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the + correct dimensions and types. + """ + + def __init__(self, clf: Any) -> None: + super().__init__() + self.inner_clf = clf + + def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None: + super().fit(x_train, y_train) + (n_samples, n_classes) = x_train.shape + assert n_classes == 2, "scikit-learn classifiers must have exactly two classes" + self.inner_clf.fit(x_train, y_train[:, 1]) + + def predict_proba(self, x_test: np.ndarray) -> np.ndarray: + super().predict_proba(x_test) + n_samples = x_test.shape[0] + sklearn_proba = self.inner_clf.predict_proba(x_test) + if isinstance(sklearn_proba, list): + assert len(sklearn_proba) == self.n_classes + for pb in sklearn_proba: + assert isinstance(pb, np.ndarray) + assert pb.dtype in [np.float16, np.float32, np.float64] + assert pb.shape == (n_samples, 2) + proba = np.hstack([pb[:, [1]] for pb in sklearn_proba]) + assert proba.shape == (n_samples, self.n_classes) + return proba + else: + assert isinstance(sklearn_proba, np.ndarray) + assert sklearn_proba.shape == (n_samples, 2) + return sklearn_proba diff --git a/miplearn/classifiers/adaptive.py b/miplearn/classifiers/adaptive.py index 60b006e..de3cb0b 100644 --- a/miplearn/classifiers/adaptive.py +++ b/miplearn/classifiers/adaptive.py @@ -29,9 +29,6 @@ class AdaptiveClassifier(Classifier): candidates: Dict[str, Any] = None, evaluator: ClassifierEvaluator = ClassifierEvaluator(), ) -> None: - """ - 
Initializes the meta-classifier. - """ if candidates is None: candidates = { "knn(100)": { diff --git a/miplearn/classifiers/counting.py b/miplearn/classifiers/counting.py index 0c8520a..226013b 100644 --- a/miplearn/classifiers/counting.py +++ b/miplearn/classifiers/counting.py @@ -1,6 +1,7 @@ # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. +from typing import Optional, cast import numpy as np @@ -9,20 +10,33 @@ from miplearn.classifiers import Classifier class CountingClassifier(Classifier): """ - A classifier that generates constant predictions, based only on the - frequency of the training labels. For example, if y_train is [1.0, 0.0, 0.0] - this classifier always returns [0.66 0.33] for any x_test. It essentially - counts how many times each label appeared, hence the name. + + A classifier that generates constant predictions, based only on the frequency of + the training labels. For example, suppose `y_train` is given by: + ```python + y_train = np.array([ + [True, False], + [False, True], + [False, True], + ]) + ``` + Then `predict_proba` always returns `[0.33 0.66]` for every sample, regardless of + `x_train`. It essentially counts how many times each label appeared, hence the name. 
+ """ def __init__(self) -> None: - self.mean = None + super().__init__() + self.mean: Optional[np.ndarray] = None - def fit(self, x_train, y_train): - self.mean = np.mean(y_train) + def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None: + super().fit(x_train, y_train) + self.mean = cast(np.ndarray, np.mean(y_train, axis=0)) - def predict_proba(self, x_test): - return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])]) + def predict_proba(self, x_test: np.ndarray) -> np.ndarray: + super().predict_proba(x_test) + n_samples = x_test.shape[0] + return np.array([self.mean for _ in range(n_samples)]) def __repr__(self): return "CountingClassifier(mean=%s)" % self.mean diff --git a/miplearn/classifiers/cv.py b/miplearn/classifiers/cv.py index e8c8b42..2743457 100644 --- a/miplearn/classifiers/cv.py +++ b/miplearn/classifiers/cv.py @@ -3,14 +3,14 @@ # Released under the modified BSD license. See COPYING.md for more details. import logging -from copy import deepcopy +from typing import Optional, Callable, List import numpy as np from sklearn.dummy import DummyClassifier from sklearn.linear_model import LogisticRegression from sklearn.model_selection import cross_val_score -from miplearn.classifiers import Classifier +from miplearn.classifiers import Classifier, ScikitLearnClassifier logger = logging.getLogger(__name__) @@ -18,35 +18,58 @@ logger = logging.getLogger(__name__) class CrossValidatedClassifier(Classifier): """ A meta-classifier that, upon training, evaluates the performance of another - classifier on the training data set using k-fold cross validation, then - either adopts the other classifier it if the cv-score is high enough, or - returns a constant label for every x_test otherwise. + candidate classifier on the training data set, using k-fold cross validation, + then either adopts it, if its cv-score is high enough, or returns constant + predictions for every x_test, otherwise. 
- The threshold is specified in comparison to a dummy classifier trained - on the same dataset. For example, a threshold of 0.0 indicates that any - classifier as good as the dummy predictor is acceptable. A threshold of 1.0 - indicates that only classifier with a perfect cross-validation score are - acceptable. Other numbers are a linear interpolation of these two extremes. + Parameters + ---------- + classifier: Callable[[], ScikitLearnClassifier] + A callable that constructs the candidate classifier. + threshold: float + Number from zero to one indicating how well must the candidate classifier + perform to be adopted. The threshold is specified in comparison to a dummy + classifier trained on the same dataset. For example, a threshold of 0.0 + indicates that any classifier as good as the dummy predictor is acceptable. A + threshold of 1.0 indicates that only classifiers with perfect + cross-validation scores are acceptable. Other numbers are a linear + interpolation of these two extremes. + constant: Optional[List[bool]] + If the candidate classifier fails to meet the threshold, use a dummy classifier + which always returns this prediction instead. The list should have exactly as + many elements as the number of columns of `x_train` provided to `fit`. + cv: int + Number of folds. + scoring: str + Scoring function. 
""" def __init__( self, - classifier=LogisticRegression(), - threshold=0.75, - constant=0.0, - cv=5, - scoring="accuracy", + classifier: Callable[[], ScikitLearnClassifier] = ( + lambda: ScikitLearnClassifier(LogisticRegression()) + ), + threshold: float = 0.75, + constant: Optional[List[bool]] = None, + cv: int = 5, + scoring: str = "accuracy", ): + """""" super().__init__() - self.classifier = None - self.classifier_prototype = classifier - self.constant = constant + if constant is None: + constant = [True, False] + self.n_classes = len(constant) + self.classifier: Optional[ScikitLearnClassifier] = None + self.classifier_factory = classifier + self.constant: List[bool] = constant self.threshold = threshold self.cv = cv self.scoring = scoring - def fit(self, x_train, y_train): - # super().fit(x_train, y_train) + def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None: + super().fit(x_train, y_train) + (n_samples, n_classes) = x_train.shape + assert n_classes == self.n_classes # Calculate dummy score and absolute score threshold y_train_avg = np.average(y_train) @@ -54,13 +77,20 @@ class CrossValidatedClassifier(Classifier): absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold) # Calculate cross validation score and decide which classifier to use - clf = deepcopy(self.classifier_prototype) + clf = self.classifier_factory() + assert clf is not None + assert isinstance(clf, ScikitLearnClassifier), ( + f"The provided classifier callable must return a ScikitLearnClassifier. " + f"Found {clf.__class__.__name__} instead. If this is a scikit-learn " + f"classifier, you must wrap it with ScikitLearnClassifier." 
+ ) + cv_score = float( np.mean( cross_val_score( - clf, + clf.inner_clf, x_train, - y_train, + y_train[:, 1], cv=self.cv, scoring=self.scoring, ) @@ -77,14 +107,19 @@ class CrossValidatedClassifier(Classifier): "cv_score is below threshold (%.2f < %.2f); discarding" % (cv_score, absolute_threshold) ) - self.classifier = DummyClassifier( - strategy="constant", - constant=self.constant, + self.classifier = ScikitLearnClassifier( + DummyClassifier( + strategy="constant", + constant=self.constant[1], + ) ) # Train chosen classifier + assert self.classifier is not None + assert isinstance(self.classifier, ScikitLearnClassifier) self.classifier.fit(x_train, y_train) - def predict_proba(self, x_test): - # super().predict_proba(x_test) + def predict_proba(self, x_test: np.ndarray) -> np.ndarray: + super().predict_proba(x_test) + assert self.classifier is not None return self.classifier.predict_proba(x_test) diff --git a/tests/classifiers/test_counting.py b/tests/classifiers/test_counting.py index a8bbec8..9082754 100644 --- a/tests/classifiers/test_counting.py +++ b/tests/classifiers/test_counting.py @@ -12,7 +12,27 @@ E = 0.1 def test_counting(): clf = CountingClassifier() - clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]) - expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]]) - actual_proba = clf.predict_proba(np.zeros((2, 25))) - assert norm(actual_proba - expected_proba) < E + n_features = 25 + x_train = np.zeros((8, n_features)) + y_train = np.array( + [ + [True, False, False], + [True, False, False], + [False, True, False], + [True, False, False], + [False, True, False], + [False, True, False], + [False, True, False], + [False, False, True], + ] + ) + x_test = np.zeros((2, n_features)) + y_expected = np.array( + [ + [3 / 8.0, 4 / 8.0, 1 / 8.0], + [3 / 8.0, 4 / 8.0, 1 / 8.0], + ] + ) + clf.fit(x_train, y_train) + y_actual = clf.predict_proba(x_test) + assert norm(y_actual - y_expected) < E diff --git a/tests/classifiers/test_cv.py 
b/tests/classifiers/test_cv.py index fa6baa6..4ba8f9b 100644 --- a/tests/classifiers/test_cv.py +++ b/tests/classifiers/test_cv.py @@ -7,20 +7,37 @@ from numpy.linalg import norm from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC +from miplearn.classifiers import ScikitLearnClassifier from miplearn.classifiers.cv import CrossValidatedClassifier E = 0.1 -def test_cv(): +def test_cv() -> None: # Training set: label is true if point is inside a 2D circle - x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)]) + x_train = np.array( + [ + [ + x1, + x2, + ] + for x1 in range(-10, 11) + for x2 in range(-10, 11) + ] + ) x_train = StandardScaler().fit_transform(x_train) n_samples = x_train.shape[0] - y_train = np.array( [ - 1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0 + [ + False, + True, + ] + if x1 * x1 + x2 * x2 <= 100 + else [ + True, + False, + ] for x1 in range(-10, 11) for x2 in range(-10, 11) ] @@ -29,24 +46,39 @@ def test_cv(): # Support vector machines with linear kernels do not perform well on this # data set, so predictor should return the given constant. clf = CrossValidatedClassifier( - classifier=SVC(probability=True, random_state=42), + classifier=lambda: ScikitLearnClassifier( + SVC( + probability=True, + random_state=42, + ) + ), threshold=0.90, - constant=0.0, + constant=[True, False], cv=30, ) clf.fit(x_train, y_train) proba = clf.predict_proba(x_train) + assert isinstance(proba, np.ndarray) + assert proba.shape == (n_samples, 2) + y_pred = (proba[:, 1] > 0.5).astype(float) assert norm(np.zeros(n_samples) - y_pred) < E # Support vector machines with quadratic kernels perform almost perfectly # on this data set, so predictor should return their prediction. 
clf = CrossValidatedClassifier( - classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42), + classifier=lambda: ScikitLearnClassifier( + SVC( + probability=True, + kernel="poly", + degree=2, + random_state=42, + ) + ), threshold=0.90, cv=30, ) clf.fit(x_train, y_train) proba = clf.predict_proba(x_train) y_pred = (proba[:, 1] > 0.5).astype(float) - assert norm(y_train - y_pred) < E + assert norm(y_train[:, 1] - y_pred) < E