From 4da561a6a8a423d40e309cddce83cc46ead1cf78 Mon Sep 17 00:00:00 2001
From: "Alinson S. Xavier"
Date: Mon, 25 Jan 2021 08:59:06 -0600
Subject: [PATCH] AdaptiveClassifier: Refactor and add tests

---
 miplearn/classifiers/adaptive.py    | 97 +++++++++++++++++++++--------
 miplearn/classifiers/evaluator.py   | 15 -----
 tests/classifiers/__init__.py       | 37 ++++++
 tests/classifiers/test_adaptive.py  | 41 ++++++++++++
 tests/classifiers/test_cv.py        | 30 +--------
 tests/classifiers/test_evaluator.py | 20 ------
 6 files changed, 150 insertions(+), 90 deletions(-)
 delete mode 100644 miplearn/classifiers/evaluator.py
 create mode 100644 tests/classifiers/test_adaptive.py
 delete mode 100644 tests/classifiers/test_evaluator.py

diff --git a/miplearn/classifiers/adaptive.py b/miplearn/classifiers/adaptive.py
index de3cb0b..8445d1f 100644
--- a/miplearn/classifiers/adaptive.py
+++ b/miplearn/classifiers/adaptive.py
@@ -4,63 +4,106 @@
 
 import logging
-from copy import deepcopy
-from typing import Any, Dict
+from typing import Dict, Callable, Optional
 
+import numpy as np
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_auc_score
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 
-from miplearn.classifiers import Classifier
+from miplearn.classifiers import Classifier, ScikitLearnClassifier
 from miplearn.classifiers.counting import CountingClassifier
-from miplearn.classifiers.evaluator import ClassifierEvaluator
 
 logger = logging.getLogger(__name__)
 
 
+class CandidateClassifierSpecs:
+    """
+    Specifications describing how to construct a candidate classifier, and
+    under which circumstances it can be used.
+
+    Parameters
+    ----------
+    classifier: Callable[[], Classifier]
+        Callable that constructs the classifier.
+    min_samples: int
+        Minimum number of samples for this classifier to be considered.
+    """
+
+    def __init__(
+        self,
+        classifier: Callable[[], Classifier],
+        min_samples: int = 0,
+    ) -> None:
+        self.min_samples = min_samples
+        self.classifier = classifier
+
+
 class AdaptiveClassifier(Classifier):
     """
     A meta-classifier which dynamically selects what actual classifier to use
     based on its cross-validation score on a particular training data set.
+
+    Parameters
+    ----------
+    candidates: Dict[str, CandidateClassifierSpecs]
+        A dictionary of candidate classifiers to consider, mapping the name of
+        each candidate to its specs, which describe how to construct it and
+        under which circumstances it may be used. If no candidates are
+        provided, a fixed default set is used, which includes
+        `CountingClassifier`, `KNeighborsClassifier` and `LogisticRegression`.
""" def __init__( self, - candidates: Dict[str, Any] = None, - evaluator: ClassifierEvaluator = ClassifierEvaluator(), + candidates: Dict[str, CandidateClassifierSpecs] = None, ) -> None: + super().__init__() if candidates is None: candidates = { - "knn(100)": { - "classifier": KNeighborsClassifier(n_neighbors=100), - "min samples": 100, - }, - "logistic": { - "classifier": make_pipeline(StandardScaler(), LogisticRegression()), - "min samples": 30, - }, - "counting": { - "classifier": CountingClassifier(), - "min samples": 0, - }, + "knn(100)": CandidateClassifierSpecs( + classifier=lambda: ScikitLearnClassifier( + KNeighborsClassifier(n_neighbors=100) + ), + min_samples=100, + ), + "logistic": CandidateClassifierSpecs( + classifier=lambda: ScikitLearnClassifier( + make_pipeline( + StandardScaler(), + LogisticRegression(), + ) + ), + min_samples=30, + ), + "counting": CandidateClassifierSpecs( + classifier=lambda: CountingClassifier(), + ), } self.candidates = candidates - self.evaluator = evaluator - self.classifier = None + self.classifier: Optional[Classifier] = None - def fit(self, x_train, y_train): - best_name, best_clf, best_score = None, None, -float("inf") + def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None: + super().fit(x_train, y_train) n_samples = x_train.shape[0] - for (name, clf_dict) in self.candidates.items(): - if n_samples < clf_dict["min samples"]: + assert y_train.shape == (n_samples, 2) + + best_name, best_clf, best_score = None, None, -float("inf") + for (name, specs) in self.candidates.items(): + if n_samples < specs.min_samples: continue - clf = deepcopy(clf_dict["classifier"]) + clf = specs.classifier() clf.fit(x_train, y_train) - score = self.evaluator.evaluate(clf, x_train, y_train) + proba = clf.predict_proba(x_train) + # FIXME: Switch to k-fold cross validation + score = roc_auc_score(y_train[:, 1], proba[:, 1]) if score > best_score: best_name, best_clf, best_score = name, clf, score logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score)) self.classifier = best_clf - def predict_proba(self, x_test): + def predict_proba(self, x_test: np.ndarray) -> np.ndarray: + super().predict_proba(x_test) + assert self.classifier is not None return self.classifier.predict_proba(x_test) diff --git a/miplearn/classifiers/evaluator.py b/miplearn/classifiers/evaluator.py deleted file mode 100644 index 66afb22..0000000 --- a/miplearn/classifiers/evaluator.py +++ /dev/null @@ -1,15 +0,0 @@ -# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization -# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. -# Released under the modified BSD license. See COPYING.md for more details. - -from sklearn.metrics import roc_auc_score - - -class ClassifierEvaluator: - def __init__(self) -> None: - pass - - def evaluate(self, clf, x_train, y_train): - # FIXME: use cross-validation - proba = clf.predict_proba(x_train) - return roc_auc_score(y_train, proba[:, 1]) diff --git a/tests/classifiers/__init__.py b/tests/classifiers/__init__.py index 13c148b..b602796 100644 --- a/tests/classifiers/__init__.py +++ b/tests/classifiers/__init__.py @@ -1,3 +1,39 @@ # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. 
+
+from typing import Tuple
+
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+
+
+def _build_circle_training_data() -> Tuple[np.ndarray, np.ndarray]:
+    """Training set: y = [False, True] iff the point lies in a radius-10 circle."""
+    x_train = StandardScaler().fit_transform(
+        np.array(
+            [
+                [
+                    x1,
+                    x2,
+                ]
+                for x1 in range(-10, 11)
+                for x2 in range(-10, 11)
+            ]
+        )
+    )
+    y_train = np.array(
+        [
+            [
+                False,
+                True,
+            ]
+            if x1 * x1 + x2 * x2 <= 100
+            else [
+                True,
+                False,
+            ]
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)
+        ]
+    )
+    return x_train, y_train
diff --git a/tests/classifiers/test_adaptive.py b/tests/classifiers/test_adaptive.py
new file mode 100644
index 0000000..959b196
--- /dev/null
+++ b/tests/classifiers/test_adaptive.py
@@ -0,0 +1,41 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from numpy.linalg import norm
+from sklearn.svm import SVC
+
+from miplearn import AdaptiveClassifier, ScikitLearnClassifier
+from miplearn.classifiers.adaptive import CandidateClassifierSpecs
+from tests.classifiers import _build_circle_training_data
+
+
+def test_adaptive() -> None:
+    clf = AdaptiveClassifier(
+        candidates={
+            "linear": CandidateClassifierSpecs(
+                classifier=lambda: ScikitLearnClassifier(
+                    SVC(
+                        probability=True,
+                        kernel="linear",
+                        random_state=42,
+                    )
+                )
+            ),
+            "poly": CandidateClassifierSpecs(
+                classifier=lambda: ScikitLearnClassifier(
+                    SVC(
+                        probability=True,
+                        kernel="poly",
+                        degree=2,
+                        random_state=42,
+                    )
+                )
+            ),
+        }
+    )
+    x_train, y_train = _build_circle_training_data()
+    clf.fit(x_train, y_train)
+    proba = clf.predict_proba(x_train)
+    y_pred = (proba[:, 1] > 0.5).astype(float)
+    assert norm(y_train[:, 1] - y_pred) < 0.1
diff --git a/tests/classifiers/test_cv.py b/tests/classifiers/test_cv.py
index 4ba8f9b..618b3cf 100644
--- a/tests/classifiers/test_cv.py
+++ b/tests/classifiers/test_cv.py
@@ -4,44 +4,18 @@
 
 import numpy as np
 from numpy.linalg import norm
-from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC
 
 from miplearn.classifiers import ScikitLearnClassifier
 from miplearn.classifiers.cv import CrossValidatedClassifier
+from tests.classifiers import _build_circle_training_data
 
 E = 0.1
 
 
 def test_cv() -> None:
-    # Training set: label is true if point is inside a 2D circle
-    x_train = np.array(
-        [
-            [
-                x1,
-                x2,
-            ]
-            for x1 in range(-10, 11)
-            for x2 in range(-10, 11)
-        ]
-    )
-    x_train = StandardScaler().fit_transform(x_train)
+    x_train, y_train = _build_circle_training_data()
     n_samples = x_train.shape[0]
-    y_train = np.array(
-        [
-            [
-                False,
-                True,
-            ]
-            if x1 * x1 + x2 * x2 <= 100
-            else [
-                True,
-                False,
-            ]
-            for x1 in range(-10, 11)
-            for x2 in range(-10, 11)
-        ]
-    )
 
     # Support vector machines with linear kernels do not perform well on this
     # data set, so predictor should return the given constant.
diff --git a/tests/classifiers/test_evaluator.py b/tests/classifiers/test_evaluator.py
deleted file mode 100644
index d0dd201..0000000
--- a/tests/classifiers/test_evaluator.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
-# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
-# Released under the modified BSD license. See COPYING.md for more details.
- -import numpy as np -from sklearn.neighbors import KNeighborsClassifier - -from miplearn.classifiers.evaluator import ClassifierEvaluator - - -def test_evaluator(): - clf_a = KNeighborsClassifier(n_neighbors=1) - clf_b = KNeighborsClassifier(n_neighbors=2) - x_train = np.array([[0, 0], [1, 0]]) - y_train = np.array([0, 1]) - clf_a.fit(x_train, y_train) - clf_b.fit(x_train, y_train) - ev = ClassifierEvaluator() - assert ev.evaluate(clf_a, x_train, y_train) == 1.0 - assert ev.evaluate(clf_b, x_train, y_train) == 0.5
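Usage sketch (not part of the patch): the refactor replaces the old
`{"classifier": ..., "min samples": ...}` dictionaries with `CandidateClassifierSpecs`,
whose `classifier` field is a zero-argument callable, so `fit` constructs a fresh,
unfitted instance per candidate instead of deep-copying a shared one. The snippet
below illustrates the intended call pattern, using only the API shown in this patch;
the toy data, random seed, and 0.5 decision threshold are illustrative assumptions.
Labels follow the two-column convention asserted in `AdaptiveClassifier.fit`
(column 0 = negative class, column 1 = positive class).

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    from miplearn import AdaptiveClassifier, ScikitLearnClassifier
    from miplearn.classifiers.adaptive import CandidateClassifierSpecs

    # Toy training data: 50 samples, 2 features; a sample is labeled positive
    # iff its first feature is positive. Labels have shape (n_samples, 2).
    rng = np.random.RandomState(42)
    x_train = rng.randn(50, 2)
    positive = x_train[:, 0] > 0
    y_train = np.column_stack([~positive, positive])

    clf = AdaptiveClassifier(
        candidates={
            # Considered only when n_samples >= 30; here n_samples is 50.
            "logistic": CandidateClassifierSpecs(
                classifier=lambda: ScikitLearnClassifier(
                    make_pipeline(StandardScaler(), LogisticRegression())
                ),
                min_samples=30,
            ),
            # min_samples defaults to 0, so this fallback is always eligible.
            "logistic-raw": CandidateClassifierSpecs(
                classifier=lambda: ScikitLearnClassifier(LogisticRegression()),
            ),
        }
    )
    clf.fit(x_train, y_train)  # selects the candidate with the highest AUC
    proba = clf.predict_proba(x_train)  # shape (50, 2)
    y_pred = proba[:, 1] > 0.5

Because each candidate is a callable, every call to `fit` trains classifiers from
scratch. As the in-code FIXME notes, the selection score is currently the AUC on
the training set itself rather than a k-fold cross-validation estimate.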