Make AdaptiveClassifier pick best classifier based on score

2025-12-06 09:28:51 -06:00 · 2020-04-16 16:26:58 -05:00
parent 89e5b9c2bf
commit 4c152d60f7
3 changed files with 71 additions and 31 deletions
--- a/src/python/miplearn/classifiers/adaptive.py
+++ b/src/python/miplearn/classifiers/adaptive.py
@@ -7,7 +7,9 @@ from copy import deepcopy

 from miplearn.classifiers import Classifier
 from miplearn.classifiers.counting import CountingClassifier
+from miplearn.classifiers.evaluator import ClassifierEvaluator
 from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler

@@ -17,45 +19,48 @@ logger = logging.getLogger(__name__)
 class AdaptiveClassifier(Classifier):
     """
     A meta-classifier which dynamically selects what actual classifier to use
-    based on the number of samples in the training data.
-
-    By default, uses CountingClassifier for less than 30 samples and
-    LogisticRegression (with standard scaling) for 30 or more samples.
+    based on the score an evaluator assigns to each candidate classifier.
     """
 
-    def __init__(self, classifiers=None):
+    def __init__(self, candidates=None, evaluator=None):
         """
-        Initializes the classifier.
-
-        The `classifiers` argument must be a list of tuples where the second element
-        of the tuple is the classifier and the first element is the number of
-        samples required. For example, if `classifiers` is set to
-        ```
-            [(100, ClassifierA()),
-             (50,  ClassifierB()),
-             (0,   ClassifierC())]
-        ``` then ClassifierA will be used if n_samples >= 100, ClassifierB will
-        be used if 100 > n_samples >= 50 and ClassifierC will be used if
-        50 > n_samples. The list must be ordered in (strictly) decreasing order.
+        Initializes the meta-classifier. `candidates` maps a name to a dict with
+        a "classifier" prototype and the "min samples" required to consider it;
+        `evaluator` provides `evaluate(clf, x, y)`. None selects the defaults.
         """
-        if classifiers is None:
-            classifiers = [
-                (30, make_pipeline(StandardScaler(), LogisticRegression())),
-                (0, CountingClassifier())
-            ]
-        self.available_classifiers = classifiers
+        if candidates is None:
+            candidates = {
+                "knn(100)": {
+                    "classifier": KNeighborsClassifier(n_neighbors=100),
+                    "min samples": 100,
+                },
+                "logistic": {
+                    "classifier": make_pipeline(StandardScaler(),
+                                                LogisticRegression()),
+                    "min samples": 30,
+                },
+                "counting": {
+                    "classifier": CountingClassifier(),
+                    "min samples": 0,
+                },
+            }
+        self.candidates = candidates
+        self.evaluator = ClassifierEvaluator() if evaluator is None else evaluator
         self.classifier = None
 
     def fit(self, x_train, y_train):
+        """Fits every eligible candidate and keeps the highest-scoring one."""
+        best_clf, best_score = None, -float("inf")
         n_samples = x_train.shape[0]
-
-        for (min_samples, clf_prototype) in self.available_classifiers:
-            if n_samples >= min_samples:
-                self.classifier = deepcopy(clf_prototype)
-                self.classifier.fit(x_train, y_train)
-                break
+        for clf_dict in self.candidates.values():
+            if n_samples < clf_dict["min samples"]:
+                continue
+            clf = deepcopy(clf_dict["classifier"])  # leave the prototype unfitted
+            clf.fit(x_train, y_train)
+            score = self.evaluator.evaluate(clf, x_train, y_train)
+            if score > best_score:  # ties keep the earlier candidate
+                best_clf, best_score = clf, score
+        self.classifier = best_clf  # None if no candidate was eligible
 
     def predict_proba(self, x_test):
         return self.classifier.predict_proba(x_test)
-
-
--- a/src/python/miplearn/classifiers/evaluator.py
+++ b/src/python/miplearn/classifiers/evaluator.py
@@ -0,0 +1,15 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from sklearn.metrics import roc_auc_score
+
+
+class ClassifierEvaluator:
+    """Scores a fitted classifier by the ROC AUC of its predicted probabilities."""
+
+    def evaluate(self, clf, x_train, y_train):
+        """Returns the ROC AUC of column 1 of `clf.predict_proba(x_train)`."""
+        # FIXME: use cross-validation
+        scores = clf.predict_proba(x_train)[:, 1]
+        return roc_auc_score(y_train, scores)
--- a/src/python/miplearn/classifiers/tests/test_evaluator.py
+++ b/src/python/miplearn/classifiers/tests/test_evaluator.py
@@ -0,0 +1,20 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+import numpy as np
+from miplearn.classifiers.evaluator import ClassifierEvaluator
+from sklearn.neighbors import KNeighborsClassifier
+
+
+def test_evaluator():
+    """ClassifierEvaluator scores 1-NN at 1.0 and 2-NN at 0.5 on two samples."""
+    x_train = np.array([[0, 0], [1, 0]])
+    y_train = np.array([0, 1])
+    ev = ClassifierEvaluator()
+    # Each classifier is fit and then scored on the same two-sample data set.
+    for n_neighbors, expected in [(1, 1.0), (2, 0.5)]:
+        clf = KNeighborsClassifier(n_neighbors=n_neighbors)
+        clf.fit(x_train, y_train)
+        assert ev.evaluate(clf, x_train, y_train) == expected
+