AdaptiveClassifier: Refactor and add tests

2021-01-25 08:59:06 -06:00
parent 8dba65dd9c
commit 4da561a6a8
6 changed files with 149 additions and 89 deletions

miplearn/classifiers/adaptive.py

@@ -4,63 +4,107 @@
 import logging
-from copy import deepcopy
-from typing import Any, Dict
+from typing import Dict, Callable, Optional
 
+import numpy as np
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_auc_score
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 
-from miplearn.classifiers import Classifier
+from miplearn.classifiers import Classifier, ScikitLearnClassifier
 from miplearn.classifiers.counting import CountingClassifier
-from miplearn.classifiers.evaluator import ClassifierEvaluator
 
 logger = logging.getLogger(__name__)
 
 
+class CandidateClassifierSpecs:
+    """
+    Specifications describing how to construct a certain classifier, and under
+    which circumstances it can be used.
+
+    Parameters
+    ----------
+    min_samples: int
+        Minimum number of samples for this classifier to be considered.
+    classifier: Callable[[], Classifier]
+        Callable that constructs the classifier.
+    """
+
+    def __init__(
+        self,
+        classifier: Callable[[], Classifier],
+        min_samples: int = 0,
+    ) -> None:
+        self.min_samples = min_samples
+        self.classifier = classifier
+
+
 class AdaptiveClassifier(Classifier):
     """
     A meta-classifier which dynamically selects what actual classifier to use
     based on its cross-validation score on a particular training data set.
+
+    Parameters
+    ----------
+    candidates: Dict[str, CandidateClassifierSpecs]
+        A dictionary of candidate classifiers to consider, mapping the name of the
+        candidate to its specs, which describes how to construct it and under what
+        scenarios. If no candidates are provided, uses a fixed set of defaults,
+        which includes `CountingClassifier`, `KNeighborsClassifier` and
+        `LogisticRegression`.
     """
 
     def __init__(
         self,
-        candidates: Dict[str, Any] = None,
-        evaluator: ClassifierEvaluator = ClassifierEvaluator(),
+        candidates: Dict[str, CandidateClassifierSpecs] = None,
    ) -> None:
+        super().__init__()
         if candidates is None:
             candidates = {
-                "knn(100)": {
-                    "classifier": KNeighborsClassifier(n_neighbors=100),
-                    "min samples": 100,
-                },
-                "logistic": {
-                    "classifier": make_pipeline(StandardScaler(), LogisticRegression()),
-                    "min samples": 30,
-                },
-                "counting": {
-                    "classifier": CountingClassifier(),
-                    "min samples": 0,
-                },
+                "knn(100)": CandidateClassifierSpecs(
+                    classifier=lambda: ScikitLearnClassifier(
+                        KNeighborsClassifier(n_neighbors=100)
+                    ),
+                    min_samples=100,
+                ),
+                "logistic": CandidateClassifierSpecs(
+                    classifier=lambda: ScikitLearnClassifier(
+                        make_pipeline(
+                            StandardScaler(),
+                            LogisticRegression(),
+                        )
+                    ),
+                    min_samples=30,
+                ),
+                "counting": CandidateClassifierSpecs(
+                    classifier=lambda: CountingClassifier(),
+                ),
             }
         self.candidates = candidates
-        self.evaluator = evaluator
-        self.classifier = None
+        self.classifier: Optional[Classifier] = None
 
-    def fit(self, x_train, y_train):
-        best_name, best_clf, best_score = None, None, -float("inf")
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
         n_samples = x_train.shape[0]
-        for (name, clf_dict) in self.candidates.items():
-            if n_samples < clf_dict["min samples"]:
+        assert y_train.shape == (n_samples, 2)
+        best_name, best_clf, best_score = None, None, -float("inf")
+        for (name, specs) in self.candidates.items():
+            if n_samples < specs.min_samples:
                 continue
-            clf = deepcopy(clf_dict["classifier"])
+            clf = specs.classifier()
             clf.fit(x_train, y_train)
-            score = self.evaluator.evaluate(clf, x_train, y_train)
+            proba = clf.predict_proba(x_train)
+            # FIXME: Switch to k-fold cross validation
+            score = roc_auc_score(y_train[:, 1], proba[:, 1])
             if score > best_score:
                 best_name, best_clf, best_score = name, clf, score
         logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score))
         self.classifier = best_clf
 
-    def predict_proba(self, x_test):
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        assert self.classifier is not None
         return self.classifier.predict_proba(x_test)
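For context, a minimal usage sketch of the refactored API. It is not part of the commit: the import path miplearn.classifiers.adaptive, the DecisionTreeClassifier candidate, and the toy data are illustrative assumptions; only AdaptiveClassifier, CandidateClassifierSpecs and ScikitLearnClassifier appear in the diff above. Per the new assertions in fit, x_train has shape (n_samples, n_features) and y_train is a two-column one-hot array.

import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Assumed import path, based on the file shown above living at
# miplearn/classifiers/adaptive.py:
from miplearn.classifiers import ScikitLearnClassifier
from miplearn.classifiers.adaptive import (
    AdaptiveClassifier,
    CandidateClassifierSpecs,
)

# Each spec holds a zero-argument constructor, so every call to fit()
# trains a fresh instance; the old dict-based API deep-copied a shared
# prototype instead.
clf = AdaptiveClassifier(
    candidates={
        "tree": CandidateClassifierSpecs(
            # DecisionTreeClassifier is an illustrative candidate,
            # not one of this commit's defaults.
            classifier=lambda: ScikitLearnClassifier(DecisionTreeClassifier()),
        ),
    }
)

x_train = np.random.rand(50, 3)
labels = np.random.rand(50) < 0.5
y_train = np.column_stack([~labels, labels])  # one-hot, shape (50, 2)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)  # probabilities, shape (50, 2)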

miplearn/classifiers/evaluator.py

@@ -1,15 +0,0 @@
-# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
-# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
-# Released under the modified BSD license. See COPYING.md for more details.
-
-from sklearn.metrics import roc_auc_score
-
-
-class ClassifierEvaluator:
-    def __init__(self) -> None:
-        pass
-
-    def evaluate(self, clf, x_train, y_train):
-        # FIXME: use cross-validation
-        proba = clf.predict_proba(x_train)
-        return roc_auc_score(y_train, proba[:, 1])
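With ClassifierEvaluator deleted, candidate scoring now lives inline in AdaptiveClassifier.fit as in-sample AUC, and both FIXMEs point the same way: replace it with k-fold cross-validation. A rough sketch of what that scoring could look like under the same (n_samples, 2) label convention; the helper name cross_val_auc and the n_splits value are illustrative, not part of this commit:

import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

def cross_val_auc(make_clf, x, y, n_splits=5):
    # Mean out-of-fold AUC; unlike in-sample AUC, this does not
    # reward candidates that merely memorize the training set.
    scores = []
    kfold = StratifiedKFold(n_splits=n_splits)
    for (train_idx, test_idx) in kfold.split(x, y[:, 1]):
        clf = make_clf()  # fresh instance per fold, as specs.classifier() does
        clf.fit(x[train_idx], y[train_idx])
        proba = clf.predict_proba(x[test_idx])
        scores.append(roc_auc_score(y[test_idx][:, 1], proba[:, 1]))
    return float(np.mean(scores))

Inside fit, the in-sample scoring lines would then collapse to something like score = cross_val_auc(specs.classifier, x_train, y_train).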