Start refactoring of classifiers

2025-12-07 09:58:51 -06:00 · 2021-01-22 11:35:29 -06:00
parent b87ef651e1
commit 8dba65dd9c
9 changed files with 202 additions and 72 deletions
--- a/miplearn/classifiers/counting.py
+++ b/miplearn/classifiers/counting.py
@@ -1,6 +1,7 @@
 #  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
+from typing import Optional, cast

 import numpy as np

@@ -9,20 +10,33 @@ from miplearn.classifiers import Classifier

 class CountingClassifier(Classifier):
    """
-    A classifier that generates constant predictions, based only on the
-    frequency of the training labels. For example, if y_train is [1.0, 0.0, 0.0]
-    this classifier always returns [0.66 0.33] for any x_test. It essentially
-    counts how many times each label appeared, hence the name.
+
+    A classifier that generates constant predictions, based only on the frequency of
+    the training labels. For example, suppose `y_train` is given by:
+    ```python
+    y_train = np.array([
+        [True, False],
+        [False, True],
+        [False, True],
+    ])
+    ```
+    Then `predict_proba` always returns `[0.33 0.66]` for every sample, regardless of
+    `x_test`. It essentially counts how many times each label appeared, hence the name.
+
    """

    def __init__(self) -> None:
-        self.mean = None
+        super().__init__()
+        self.mean: Optional[np.ndarray] = None

-    def fit(self, x_train, y_train):
-        self.mean = np.mean(y_train)
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        self.mean = cast(np.ndarray, np.mean(y_train, axis=0))

-    def predict_proba(self, x_test):
-        return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])])
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        n_samples = x_test.shape[0]
+        return np.array([self.mean for _ in range(n_samples)])

    def __repr__(self):
        return "CountingClassifier(mean=%s)" % self.mean