Sklearn: Handle the special case when all labels are the same
@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 from abc import ABC, abstractmethod
-from typing import Optional, Any
+from typing import Optional, Any, cast
 
 import numpy as np
 
@@ -139,6 +139,7 @@ class ScikitLearnClassifier(Classifier):
     def __init__(self, clf: Any) -> None:
         super().__init__()
         self.inner_clf = clf
+        self.constant: Optional[np.ndarray] = None
 
     def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
         super().fit(x_train, y_train)
@@ -147,11 +148,22 @@ class ScikitLearnClassifier(Classifier):
             f"Scikit-learn classifiers must have exactly two classes. "
             f"{n_classes} classes were provided instead."
         )
+
+        # When all samples belong to the same class, sklearn's predict_proba returns
+        # an array with a single column. The following check avoids this strange
+        # behavior.
+        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
+        if mean.max() == 1.0:
+            self.constant = mean
+            return
+
         self.inner_clf.fit(x_train, y_train[:, 1])
 
     def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
         super().predict_proba(x_test)
         n_samples = x_test.shape[0]
+        if self.constant is not None:
+            return np.array([self.constant for n in range(n_samples)])
         sklearn_proba = self.inner_clf.predict_proba(x_test)
         if isinstance(sklearn_proba, list):
             assert len(sklearn_proba) == self.n_classes
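For context on the guard added to fit above: when a scikit-learn estimator is trained on labels containing only one class, predict_proba returns one probability column per observed class, so the output has a single column instead of the two columns the rest of the class expects. A minimal sketch of that underlying behavior (the variable names are illustrative only; KNeighborsClassifier is used simply because the new test below uses it):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Two training samples that both carry the same label.
x = np.array([[0.0, 1.0], [1.0, 0.0]])
y = np.array([1, 1])

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x, y)

print(knn.classes_)                 # [1] -- only one class was observed
print(knn.predict_proba(x).shape)   # (2, 1) -- a single column, not two

The commit sidesteps this case by never fitting the inner classifier when the labels are constant, and instead replaying the stored label distribution at prediction time.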
tests/classifiers/test_sklearn.py (new file, 40 lines)

@@ -0,0 +1,40 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+import numpy as np
+from numpy.testing import assert_array_equal
+from sklearn.neighbors import KNeighborsClassifier
+
+from miplearn import ScikitLearnClassifier
+
+
+def test_constant_prediction():
+    x_train = np.array(
+        [
+            [0.0, 1.0],
+            [1.0, 0.0],
+        ]
+    )
+    y_train = np.array(
+        [
+            [True, False],
+            [True, False],
+        ]
+    )
+    clf = ScikitLearnClassifier(
+        KNeighborsClassifier(
+            n_neighbors=1,
+        )
+    )
+    clf.fit(x_train, y_train)
+    proba = clf.predict_proba(x_train)
+    assert_array_equal(
+        proba,
+        np.array(
+            [
+                [1.0, 0.0],
+                [1.0, 0.0],
+            ]
+        ),
+    )
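Assuming pytest is the project's test runner, the new test can be exercised on its own with something like:

    pytest tests/classifiers/test_sklearn.py::test_constant_prediction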