Reformat source code with Black; add pre-commit hooks and CI checks

2020-12-05 10:59:33 -06:00
parent 3823931382
commit d99600f101
49 changed files with 1291 additions and 972 deletions
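
The pre-commit and CI configuration files are among the 49 changed files but do not appear in this excerpt. For reference, a minimal Black hook in .pre-commit-config.yaml typically looks like the sketch below; the pinned revision is a placeholder, not necessarily what this commit used:

    # .pre-commit-config.yaml -- a minimal sketch, not necessarily the exact
    # configuration added in this commit
    repos:
      - repo: https://github.com/psf/black
        rev: 20.8b1  # placeholder; pin to the Black version used in CI
        hooks:
          - id: black

After `pre-commit install`, the hook reformats staged Python files on every commit, and a CI job running `pre-commit run --all-files` (or `black --check .`) keeps the tree formatted.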

View File

@@ -22,9 +22,11 @@ class AdaptiveClassifier(Classifier):
     based on its cross-validation score on a particular training data set.
     """
-    def __init__(self,
-                 candidates=None,
-                 evaluator=ClassifierEvaluator()):
+    def __init__(
+        self,
+        candidates=None,
+        evaluator=ClassifierEvaluator(),
+    ):
         """
         Initializes the meta-classifier.
         """
@@ -35,14 +37,13 @@ class AdaptiveClassifier(Classifier):
                     "min samples": 100,
                 },
                 "logistic": {
-                    "classifier": make_pipeline(StandardScaler(),
-                                                LogisticRegression()),
+                    "classifier": make_pipeline(StandardScaler(), LogisticRegression()),
                     "min samples": 30,
                 },
                 "counting": {
                     "classifier": CountingClassifier(),
                     "min samples": 0,
-                }
+                },
             }
         self.candidates = candidates
         self.evaluator = evaluator
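
The candidates dictionary above pairs each candidate classifier with a "min samples" value, presumably the minimum training-set size at which that candidate is considered, with the zero-threshold CountingClassifier as the fallback. A minimal usage sketch, assuming the module path miplearn.classifiers.adaptive and hypothetical training arrays:

    import numpy as np
    from miplearn.classifiers.adaptive import AdaptiveClassifier  # assumed path

    x_train = np.random.rand(200, 5)               # hypothetical features
    y_train = (x_train[:, 0] > 0.5).astype(float)  # hypothetical binary labels
    clf = AdaptiveClassifier()                     # uses the default candidates above
    clf.fit(x_train, y_train)                      # selects the best candidate by CV score
    proba = clf.predict_proba(x_train)             # shape (200, 2) array of probabilities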

View File

@@ -21,8 +21,7 @@ class CountingClassifier(Classifier):
         self.mean = np.mean(y_train)
 
     def predict_proba(self, x_test):
-        return np.array([[1 - self.mean, self.mean]
-                         for _ in range(x_test.shape[0])])
+        return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])])
 
     def __repr__(self):
         return "CountingClassifier(mean=%s)" % self.mean

View File

@@ -11,6 +11,7 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score
 
 import logging
+
 logger = logging.getLogger(__name__)
@@ -28,12 +29,14 @@ class CrossValidatedClassifier(Classifier):
     acceptable. Other numbers are a linear interpolation of these two extremes.
     """
-    def __init__(self,
-                 classifier=LogisticRegression(),
-                 threshold=0.75,
-                 constant=0.0,
-                 cv=5,
-                 scoring='accuracy'):
+    def __init__(
+        self,
+        classifier=LogisticRegression(),
+        threshold=0.75,
+        constant=0.0,
+        cv=5,
+        scoring="accuracy",
+    ):
         self.classifier = None
         self.classifier_prototype = classifier
         self.constant = constant

@@ -45,24 +48,36 @@ class CrossValidatedClassifier(Classifier):
         # Calculate dummy score and absolute score threshold
         y_train_avg = np.average(y_train)
         dummy_score = max(y_train_avg, 1 - y_train_avg)
-        absolute_threshold = 1. * self.threshold + dummy_score * (1 - self.threshold)
+        absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)
 
         # Calculate cross validation score and decide which classifier to use
         clf = deepcopy(self.classifier_prototype)
-        cv_score = float(np.mean(cross_val_score(clf,
-                                                 x_train,
-                                                 y_train,
-                                                 cv=self.cv,
-                                                 scoring=self.scoring)))
+        cv_score = float(
+            np.mean(
+                cross_val_score(
+                    clf,
+                    x_train,
+                    y_train,
+                    cv=self.cv,
+                    scoring=self.scoring,
+                )
+            )
+        )
         if cv_score >= absolute_threshold:
-            logger.debug("cv_score is above threshold (%.2f >= %.2f); keeping" %
-                         (cv_score, absolute_threshold))
+            logger.debug(
+                "cv_score is above threshold (%.2f >= %.2f); keeping"
+                % (cv_score, absolute_threshold)
+            )
             self.classifier = clf
         else:
-            logger.debug("cv_score is below threshold (%.2f < %.2f); discarding" %
-                         (cv_score, absolute_threshold))
-            self.classifier = DummyClassifier(strategy="constant",
-                                              constant=self.constant)
+            logger.debug(
+                "cv_score is below threshold (%.2f < %.2f); discarding"
+                % (cv_score, absolute_threshold)
+            )
+            self.classifier = DummyClassifier(
+                strategy="constant",
+                constant=self.constant,
+            )
 
         # Train chosen classifier
         self.classifier.fit(x_train, y_train)
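
To make the threshold arithmetic concrete: threshold=1.0 demands a perfect cross-validation score, threshold=0.0 only demands matching the majority-class dummy score, and values in between interpolate linearly. For example, if 60% of y_train is positive, then dummy_score = 0.6, and with the default threshold=0.75 the cutoff becomes 1.0 * 0.75 + 0.6 * (1 - 0.75) = 0.90, so a candidate classifier is kept only if its mean CV accuracy reaches 0.90.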

View File

@@ -12,7 +12,6 @@ E = 0.1
 def test_counting():
     clf = CountingClassifier()
     clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
-    expected_proba = np.array([[0.375, 0.625],
-                               [0.375, 0.625]])
+    expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
     actual_proba = clf.predict_proba(np.zeros((2, 25)))
     assert norm(actual_proba - expected_proba) < E
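
The expected values follow directly from the classifier's definition: five of the eight training labels are 1.0, so the fitted mean is 0.625 and every predicted row is [1 - 0.625, 0.625] = [0.375, 0.625].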

View File

@@ -13,34 +13,36 @@ E = 0.1
 def test_cv():
     # Training set: label is true if point is inside a 2D circle
-    x_train = np.array([[x1, x2]
-                        for x1 in range(-10, 11)
-                        for x2 in range(-10, 11)])
+    x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
     x_train = StandardScaler().fit_transform(x_train)
     n_samples = x_train.shape[0]
-    y_train = np.array([1.0 if x1*x1 + x2*x2 <= 100 else 0.0
-                        for x1 in range(-10, 11)
-                        for x2 in range(-10, 11)])
+    y_train = np.array(
+        [
+            1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)
+        ]
+    )
 
     # Support vector machines with linear kernels do not perform well on this
     # data set, so predictor should return the given constant.
-    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
-                                                  random_state=42),
-                                   threshold=0.90,
-                                   constant=0.0,
-                                   cv=30)
+    clf = CrossValidatedClassifier(
+        classifier=SVC(probability=True, random_state=42),
+        threshold=0.90,
+        constant=0.0,
+        cv=30,
+    )
     clf.fit(x_train, y_train)
     assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E
 
     # Support vector machines with quadratic kernels perform almost perfectly
     # on this data set, so predictor should return their prediction.
-    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
-                                                  kernel='poly',
-                                                  degree=2,
-                                                  random_state=42),
-                                   threshold=0.90,
-                                   cv=30)
+    clf = CrossValidatedClassifier(
+        classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
+        threshold=0.90,
+        cv=30,
+    )
     clf.fit(x_train, y_train)
     print(y_train - clf.predict(x_train))
     assert norm(y_train - clf.predict(x_train)) < E
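
For scale: the grid contains 21 * 21 = 441 points, roughly 72% of which lie inside the circle, so dummy_score ≈ 0.72 and the cutoff with threshold=0.90 is about 0.90 + 0.72 * 0.10 ≈ 0.97. The linear-kernel SVC falls short of this cutoff and is replaced by the constant classifier; the quadratic-kernel SVC clears it and is kept.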

View File

@@ -17,4 +17,3 @@ def test_evaluator():
     ev = ClassifierEvaluator()
     assert ev.evaluate(clf_a, x_train, y_train) == 1.0
     assert ev.evaluate(clf_b, x_train, y_train) == 0.5
-

View File

@@ -11,12 +11,16 @@ from miplearn.classifiers.threshold import MinPrecisionThreshold
 def test_threshold_dynamic():
     clf = Mock(spec=Classifier)
-    clf.predict_proba = Mock(return_value=np.array([
-        [0.10, 0.90],
-        [0.10, 0.90],
-        [0.20, 0.80],
-        [0.30, 0.70],
-    ]))
+    clf.predict_proba = Mock(
+        return_value=np.array(
+            [
+                [0.10, 0.90],
+                [0.10, 0.90],
+                [0.20, 0.80],
+                [0.30, 0.70],
+            ]
+        )
+    )
     x_train = np.array([0, 1, 2, 3])
     y_train = np.array([1, 1, 0, 0])

@@ -31,4 +35,3 @@ def test_threshold_dynamic():
     threshold = MinPrecisionThreshold(min_precision=0.00)
     assert threshold.find(clf, x_train, y_train) == 0.70
-

View File

@@ -30,11 +30,15 @@ class MinPrecisionThreshold(DynamicThreshold):
     def find(self, clf, x_train, y_train):
         proba = clf.predict_proba(x_train)
-        assert isinstance(proba, np.ndarray), \
-            "classifier should return numpy array"
-        assert proba.shape == (x_train.shape[0], 2), \
-            "classifier should return (%d,%d)-shaped array, not %s" % (
-                x_train.shape[0], 2, str(proba.shape))
+        assert isinstance(proba, np.ndarray), "classifier should return numpy array"
+        assert proba.shape == (
+            x_train.shape[0],
+            2,
+        ), "classifier should return (%d,%d)-shaped array, not %s" % (
+            x_train.shape[0],
+            2,
+            str(proba.shape),
+        )
         fps, tps, thresholds = _binary_clf_curve(y_train, proba[:, 1])
         precision = tps / (tps + fps)
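
Worked through with the mocked values from the test shown earlier (positive-class probabilities [0.90, 0.90, 0.80, 0.70], labels [1, 1, 0, 0]): lowering the threshold admits predictions in that order, giving precision 2/2 = 1.0 at 0.90, 2/3 ≈ 0.67 at 0.80, and 2/4 = 0.5 at 0.70. Returning the smallest threshold whose precision still meets min_precision is consistent with the visible assert: min_precision=1.00 would select 0.90, while min_precision=0.00 selects 0.70.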