Start refactoring of classifiers

2025-12-06 01:18:52 -06:00 · 2021-01-22 11:35:29 -06:00
parent b87ef651e1
commit 8dba65dd9c
9 changed files with 202 additions and 72 deletions
--- a/tests/classifiers/test_counting.py
+++ b/tests/classifiers/test_counting.py
@@ -12,7 +12,27 @@ E = 0.1

 def test_counting():
    clf = CountingClassifier()
-    clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
-    expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
-    actual_proba = clf.predict_proba(np.zeros((2, 25)))
-    assert norm(actual_proba - expected_proba) < E
+    n_features = 25
+    x_train = np.zeros((8, n_features))
+    y_train = np.array(
+        [
+            [True, False, False],
+            [True, False, False],
+            [False, True, False],
+            [True, False, False],
+            [False, True, False],
+            [False, True, False],
+            [False, True, False],
+            [False, False, True],
+        ]
+    )
+    x_test = np.zeros((2, n_features))
+    y_expected = np.array(
+        [
+            [3 / 8.0, 4 / 8.0, 1 / 8.0],
+            [3 / 8.0, 4 / 8.0, 1 / 8.0],
+        ]
+    )
+    clf.fit(x_train, y_train)
+    y_actual = clf.predict_proba(x_test)
+    assert norm(y_actual - y_expected) < E
--- a/tests/classifiers/test_cv.py
+++ b/tests/classifiers/test_cv.py
@@ -7,20 +7,37 @@ from numpy.linalg import norm
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC

+from miplearn.classifiers import ScikitLearnClassifier
 from miplearn.classifiers.cv import CrossValidatedClassifier

 E = 0.1


-def test_cv():
+def test_cv() -> None:
    # Training set: label is true if point is inside a 2D circle
-    x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
+    x_train = np.array(
+        [
+            [
+                x1,
+                x2,
+            ]
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)
+        ]
+    )
    x_train = StandardScaler().fit_transform(x_train)
    n_samples = x_train.shape[0]
-
    y_train = np.array(
        [
-            1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
+            [
+                False,
+                True,
+            ]
+            if x1 * x1 + x2 * x2 <= 100
+            else [
+                True,
+                False,
+            ]
            for x1 in range(-10, 11)
            for x2 in range(-10, 11)
        ]
@@ -29,24 +46,39 @@ def test_cv():
    # Support vector machines with linear kernels do not perform well on this
    # data set, so the predictor should return the given constant.
    clf = CrossValidatedClassifier(
-        classifier=SVC(probability=True, random_state=42),
+        classifier=lambda: ScikitLearnClassifier(
+            SVC(
+                probability=True,
+                random_state=42,
+            )
+        ),
        threshold=0.90,
-        constant=0.0,
+        constant=[True, False],
        cv=30,
    )
    clf.fit(x_train, y_train)
    proba = clf.predict_proba(x_train)
+    assert isinstance(proba, np.ndarray)
+    assert proba.shape == (n_samples, 2)
+
    y_pred = (proba[:, 1] > 0.5).astype(float)
    assert norm(np.zeros(n_samples) - y_pred) < E

    # Support vector machines with quadratic kernels perform almost perfectly
    # on this data set, so the predictor should return their prediction.
    clf = CrossValidatedClassifier(
-        classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
+        classifier=lambda: ScikitLearnClassifier(
+            SVC(
+                probability=True,
+                kernel="poly",
+                degree=2,
+                random_state=42,
+            )
+        ),
        threshold=0.90,
        cv=30,
    )
    clf.fit(x_train, y_train)
    proba = clf.predict_proba(x_train)
    y_pred = (proba[:, 1] > 0.5).astype(float)
-    assert norm(y_train - y_pred) < E
+    assert norm(y_train[:, 1] - y_pred) < E