mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 01:18:52 -06:00
Start refactoring of classifiers
This commit is contained in:
@@ -12,7 +12,27 @@ E = 0.1
|
||||
|
||||
def test_counting():
|
||||
clf = CountingClassifier()
|
||||
clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
|
||||
expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
|
||||
actual_proba = clf.predict_proba(np.zeros((2, 25)))
|
||||
assert norm(actual_proba - expected_proba) < E
|
||||
n_features = 25
|
||||
x_train = np.zeros((8, n_features))
|
||||
y_train = np.array(
|
||||
[
|
||||
[True, False, False],
|
||||
[True, False, False],
|
||||
[False, True, False],
|
||||
[True, False, False],
|
||||
[False, True, False],
|
||||
[False, True, False],
|
||||
[False, True, False],
|
||||
[False, False, True],
|
||||
]
|
||||
)
|
||||
x_test = np.zeros((2, n_features))
|
||||
y_expected = np.array(
|
||||
[
|
||||
[3 / 8.0, 4 / 8.0, 1 / 8.0],
|
||||
[3 / 8.0, 4 / 8.0, 1 / 8.0],
|
||||
]
|
||||
)
|
||||
clf.fit(x_train, y_train)
|
||||
y_actual = clf.predict_proba(x_test)
|
||||
assert norm(y_actual - y_expected) < E
|
||||
|
||||
@@ -7,20 +7,37 @@ from numpy.linalg import norm
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.svm import SVC
|
||||
|
||||
from miplearn.classifiers import ScikitLearnClassifier
|
||||
from miplearn.classifiers.cv import CrossValidatedClassifier
|
||||
|
||||
E = 0.1
|
||||
|
||||
|
||||
def test_cv():
|
||||
def test_cv() -> None:
|
||||
# Training set: label is true if point is inside a 2D circle
|
||||
x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
|
||||
x_train = np.array(
|
||||
[
|
||||
[
|
||||
x1,
|
||||
x2,
|
||||
]
|
||||
for x1 in range(-10, 11)
|
||||
for x2 in range(-10, 11)
|
||||
]
|
||||
)
|
||||
x_train = StandardScaler().fit_transform(x_train)
|
||||
n_samples = x_train.shape[0]
|
||||
|
||||
y_train = np.array(
|
||||
[
|
||||
1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
|
||||
[
|
||||
False,
|
||||
True,
|
||||
]
|
||||
if x1 * x1 + x2 * x2 <= 100
|
||||
else [
|
||||
True,
|
||||
False,
|
||||
]
|
||||
for x1 in range(-10, 11)
|
||||
for x2 in range(-10, 11)
|
||||
]
|
||||
@@ -29,24 +46,39 @@ def test_cv():
|
||||
# Support vector machines with linear kernels do not perform well on this
|
||||
# data set, so predictor should return the given constant.
|
||||
clf = CrossValidatedClassifier(
|
||||
classifier=SVC(probability=True, random_state=42),
|
||||
classifier=lambda: ScikitLearnClassifier(
|
||||
SVC(
|
||||
probability=True,
|
||||
random_state=42,
|
||||
)
|
||||
),
|
||||
threshold=0.90,
|
||||
constant=0.0,
|
||||
constant=[True, False],
|
||||
cv=30,
|
||||
)
|
||||
clf.fit(x_train, y_train)
|
||||
proba = clf.predict_proba(x_train)
|
||||
assert isinstance(proba, np.ndarray)
|
||||
assert proba.shape == (n_samples, 2)
|
||||
|
||||
y_pred = (proba[:, 1] > 0.5).astype(float)
|
||||
assert norm(np.zeros(n_samples) - y_pred) < E
|
||||
|
||||
# Support vector machines with quadratic kernels perform almost perfectly
|
||||
# on this data set, so predictor should return their prediction.
|
||||
clf = CrossValidatedClassifier(
|
||||
classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
|
||||
classifier=lambda: ScikitLearnClassifier(
|
||||
SVC(
|
||||
probability=True,
|
||||
kernel="poly",
|
||||
degree=2,
|
||||
random_state=42,
|
||||
)
|
||||
),
|
||||
threshold=0.90,
|
||||
cv=30,
|
||||
)
|
||||
clf.fit(x_train, y_train)
|
||||
proba = clf.predict_proba(x_train)
|
||||
y_pred = (proba[:, 1] > 0.5).astype(float)
|
||||
assert norm(y_train - y_pred) < E
|
||||
assert norm(y_train[:, 1] - y_pred) < E
|
||||
|
||||
Reference in New Issue
Block a user