Start refactoring of classifiers

This commit is contained in:
2021-01-22 11:35:29 -06:00
parent b87ef651e1
commit 8dba65dd9c
9 changed files with 202 additions and 72 deletions

View File

@@ -12,7 +12,27 @@ E = 0.1
# Checks that CountingClassifier.predict_proba reproduces the label
# frequencies seen during fit, within tolerance E.
# NOTE(review): this span is a flattened diff hunk (its header reads
# "-12,7 +12,27"): indentation and +/- markers are gone, so two variants of
# the test body follow one another. Which lines are removed vs. added is
# inferred, not shown; confirm against the commit.
def test_counting():
clf = CountingClassifier()
# Variant A (presumably the pre-change side): flat 0.0/1.0 labels. Three of
# the eight labels are 0.0 and five are 1.0, which is where the expected row
# [0.375, 0.625] comes from.
clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
actual_proba = clf.predict_proba(np.zeros((2, 25)))
assert norm(actual_proba - expected_proba) < E
# Variant B (presumably the post-change side): one boolean column per class,
# exactly one True per row.
n_features = 25
# Features are all zeros for both the training and the test samples.
x_train = np.zeros((8, n_features))
# True counts per column: 3, 4 and 1 out of 8 rows.
y_train = np.array(
[
[True, False, False],
[True, False, False],
[False, True, False],
[True, False, False],
[False, True, False],
[False, True, False],
[False, True, False],
[False, False, True],
]
)
x_test = np.zeros((2, n_features))
# Both test rows expect the per-column frequencies counted above.
y_expected = np.array(
[
[3 / 8.0, 4 / 8.0, 1 / 8.0],
[3 / 8.0, 4 / 8.0, 1 / 8.0],
]
)
clf.fit(x_train, y_train)
y_actual = clf.predict_proba(x_test)
# Passes when the norm of the element-wise difference is below E
# (E = 0.1 according to the hunk header context).
assert norm(y_actual - y_expected) < E

View File

@@ -7,20 +7,37 @@ from numpy.linalg import norm
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from miplearn.classifiers import ScikitLearnClassifier
from miplearn.classifiers.cv import CrossValidatedClassifier
E = 0.1
# Exercises CrossValidatedClassifier on a synthetic "inside a circle" data
# set with two different SVC configurations.
# NOTE(review): this span comes from a flattened diff (two hunks, no +/-
# markers, no indentation). Several statements appear twice, in an older and
# a newer form, and the lines between the hunks are not shown; confirm
# against the commit before treating it as runnable code.
def test_cv():
def test_cv() -> None:
# Training set: label is true if point is inside a 2D circle
# 21 x 21 integer grid covering [-10, 10] on both axes; the one-line and the
# multi-line comprehension below build the same array.
x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
x_train = np.array(
[
[
x1,
x2,
]
for x1 in range(-10, 11)
for x2 in range(-10, 11)
]
)
x_train = StandardScaler().fit_transform(x_train)
n_samples = x_train.shape[0]
# Labels are computed from the raw grid integers (the loops are repeated
# here), not from the scaled x_train. Two element forms follow: a float
# 1.0/0.0, and a boolean pair whose index 1 is True inside the circle.
y_train = np.array(
[
1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
[
False,
True,
]
if x1 * x1 + x2 * x2 <= 100
else [
True,
False,
]
for x1 in range(-10, 11)
for x2 in range(-10, 11)
]
# (The lines between the two diff hunks are elided at this point.)
@@ -29,24 +46,39 @@ def test_cv():
# Support vector machines with linear kernels do not perform well on this
# data set, so predictor should return the given constant.
# NOTE(review): no `kernel` argument is passed to SVC in this first case, so
# the library default applies (scikit-learn documents it as "rbf"), not an
# explicitly linear kernel; confirm the comment above matches the intent.
clf = CrossValidatedClassifier(
# `classifier` appears both as a bare SVC and as a zero-argument factory that
# wraps the SVC in ScikitLearnClassifier (presumably old vs. new form).
classifier=SVC(probability=True, random_state=42),
classifier=lambda: ScikitLearnClassifier(
SVC(
probability=True,
random_state=42,
)
),
# Presumably `threshold` is a minimum cross-validation score and `cv` a fold
# count; TODO confirm in CrossValidatedClassifier.
threshold=0.90,
# `constant` likewise appears as a scalar and as a boolean pair.
constant=0.0,
constant=[True, False],
cv=30,
)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)
assert isinstance(proba, np.ndarray)
assert proba.shape == (n_samples, 2)
# Column 1 is thresholded at 0.5; the check below requires the result to be
# 0.0 for every sample, consistent with index 1 of constant=[True, False].
y_pred = (proba[:, 1] > 0.5).astype(float)
assert norm(np.zeros(n_samples) - y_pred) < E
# Support vector machines with quadratic kernels perform almost perfectly
# on this data set, so predictor should return their prediction.
# No `constant` is passed in this second construction.
clf = CrossValidatedClassifier(
classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
classifier=lambda: ScikitLearnClassifier(
SVC(
probability=True,
kernel="poly",
degree=2,
random_state=42,
)
),
threshold=0.90,
cv=30,
)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)
y_pred = (proba[:, 1] > 0.5).astype(float)
# Two forms of the final check: against y_train as a whole, and against its
# column 1 only (the "inside the circle" flag of the boolean-pair labels).
assert norm(y_train - y_pred) < E
assert norm(y_train[:, 1] - y_pred) < E