Mirror of https://github.com/ANL-CEEESA/MIPLearn.git (synced 2025-12-06 09:28:51 -06:00)
Reformat source code with Black; add pre-commit hooks and CI checks
This commit is contained in:
@@ -22,9 +22,11 @@ class AdaptiveClassifier(Classifier):
     based on its cross-validation score on a particular training data set.
     """
 
-    def __init__(self,
-                 candidates=None,
-                 evaluator=ClassifierEvaluator()):
+    def __init__(
+        self,
+        candidates=None,
+        evaluator=ClassifierEvaluator(),
+    ):
         """
         Initializes the meta-classifier.
         """
@@ -35,14 +37,13 @@ class AdaptiveClassifier(Classifier):
                     "min samples": 100,
                 },
                 "logistic": {
-                    "classifier": make_pipeline(StandardScaler(),
-                                                LogisticRegression()),
+                    "classifier": make_pipeline(StandardScaler(), LogisticRegression()),
                     "min samples": 30,
                 },
                 "counting": {
                     "classifier": CountingClassifier(),
                     "min samples": 0,
-                }
+                },
             }
         self.candidates = candidates
         self.evaluator = evaluator
@@ -21,8 +21,7 @@ class CountingClassifier(Classifier):
         self.mean = np.mean(y_train)
 
     def predict_proba(self, x_test):
-        return np.array([[1 - self.mean, self.mean]
-                         for _ in range(x_test.shape[0])])
+        return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])])
 
     def __repr__(self):
         return "CountingClassifier(mean=%s)" % self.mean
@@ -11,6 +11,7 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score
 
 import logging
+
 logger = logging.getLogger(__name__)
 
 
@@ -28,12 +29,14 @@ class CrossValidatedClassifier(Classifier):
     acceptable. Other numbers are a linear interpolation of these two extremes.
     """
 
-    def __init__(self,
-                 classifier=LogisticRegression(),
-                 threshold=0.75,
-                 constant=0.0,
-                 cv=5,
-                 scoring='accuracy'):
+    def __init__(
+        self,
+        classifier=LogisticRegression(),
+        threshold=0.75,
+        constant=0.0,
+        cv=5,
+        scoring="accuracy",
+    ):
         self.classifier = None
         self.classifier_prototype = classifier
         self.constant = constant
@@ -45,24 +48,36 @@ class CrossValidatedClassifier(Classifier):
         # Calculate dummy score and absolute score threshold
         y_train_avg = np.average(y_train)
         dummy_score = max(y_train_avg, 1 - y_train_avg)
-        absolute_threshold = 1. * self.threshold + dummy_score * (1 - self.threshold)
+        absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)
 
         # Calculate cross validation score and decide which classifier to use
         clf = deepcopy(self.classifier_prototype)
-        cv_score = float(np.mean(cross_val_score(clf,
-                                                 x_train,
-                                                 y_train,
-                                                 cv=self.cv,
-                                                 scoring=self.scoring)))
+        cv_score = float(
+            np.mean(
+                cross_val_score(
+                    clf,
+                    x_train,
+                    y_train,
+                    cv=self.cv,
+                    scoring=self.scoring,
+                )
+            )
+        )
         if cv_score >= absolute_threshold:
-            logger.debug("cv_score is above threshold (%.2f >= %.2f); keeping" %
-                         (cv_score, absolute_threshold))
+            logger.debug(
+                "cv_score is above threshold (%.2f >= %.2f); keeping"
+                % (cv_score, absolute_threshold)
+            )
             self.classifier = clf
         else:
-            logger.debug("cv_score is below threshold (%.2f < %.2f); discarding" %
-                         (cv_score, absolute_threshold))
-            self.classifier = DummyClassifier(strategy="constant",
-                                              constant=self.constant)
+            logger.debug(
+                "cv_score is below threshold (%.2f < %.2f); discarding"
+                % (cv_score, absolute_threshold)
+            )
+            self.classifier = DummyClassifier(
+                strategy="constant",
+                constant=self.constant,
+            )
 
         # Train chosen classifier
         self.classifier.fit(x_train, y_train)
@@ -12,7 +12,6 @@ E = 0.1
 def test_counting():
     clf = CountingClassifier()
     clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
-    expected_proba = np.array([[0.375, 0.625],
-                               [0.375, 0.625]])
+    expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
     actual_proba = clf.predict_proba(np.zeros((2, 25)))
     assert norm(actual_proba - expected_proba) < E
@@ -13,34 +13,36 @@ E = 0.1
 
 def test_cv():
     # Training set: label is true if point is inside a 2D circle
-    x_train = np.array([[x1, x2]
-                        for x1 in range(-10, 11)
-                        for x2 in range(-10, 11)])
+    x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
     x_train = StandardScaler().fit_transform(x_train)
     n_samples = x_train.shape[0]
 
-    y_train = np.array([1.0 if x1*x1 + x2*x2 <= 100 else 0.0
-                        for x1 in range(-10, 11)
-                        for x2 in range(-10, 11)])
+    y_train = np.array(
+        [
+            1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)
+        ]
+    )
 
     # Support vector machines with linear kernels do not perform well on this
     # data set, so predictor should return the given constant.
-    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
-                                                  random_state=42),
-                                   threshold=0.90,
-                                   constant=0.0,
-                                   cv=30)
+    clf = CrossValidatedClassifier(
+        classifier=SVC(probability=True, random_state=42),
+        threshold=0.90,
+        constant=0.0,
+        cv=30,
+    )
     clf.fit(x_train, y_train)
     assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E
 
     # Support vector machines with quadratic kernels perform almost perfectly
     # on this data set, so predictor should return their prediction.
-    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
-                                                  kernel='poly',
-                                                  degree=2,
-                                                  random_state=42),
-                                   threshold=0.90,
-                                   cv=30)
+    clf = CrossValidatedClassifier(
+        classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
+        threshold=0.90,
+        cv=30,
+    )
     clf.fit(x_train, y_train)
     print(y_train - clf.predict(x_train))
     assert norm(y_train - clf.predict(x_train)) < E
@@ -17,4 +17,3 @@ def test_evaluator():
     ev = ClassifierEvaluator()
     assert ev.evaluate(clf_a, x_train, y_train) == 1.0
     assert ev.evaluate(clf_b, x_train, y_train) == 0.5
-
@@ -11,12 +11,16 @@ from miplearn.classifiers.threshold import MinPrecisionThreshold
 
 def test_threshold_dynamic():
     clf = Mock(spec=Classifier)
-    clf.predict_proba = Mock(return_value=np.array([
-        [0.10, 0.90],
-        [0.10, 0.90],
-        [0.20, 0.80],
-        [0.30, 0.70],
-    ]))
+    clf.predict_proba = Mock(
+        return_value=np.array(
+            [
+                [0.10, 0.90],
+                [0.10, 0.90],
+                [0.20, 0.80],
+                [0.30, 0.70],
+            ]
+        )
+    )
     x_train = np.array([0, 1, 2, 3])
     y_train = np.array([1, 1, 0, 0])
 
@@ -31,4 +35,3 @@ def test_threshold_dynamic():
 
     threshold = MinPrecisionThreshold(min_precision=0.00)
     assert threshold.find(clf, x_train, y_train) == 0.70
-
@@ -30,11 +30,15 @@ class MinPrecisionThreshold(DynamicThreshold):
     def find(self, clf, x_train, y_train):
         proba = clf.predict_proba(x_train)
 
-        assert isinstance(proba, np.ndarray), \
-            "classifier should return numpy array"
-        assert proba.shape == (x_train.shape[0], 2), \
-            "classifier should return (%d,%d)-shaped array, not %s" % (
-                x_train.shape[0], 2, str(proba.shape))
+        assert isinstance(proba, np.ndarray), "classifier should return numpy array"
+        assert proba.shape == (
+            x_train.shape[0],
+            2,
+        ), "classifier should return (%d,%d)-shaped array, not %s" % (
+            x_train.shape[0],
+            2,
+            str(proba.shape),
+        )
 
         fps, tps, thresholds = _binary_clf_curve(y_train, proba[:, 1])
         precision = tps / (tps + fps)
Reference in New Issue
Block a user