Make AdaptiveClassifier pick best classifier based on score

pull/3/head
Alinson S. Xavier 6 years ago
parent 89e5b9c2bf
commit 4c152d60f7

@@ -7,7 +7,9 @@ from copy import deepcopy
 from miplearn.classifiers import Classifier
 from miplearn.classifiers.counting import CountingClassifier
+from miplearn.classifiers.evaluator import ClassifierEvaluator
 from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
@@ -17,45 +19,48 @@ logger = logging.getLogger(__name__)
 class AdaptiveClassifier(Classifier):
     """
     A meta-classifier which dynamically selects what actual classifier to use
-    based on the number of samples in the training data.
-
-    By default, uses CountingClassifier for less than 30 samples and
-    LogisticRegression (with standard scaling) for 30 or more samples.
+    based on its cross-validation score on a particular training data set.
     """
-    def __init__(self, classifiers=None):
+    def __init__(self,
+                 candidates=None,
+                 evaluator=ClassifierEvaluator()):
         """
-        Initializes the classifier.
-
-        The `classifiers` argument must be a list of tuples where the second element
-        of the tuple is the classifier and the first element is the number of
-        samples required. For example, if `classifiers` is set to
-        ```
-        [(100, ClassifierA()),
-         (50, ClassifierB()),
-         (0, ClassifierC())]
-        ``` then ClassifierA will be used if n_samples >= 100, ClassifierB will
-        be used if 100 > n_samples >= 50 and ClassifierC will be used if
-        50 > n_samples. The list must be ordered in (strictly) decreasing order.
+        Initializes the meta-classifier.
         """
-        if classifiers is None:
-            classifiers = [
-                (30, make_pipeline(StandardScaler(), LogisticRegression())),
-                (0, CountingClassifier())
-            ]
-        self.available_classifiers = classifiers
+        if candidates is None:
+            candidates = {
+                "knn(100)": {
+                    "classifier": KNeighborsClassifier(n_neighbors=100),
+                    "min samples": 100,
+                },
+                "logistic": {
+                    "classifier": make_pipeline(StandardScaler(),
+                                                LogisticRegression()),
+                    "min samples": 30,
+                },
+                "counting": {
+                    "classifier": CountingClassifier(),
+                    "min samples": 0,
+                }
+            }
+        self.candidates = candidates
+        self.evaluator = evaluator
         self.classifier = None

     def fit(self, x_train, y_train):
+        best_clf = None
+        best_score = -float("inf")
         n_samples = x_train.shape[0]
-        for (min_samples, clf_prototype) in self.available_classifiers:
-            if n_samples >= min_samples:
-                self.classifier = deepcopy(clf_prototype)
-                self.classifier.fit(x_train, y_train)
-                break
+        for clf_dict in self.candidates.values():
+            if n_samples < clf_dict["min samples"]:
+                continue
+            clf = deepcopy(clf_dict["classifier"])
+            clf.fit(x_train, y_train)
+            score = self.evaluator.evaluate(clf, x_train, y_train)
+            if score > best_score:
+                best_clf, best_score = clf, score
+        self.classifier = best_clf

     def predict_proba(self, x_test):
         return self.classifier.predict_proba(x_test)
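
For context only (not part of this commit): a minimal usage sketch of the new selection behavior. The import path `miplearn.classifiers.adaptive` and the synthetic data are assumptions made for illustration.

```
# Sketch: fit() trains every candidate whose "min samples" threshold is met,
# scores each one with the evaluator, and keeps the best-scoring classifier.
import numpy as np
from miplearn.classifiers.adaptive import AdaptiveClassifier  # path assumed

x_train = np.random.rand(200, 5)             # 200 samples, 5 features
y_train = (x_train[:, 0] > 0.5).astype(int)  # binary labels

clf = AdaptiveClassifier()                   # default knn / logistic / counting candidates
clf.fit(x_train, y_train)                    # keeps the candidate with the best score
proba = clf.predict_proba(x_train)           # array of shape (200, 2)
```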

@@ -0,0 +1,15 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from sklearn.metrics import roc_auc_score
+
+
+class ClassifierEvaluator:
+    def __init__(self):
+        pass
+
+    def evaluate(self, clf, x_train, y_train):
+        # FIXME: use cross-validation
+        proba = clf.predict_proba(x_train)
+        return roc_auc_score(y_train, proba[:, 1])
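
The FIXME above flags that scoring each candidate on its own training data rewards overfitting. A cross-validated variant, sketched below, is only an illustration (it is not part of this commit and assumes the candidate classifiers implement the scikit-learn estimator interface so that `cross_val_score` can clone them):

```
# Hypothetical drop-in replacement for ClassifierEvaluator (illustration only).
from sklearn.model_selection import cross_val_score


class CrossValidatedEvaluator:
    def __init__(self, n_splits=5):
        self.n_splits = n_splits

    def evaluate(self, clf, x_train, y_train):
        # Average ROC AUC over k folds instead of scoring on the training set.
        scores = cross_val_score(clf, x_train, y_train,
                                 cv=self.n_splits, scoring="roc_auc")
        return scores.mean()
```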

@@ -0,0 +1,20 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+import numpy as np
+from miplearn.classifiers.evaluator import ClassifierEvaluator
+from sklearn.neighbors import KNeighborsClassifier
+
+
+def test_evaluator():
+    clf_a = KNeighborsClassifier(n_neighbors=1)
+    clf_b = KNeighborsClassifier(n_neighbors=2)
+    x_train = np.array([[0, 0], [1, 0]])
+    y_train = np.array([0, 1])
+    clf_a.fit(x_train, y_train)
+    clf_b.fit(x_train, y_train)
+    ev = ClassifierEvaluator()
+    assert ev.evaluate(clf_a, x_train, y_train) == 1.0
+    assert ev.evaluate(clf_b, x_train, y_train) == 0.5