diff --git a/src/python/miplearn/classifiers/adaptive.py b/src/python/miplearn/classifiers/adaptive.py
index e332850..37d552e 100644
--- a/src/python/miplearn/classifiers/adaptive.py
+++ b/src/python/miplearn/classifiers/adaptive.py
@@ -7,7 +7,9 @@ from copy import deepcopy
 
 from miplearn.classifiers import Classifier
 from miplearn.classifiers.counting import CountingClassifier
+from miplearn.classifiers.evaluator import ClassifierEvaluator
 from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 
@@ -17,45 +19,48 @@ logger = logging.getLogger(__name__)
 
 class AdaptiveClassifier(Classifier):
     """
     A meta-classifier which dynamically selects what actual classifier to use
-    based on the number of samples in the training data.
-
-    By default, uses CountingClassifier for less than 30 samples and
-    LogisticRegression (with standard scaling) for 30 or more samples.
+    based on its cross-validation score on a particular training data set.
     """
-    def __init__(self, classifiers=None):
+    def __init__(self,
+                 candidates=None,
+                 evaluator=ClassifierEvaluator()):
         """
-        Initializes the classifier.
-
-        The `classifiers` argument must be a list of tuples where the second element
-        of the tuple is the classifier and the first element is the number of
-        samples required. For example, if `classifiers` is set to
-        ```
-        [(100, ClassifierA()),
-         (50, ClassifierB()),
-         (0, ClassifierC())]
-        ``` then ClassifierA will be used if n_samples >= 100, ClassifierB will
-        be used if 100 > n_samples >= 50 and ClassifierC will be used if
-        50 > n_samples. The list must be ordered in (strictly) decreasing order.
+        Initializes the meta-classifier.
         """
-        if classifiers is None:
-            classifiers = [
-                (30, make_pipeline(StandardScaler(), LogisticRegression())),
-                (0, CountingClassifier())
-            ]
-        self.available_classifiers = classifiers
+        if candidates is None:
+            candidates = {
+                "knn(100)": {
+                    "classifier": KNeighborsClassifier(n_neighbors=100),
+                    "min samples": 100,
+                },
+                "logistic": {
+                    "classifier": make_pipeline(StandardScaler(),
+                                                LogisticRegression()),
+                    "min samples": 30,
+                },
+                "counting": {
+                    "classifier": CountingClassifier(),
+                    "min samples": 0,
+                }
+            }
+        self.candidates = candidates
+        self.evaluator = evaluator
         self.classifier = None
 
     def fit(self, x_train, y_train):
+        best_clf = None
+        best_score = -float("inf")
         n_samples = x_train.shape[0]
-
-        for (min_samples, clf_prototype) in self.available_classifiers:
-            if n_samples >= min_samples:
-                self.classifier = deepcopy(clf_prototype)
-                self.classifier.fit(x_train, y_train)
-                break
+        for clf_dict in self.candidates.values():
+            if n_samples < clf_dict["min samples"]:
+                continue
+            clf = deepcopy(clf_dict["classifier"])
+            clf.fit(x_train, y_train)
+            score = self.evaluator.evaluate(clf, x_train, y_train)
+            if score > best_score:
+                best_clf, best_score = clf, score
+        self.classifier = best_clf
 
     def predict_proba(self, x_test):
         return self.classifier.predict_proba(x_test)
-
-
diff --git a/src/python/miplearn/classifiers/evaluator.py b/src/python/miplearn/classifiers/evaluator.py
new file mode 100644
index 0000000..5ff0f30
--- /dev/null
+++ b/src/python/miplearn/classifiers/evaluator.py
@@ -0,0 +1,15 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from sklearn.metrics import roc_auc_score
+
+
+class ClassifierEvaluator:
+    def __init__(self):
+        pass
+
+    def evaluate(self, clf, x_train, y_train):
+        # FIXME: use cross-validation
+        proba = clf.predict_proba(x_train)
+        return roc_auc_score(y_train, proba[:, 1])
diff --git a/src/python/miplearn/classifiers/tests/test_evaluator.py b/src/python/miplearn/classifiers/tests/test_evaluator.py
new file mode 100644
index 0000000..a7f16d6
--- /dev/null
+++ b/src/python/miplearn/classifiers/tests/test_evaluator.py
@@ -0,0 +1,20 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+import numpy as np
+from miplearn.classifiers.evaluator import ClassifierEvaluator
+from sklearn.neighbors import KNeighborsClassifier
+
+
+def test_evaluator():
+    clf_a = KNeighborsClassifier(n_neighbors=1)
+    clf_b = KNeighborsClassifier(n_neighbors=2)
+    x_train = np.array([[0, 0], [1, 0]])
+    y_train = np.array([0, 1])
+    clf_a.fit(x_train, y_train)
+    clf_b.fit(x_train, y_train)
+    ev = ClassifierEvaluator()
+    assert ev.evaluate(clf_a, x_train, y_train) == 1.0
+    assert ev.evaluate(clf_b, x_train, y_train) == 0.5
+
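
A quick sketch of how the new selection logic behaves; the dataset and variable names below are illustrative, not part of the patch. With 50 samples, "knn(100)" is skipped ("min samples": 100), while "logistic" and "counting" are both fitted, scored by the evaluator, and the higher-scoring one is kept.

    import numpy as np
    from miplearn.classifiers.adaptive import AdaptiveClassifier

    # 50 training samples: only "logistic" and "counting" qualify.
    x_train = np.random.rand(50, 2)
    y_train = (x_train[:, 0] > 0.5).astype(int)

    clf = AdaptiveClassifier()          # default candidates, default evaluator
    clf.fit(x_train, y_train)           # fits qualifying candidates, keeps the best
    proba = clf.predict_proba(x_train)  # delegates to the selected candidate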
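
The test expectations follow from the two-point training set: with n_neighbors=1 each point is its own nearest neighbor, so predict_proba reproduces the labels exactly and the AUC is 1.0; with n_neighbors=2 both points see the same two neighbors, the predicted probabilities tie at 0.5, and roc_auc_score returns 0.5.

As the FIXME notes, evaluate currently scores a candidate on the very data it was fitted on, which favors classifiers that overfit (a 1-NN, for instance, scores 1.0 on its own training set). A minimal sketch of the cross-validated variant the FIXME points toward, assuming every candidate follows the sklearn estimator API (cross_val_score clones and refits the estimator per fold, so CountingClassifier would need get_params/set_params); the class name and n_splits parameter are hypothetical:

    import numpy as np
    from sklearn.model_selection import cross_val_score


    class CrossValidatedEvaluator:
        """Hypothetical replacement for ClassifierEvaluator; not part of this patch."""

        def __init__(self, n_splits=5):
            self.n_splits = n_splits

        def evaluate(self, clf, x_train, y_train):
            # Mean ROC AUC over held-out folds; cross_val_score refits a
            # clone of clf on each training split, so the fitted state of
            # the incoming classifier is ignored.
            scores = cross_val_score(clf, x_train, y_train,
                                     cv=self.n_splits, scoring="roc_auc")
            return float(np.mean(scores))

This would plug into the new constructor argument, e.g. AdaptiveClassifier(evaluator=CrossValidatedEvaluator()).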