MIPLearn/miplearn/classifiers/adaptive.py

# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

import logging
from copy import deepcopy
from typing import Any, Dict, Optional

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.evaluator import ClassifierEvaluator

logger = logging.getLogger(__name__)

class AdaptiveClassifier(Classifier):
    """
    A meta-classifier that dynamically selects which concrete classifier to
    use based on its evaluation score on a particular training data set.
    """

    def __init__(
        self,
        candidates: Optional[Dict[str, Any]] = None,
        evaluator: ClassifierEvaluator = ClassifierEvaluator(),
    ) -> None:
        if candidates is None:
            # Default candidates, from most to least data-hungry. "min samples"
            # is the smallest training set for which the candidate is considered.
            candidates = {
                "knn(100)": {
                    "classifier": KNeighborsClassifier(n_neighbors=100),
                    "min samples": 100,
                },
                "logistic": {
                    "classifier": make_pipeline(StandardScaler(), LogisticRegression()),
                    "min samples": 30,
                },
                "counting": {
                    "classifier": CountingClassifier(),
                    "min samples": 0,
                },
            }
        self.candidates = candidates
        self.evaluator = evaluator
        self.classifier = None

    def fit(self, x_train, y_train):
        best_name, best_clf, best_score = None, None, -float("inf")
        n_samples = x_train.shape[0]
        for name, clf_dict in self.candidates.items():
            # Skip candidates that require more training samples than available.
            if n_samples < clf_dict["min samples"]:
                continue
            # Fit a fresh copy so the stored prototype remains untouched.
            clf = deepcopy(clf_dict["classifier"])
            clf.fit(x_train, y_train)
            score = self.evaluator.evaluate(clf, x_train, y_train)
            if score > best_score:
                best_name, best_clf, best_score = name, clf, score
        logger.debug("Best classifier: %s (score=%.3f)", best_name, best_score)
        self.classifier = best_clf

    def predict_proba(self, x_test):
        assert self.classifier is not None, "fit must be called before predict_proba"
        return self.classifier.predict_proba(x_test)
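
For reference, a minimal usage sketch (not part of the module above). The synthetic x_train/y_train arrays are placeholders, and it assumes the default ClassifierEvaluator scoring accepts plain NumPy arrays of this shape:

# usage_sketch.py -- hypothetical example, not shipped with MIPLearn
import numpy as np
from miplearn.classifiers.adaptive import AdaptiveClassifier

# Synthetic data: 50 samples, 5 features, binary labels.
x_train = np.random.rand(50, 5)
y_train = (x_train[:, 0] > 0.5).astype(int)

clf = AdaptiveClassifier()
# With only 50 samples, "knn(100)" is skipped (min samples = 100);
# "logistic" and "counting" are fitted, and the better scorer is kept.
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)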