MIPLearn/miplearn/classifiers/adaptive.py

# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

import logging
from typing import Dict, Optional

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier

logger = logging.getLogger(__name__)


class CandidateClassifierSpecs:
    """
    Specifications describing how to construct a certain classifier and under
    which circumstances it can be used.

    Parameters
    ----------
    classifier: Classifier
        Prototype classifier instance; it is cloned before being fitted.
    min_samples: int
        Minimum number of training samples for this classifier to be considered.
    """

    def __init__(
        self,
        classifier: Classifier,
        min_samples: int = 0,
    ) -> None:
        self.min_samples = min_samples
        self.classifier = classifier
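

# Illustrative sketch, not part of the original module: how a single candidate
# entry can be described with CandidateClassifierSpecs. The wrapped model and
# the min_samples threshold are arbitrary choices for the example.
def _example_specs() -> CandidateClassifierSpecs:
    return CandidateClassifierSpecs(
        classifier=ScikitLearnClassifier(
            RandomForestClassifier(n_estimators=50, min_samples_split=5)
        ),
        # Only consider this candidate once at least 50 samples are available.
        min_samples=50,
    )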


class AdaptiveClassifier(Classifier):
    """
    A meta-classifier that dynamically selects which actual classifier to use
    based on its cross-validation score on a particular training data set.

    Parameters
    ----------
    candidates: Dict[str, CandidateClassifierSpecs]
        A dictionary of candidate classifiers to consider, mapping the name of
        each candidate to its specs, which describe how to construct it and
        when it may be used. If no candidates are provided, a fixed set of
        defaults is used, which includes `CountingClassifier`,
        `KNeighborsClassifier` and `LogisticRegression`.
    """

    def __init__(
        self,
        candidates: Optional[Dict[str, CandidateClassifierSpecs]] = None,
    ) -> None:
        super().__init__()
        if candidates is None:
            candidates = {
                "forest(5,10)": CandidateClassifierSpecs(
                    classifier=ScikitLearnClassifier(
                        RandomForestClassifier(
                            n_estimators=5,
                            min_samples_split=10,
                        ),
                    ),
                    min_samples=100,
                ),
                "knn(100)": CandidateClassifierSpecs(
                    classifier=ScikitLearnClassifier(
                        KNeighborsClassifier(n_neighbors=100)
                    ),
                    min_samples=100,
                ),
                "logistic": CandidateClassifierSpecs(
                    classifier=ScikitLearnClassifier(
                        make_pipeline(
                            StandardScaler(),
                            LogisticRegression(),
                        )
                    ),
                    min_samples=30,
                ),
                "counting": CandidateClassifierSpecs(
                    classifier=CountingClassifier(),
                ),
            }
        self.candidates = candidates
        self.classifier: Optional[Classifier] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        n_samples = x_train.shape[0]
        assert y_train.shape == (n_samples, 2)

        # If almost all samples belong to the same class, return a fixed
        # prediction and skip all the other steps.
        if y_train[:, 0].mean() > 0.99 or y_train[:, 1].mean() > 0.99:
            self.classifier = CountingClassifier()
            self.classifier.fit(x_train, y_train)
            return

        best_name, best_clf, best_score = None, None, -float("inf")
        for name, specs in self.candidates.items():
            # Skip candidates that require more training samples than we have.
            if n_samples < specs.min_samples:
                continue
            clf = specs.classifier.clone()
            if isinstance(clf, ScikitLearnClassifier):
                # Score scikit-learn candidates on out-of-fold predicted
                # probabilities, so that the selection does not reward
                # overfitting.
                proba = cross_val_predict(
                    clf.inner_clf,
                    x_train,
                    y_train[:, 1],
                    method="predict_proba",
                )[:, 1]
            else:
                # Other candidates are scored on in-sample probabilities.
                clf.fit(x_train, y_train)
                proba = clf.predict_proba(x_train)[:, 1]
            score = roc_auc_score(y_train[:, 1], proba)
            if score > best_score:
                best_name, best_clf, best_score = name, clf, score
        logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score))
        # Scikit-learn candidates were only cross-validated above; fit the
        # winner on the full training set before storing it.
        if isinstance(best_clf, ScikitLearnClassifier):
            best_clf.fit(x_train, y_train)
        self.classifier = best_clf

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)
        assert self.classifier is not None
        return self.classifier.predict_proba(x_test)

    def clone(self) -> "AdaptiveClassifier":
        return AdaptiveClassifier(self.candidates)
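

# Usage sketch, not part of the original module. It assumes, based on the
# asserts in `fit` above and in the Classifier base class, that features are
# floats and labels form a boolean (n_samples, 2) matrix whose columns mean
# [class is False, class is True]. All data below is synthetic.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    x = rng.normal(size=(200, 5))
    positive = x[:, 0] + 0.1 * rng.normal(size=200) > 0
    y = np.column_stack([~positive, positive])
    clf = AdaptiveClassifier()
    clf.fit(x, y)
    # Inspect which candidate won and how it scores new points.
    print("selected:", clf.classifier)
    print("proba:", clf.predict_proba(x[:5]))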