mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 09:28:51 -06:00
AdaptiveClassifier: Refactor and add tests
This commit is contained in:
@@ -4,63 +4,107 @@
|
||||
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict
|
||||
from typing import Dict, Callable, Optional
|
||||
|
||||
import numpy as np
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.metrics import roc_auc_score
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
from miplearn.classifiers import Classifier
|
||||
from miplearn.classifiers import Classifier, ScikitLearnClassifier
|
||||
from miplearn.classifiers.counting import CountingClassifier
|
||||
from miplearn.classifiers.evaluator import ClassifierEvaluator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CandidateClassifierSpecs:
    """
    Specifications describing how to construct a certain classifier, and under
    which circumstances it can be used.

    Parameters
    ----------
    classifier: Callable[[], Classifier]
        Callable that constructs the classifier. It is only stored here, not
        invoked, so a fresh classifier can be built each time it is called.
    min_samples: int
        Minimum number of samples for this classifier to be considered.
        Defaults to zero.
    """

    def __init__(
        self,
        classifier: Callable[[], Classifier],
        min_samples: int = 0,
    ) -> None:
        self.min_samples = min_samples
        self.classifier = classifier
|
||||
|
||||
|
||||
class AdaptiveClassifier(Classifier):
    """
    A meta-classifier which dynamically selects what actual classifier to use
    based on its ROC AUC score on a particular training data set.

    The score is currently computed on the training data itself; switching to
    k-fold cross validation is still pending (see the FIXME in `fit`).

    Parameters
    ----------
    candidates: Dict[str, CandidateClassifierSpecs]
        A dictionary of candidate classifiers to consider, mapping the name of the
        candidate to its specs, which describes how to construct it and under what
        scenarios. If no candidates are provided, uses a fixed set of defaults,
        which includes `CountingClassifier`, `KNeighborsClassifier` and
        `LogisticRegression`.
    """

    def __init__(
        self,
        candidates: Optional[Dict[str, CandidateClassifierSpecs]] = None,
    ) -> None:
        super().__init__()
        if candidates is None:
            # Factories (not instances) are stored, so every call to `fit`
            # trains freshly constructed classifiers.
            candidates = {
                "knn(100)": CandidateClassifierSpecs(
                    classifier=lambda: ScikitLearnClassifier(
                        KNeighborsClassifier(n_neighbors=100)
                    ),
                    min_samples=100,
                ),
                "logistic": CandidateClassifierSpecs(
                    classifier=lambda: ScikitLearnClassifier(
                        make_pipeline(
                            StandardScaler(),
                            LogisticRegression(),
                        )
                    ),
                    min_samples=30,
                ),
                "counting": CandidateClassifierSpecs(
                    classifier=CountingClassifier,
                ),
            }
        self.candidates = candidates
        # Set by `fit` to the best-scoring candidate; None until then.
        self.classifier: Optional[Classifier] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Train every eligible candidate and keep the one with the highest score.

        A candidate is eligible when the number of rows in `x_train` is at
        least its `min_samples`. Each eligible candidate is constructed,
        fitted, and scored by ROC AUC between column 1 of `y_train` and
        column 1 of its predicted probabilities.

        Parameters
        ----------
        x_train: np.ndarray
            Training features, one row per sample.
        y_train: np.ndarray
            Training labels; must have shape (n_samples, 2).
        """
        super().fit(x_train, y_train)
        n_samples = x_train.shape[0]
        assert y_train.shape == (n_samples, 2)

        best_name, best_clf, best_score = None, None, -float("inf")
        for name, specs in self.candidates.items():
            if n_samples < specs.min_samples:
                continue
            clf = specs.classifier()
            clf.fit(x_train, y_train)
            proba = clf.predict_proba(x_train)
            # FIXME: Switch to k-fold cross validation
            score = roc_auc_score(y_train[:, 1], proba[:, 1])
            # Strict comparison: on ties, the candidate listed first wins.
            if score > best_score:
                best_name, best_clf, best_score = name, clf, score
        logger.debug("Best classifier: %s (score=%.3f)", best_name, best_score)
        # If no candidate was eligible, this stays None and `predict_proba`
        # fails its assertion.
        self.classifier = best_clf

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Return the probabilities predicted by the classifier selected in `fit`.

        Requires that `fit` has already selected a classifier.
        """
        super().predict_proba(x_test)
        assert self.classifier is not None
        return self.classifier.predict_proba(x_test)
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
|
||||
class ClassifierEvaluator:
    """Scores an already-fitted classifier by its ROC AUC."""

    def __init__(self) -> None:
        pass

    def evaluate(self, clf, x_train, y_train):
        """
        Return the ROC AUC of `clf` on the given data.

        Parameters
        ----------
        clf
            Object exposing `predict_proba(x_train)`; column 1 of the result
            is used as the score of each sample.
        x_train
            Samples passed to `clf.predict_proba`.
        y_train
            True labels, passed unchanged to `roc_auc_score`.
        """
        # FIXME: use cross-validation
        proba = clf.predict_proba(x_train)
        return roc_auc_score(y_train, proba[:, 1])
|
||||
Reference in New Issue
Block a user