Implement CrossValidatedClassifier
@@ -4,6 +4,8 @@
 from abc import ABC, abstractmethod
 
+import numpy as np
+
 
 class Classifier(ABC):
     @abstractmethod
@@ -14,6 +16,12 @@ class Classifier(ABC):
     def predict_proba(self, x_test):
         pass
 
+    def predict(self, x_test):
+        proba = self.predict_proba(x_test)
+        assert isinstance(proba, np.ndarray)
+        assert proba.shape == (x_test.shape[0], 2)
+        return (proba[:, 1] > 0.5).astype(float)
+
 
 class Regressor(ABC):
     @abstractmethod

src/python/miplearn/classifiers/cv.py (new file, 71 lines)
@@ -0,0 +1,71 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

import logging
from copy import deepcopy

import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

from miplearn.classifiers import Classifier

logger = logging.getLogger(__name__)


class CrossValidatedClassifier(Classifier):
    """
    A meta-classifier that, upon training, evaluates the performance of another
    classifier on the training data set using k-fold cross validation, then
    either adopts that classifier if its cv score is high enough, or returns a
    constant label for every x_test otherwise.

    The threshold is specified relative to a dummy classifier trained on the
    same data set. For example, a threshold of 0.0 indicates that any
    classifier as good as the dummy predictor is acceptable, while a threshold
    of 1.0 indicates that only classifiers with a perfect cross-validation
    score are acceptable. Other values interpolate linearly between these two
    extremes.
    """

    def __init__(self,
                 classifier=LogisticRegression(),
                 threshold=0.75,
                 constant=0.0,
                 cv=5,
                 scoring='accuracy'):
        self.classifier = None
        self.classifier_prototype = classifier
        self.constant = constant
        self.threshold = threshold
        self.cv = cv
        self.scoring = scoring

    def fit(self, x_train, y_train):
        # Calculate the dummy score and the absolute score threshold. For
        # binary 0/1 labels and the 'accuracy' metric, the dummy score is the
        # frequency of the majority class.
        y_train_avg = np.average(y_train)
        dummy_score = max(y_train_avg, 1 - y_train_avg)
        absolute_threshold = 1. * self.threshold + dummy_score * (1 - self.threshold)
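        # Example: with threshold=0.75 and dummy_score=0.60, the prototype
        # must reach 0.75 * 1.0 + 0.25 * 0.60 = 0.90 cross-validation
        # accuracy to be adopted (illustrative numbers, not from the commit).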

        # Calculate the cross-validation score and decide which classifier
        # to use.
        clf = deepcopy(self.classifier_prototype)
        cv_score = float(np.mean(cross_val_score(clf,
                                                 x_train,
                                                 y_train,
                                                 cv=self.cv,
                                                 scoring=self.scoring)))
        if cv_score >= absolute_threshold:
            logger.debug("cv_score is above threshold (%.2f >= %.2f); keeping" %
                         (cv_score, absolute_threshold))
            self.classifier = clf
        else:
            logger.debug("cv_score is below threshold (%.2f < %.2f); discarding" %
                         (cv_score, absolute_threshold))
            self.classifier = DummyClassifier(strategy="constant",
                                              constant=self.constant)

        # Train the chosen classifier
        self.classifier.fit(x_train, y_train)

    def predict_proba(self, x_test):
        return self.classifier.predict_proba(x_test)

src/python/miplearn/classifiers/tests/test_cv.py (new file, 46 lines)
@@ -0,0 +1,46 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

import numpy as np
from numpy.linalg import norm
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from miplearn.classifiers.cv import CrossValidatedClassifier

E = 0.1


def test_cv():
    # Training set: label is 1.0 if the point lies inside a circle of radius 10
    x_train = np.array([[x1, x2]
                        for x1 in range(-10, 11)
                        for x2 in range(-10, 11)])
    x_train = StandardScaler().fit_transform(x_train)
    n_samples = x_train.shape[0]

    y_train = np.array([1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
                        for x1 in range(-10, 11)
                        for x2 in range(-10, 11)])

    # Support vector machines with linear kernels do not perform well on this
    # data set, so the predictor should return the given constant.
    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
                                                  kernel='linear',
                                                  random_state=42),
                                   threshold=0.90,
                                   constant=0.0,
                                   cv=30)
    clf.fit(x_train, y_train)
    assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E

    # Support vector machines with quadratic kernels perform almost perfectly
    # on this data set, so the predictor should return its predictions.
    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
                                                  kernel='poly',
                                                  degree=2,
                                                  random_state=42),
                                   threshold=0.90,
                                   cv=30)
    clf.fit(x_train, y_train)
    assert norm(y_train - clf.predict(x_train)) < E
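
For context, a minimal usage sketch of the new class (not part of the commit; the toy data and the default LogisticRegression prototype are illustrative assumptions):

import numpy as np
from miplearn.classifiers.cv import CrossValidatedClassifier

# Toy binary problem: the label is 1.0 whenever the first feature is positive.
rng = np.random.RandomState(0)
x = rng.randn(200, 2)
y = (x[:, 0] > 0).astype(float)

# With the defaults, the LogisticRegression prototype is adopted only if its
# 5-fold cv accuracy covers at least 75% of the gap between the dummy score
# and a perfect score; otherwise every prediction is the constant 0.0.
clf = CrossValidatedClassifier(threshold=0.75, constant=0.0, cv=5)
clf.fit(x, y)
print(clf.predict(x[:5]))  # array of 0.0/1.0 labels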