mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 09:28:51 -06:00
Move python files to root folder; remove built docs
This commit is contained in:
33
miplearn/classifiers/__init__.py
Normal file
33
miplearn/classifiers/__init__.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Classifier(ABC):
    """
    Base class for binary classifiers that expose class probabilities.

    Subclasses implement `fit` and `predict_proba`. The `predict` method is
    derived from `predict_proba` by thresholding its second column at 0.5.
    """

    @abstractmethod
    def fit(self, x_train, y_train):
        """
        Trains the classifier on the features `x_train` and labels `y_train`.
        """
        pass

    @abstractmethod
    def predict_proba(self, x_test):
        """
        Returns a numpy array with one row per sample in `x_test` and exactly
        two columns. `predict` reads the second column as the score of the
        positive label.
        """
        pass

    def predict(self, x_test):
        """
        Returns a float array with one entry per sample: 1.0 where the second
        column of `predict_proba(x_test)` is strictly greater than 0.5, and
        0.0 otherwise.

        `x_test` may be any object with a `shape` attribute or, failing that,
        any sized sequence (e.g. a list of rows).
        """
        proba = self.predict_proba(x_test)
        assert isinstance(proba, np.ndarray)
        # Plain sequences have no `shape`; `len` gives the same row count.
        # `shape` is preferred when present because some containers (e.g.
        # sparse matrices) do not support `len`.
        assert proba.shape == (
            x_test.shape[0] if hasattr(x_test, "shape") else len(x_test),
            2,
        )
        return (proba[:, 1] > 0.5).astype(float)
|
||||
|
||||
|
||||
class Regressor(ABC):
    """
    Base class for regressors. Subclasses implement `fit` and `predict`.
    """

    @abstractmethod
    def fit(self, x_train, y_train):
        """
        Trains the regressor on the features `x_train` and targets `y_train`.
        """
        pass

    @abstractmethod
    def predict(self, x_test):
        """
        Returns the predicted values for the samples in `x_test`.

        The abstract signature previously took no input at all; it now
        mirrors `fit`, which receives the training features explicitly.
        """
        pass
|
||||
66
miplearn/classifiers/adaptive.py
Normal file
66
miplearn/classifiers/adaptive.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
|
||||
from miplearn.classifiers import Classifier
|
||||
from miplearn.classifiers.counting import CountingClassifier
|
||||
from miplearn.classifiers.evaluator import ClassifierEvaluator
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
logger = logging.getLogger(__name__)


class AdaptiveClassifier(Classifier):
    """
    A meta-classifier which dynamically selects what actual classifier to use
    based on the score an evaluator assigns to each candidate on a particular
    training data set.
    """

    def __init__(self,
                 candidates=None,
                 evaluator=None):
        """
        Initializes the meta-classifier.

        Parameters
        ----------
        candidates
            Dictionary mapping a candidate name to a dictionary with the keys
            "classifier" (an unfitted classifier prototype) and "min samples"
            (the smallest number of training samples for which the candidate
            is considered). If None, a default set consisting of a k-nearest
            neighbors classifier, a scaled logistic regression and a
            CountingClassifier is used.
        evaluator
            Object with an `evaluate(clf, x_train, y_train)` method returning
            a score, where higher is better. If None, a new
            ClassifierEvaluator is created for this instance.
        """
        if candidates is None:
            candidates = {
                "knn(100)": {
                    "classifier": KNeighborsClassifier(n_neighbors=100),
                    "min samples": 100,
                },
                "logistic": {
                    "classifier": make_pipeline(StandardScaler(),
                                                LogisticRegression()),
                    "min samples": 30,
                },
                "counting": {
                    "classifier": CountingClassifier(),
                    "min samples": 0,
                }
            }
        if evaluator is None:
            # Built here rather than in the signature, so the default is not
            # a single object created at definition time and shared by every
            # instance.
            evaluator = ClassifierEvaluator()
        self.candidates = candidates
        self.evaluator = evaluator
        self.classifier = None

    def fit(self, x_train, y_train):
        """
        Trains a copy of every candidate that has enough training samples and
        keeps the one with the highest evaluator score in `self.classifier`.

        If no candidate is selected, `self.classifier` is left as None and a
        warning is logged.
        """
        best_name, best_clf, best_score = None, None, -float("inf")
        n_samples = x_train.shape[0]
        for name, clf_dict in self.candidates.items():
            if n_samples < clf_dict["min samples"]:
                continue
            # Train a copy so the prototype in `self.candidates` stays
            # unfitted and can be reused by later calls to `fit`.
            clf = deepcopy(clf_dict["classifier"])
            clf.fit(x_train, y_train)
            score = self.evaluator.evaluate(clf, x_train, y_train)
            if score > best_score:
                best_name, best_clf, best_score = name, clf, score
        if best_clf is None:
            logger.warning("No candidate classifier was selected "
                           "(n_samples=%d)", n_samples)
        else:
            logger.debug("Best classifier: %s (score=%.3f)",
                         best_name, best_score)
        self.classifier = best_clf

    def predict_proba(self, x_test):
        """
        Delegates to the classifier selected by the last call to `fit`.
        """
        return self.classifier.predict_proba(x_test)
|
||||
28
miplearn/classifiers/counting.py
Normal file
28
miplearn/classifiers/counting.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from miplearn.classifiers import Classifier
|
||||
import numpy as np
|
||||
|
||||
|
||||
class CountingClassifier(Classifier):
    """
    A classifier that generates constant predictions, based only on the
    frequency of the training labels. For example, if y_train is [1.0, 0.0, 0.0]
    this classifier always returns [0.66 0.33] for any x_test. It essentially
    counts how many times each label appeared, hence the name.
    """

    def __init__(self):
        # Mean of the training labels; None until `fit` has been called.
        self.mean = None

    def fit(self, x_train, y_train):
        """
        Stores the mean of `y_train`. The features `x_train` are ignored.
        """
        self.mean = np.mean(y_train)

    def predict_proba(self, x_test):
        """
        Returns an array of shape (x_test.shape[0], 2) in which every row is
        [1 - mean, mean], where `mean` is the value stored by `fit`.
        """
        # Filling a pre-shaped array keeps the result two-dimensional even
        # when `x_test` has zero rows. Building it from a list of rows would
        # yield shape (0,) in that case.
        proba = np.empty((x_test.shape[0], 2))
        proba[:, 0] = 1 - self.mean
        proba[:, 1] = self.mean
        return proba

    def __repr__(self):
        return "CountingClassifier(mean=%s)" % self.mean
|
||||
71
miplearn/classifiers/cv.py
Normal file
71
miplearn/classifiers/cv.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
from miplearn.classifiers import Classifier
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import cross_val_score
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)


class CrossValidatedClassifier(Classifier):
    """
    A meta-classifier that, upon training, evaluates the performance of another
    classifier on the training data set using k-fold cross validation, then
    either adopts the other classifier if the cv-score is high enough, or
    returns a constant label for every x_test otherwise.

    The threshold is specified in comparison to a dummy classifier trained
    on the same dataset. For example, a threshold of 0.0 indicates that any
    classifier as good as the dummy predictor is acceptable. A threshold of 1.0
    indicates that only classifiers with a perfect cross-validation score are
    acceptable. Other numbers are a linear interpolation of these two extremes.
    """

    def __init__(self,
                 classifier=LogisticRegression(),
                 threshold=0.75,
                 constant=0.0,
                 cv=5,
                 scoring='accuracy'):
        """
        Parameters
        ----------
        classifier
            Prototype of the classifier to evaluate. It is never trained
            directly; `fit` works on a deep copy.
        threshold
            Relative threshold, as described in the class docstring.
        constant
            Label predicted for every sample when the classifier is rejected.
        cv
            Passed to `cross_val_score` as its `cv` argument.
        scoring
            Passed to `cross_val_score` as its `scoring` argument.
        """
        self.classifier = None
        self.classifier_prototype = classifier
        self.constant = constant
        self.threshold = threshold
        self.cv = cv
        self.scoring = scoring

    def fit(self, x_train, y_train):
        """
        Cross-validates a copy of the prototype and then trains either that
        copy or a constant-label dummy classifier on the full training set,
        depending on whether the cv-score reaches the threshold.
        """
        # Calculate dummy score and absolute score threshold.
        # NOTE(review): max(avg, 1 - avg) is the accuracy of always predicting
        # the most frequent label when labels are 0/1; this presumably assumes
        # 0/1 labels and an accuracy-like `scoring` -- confirm for other
        # metrics.
        y_train_avg = np.average(y_train)
        dummy_score = max(y_train_avg, 1 - y_train_avg)
        absolute_threshold = 1. * self.threshold + dummy_score * (1 - self.threshold)

        # Calculate cross validation score and decide which classifier to use
        clf = deepcopy(self.classifier_prototype)
        cv_score = float(np.mean(cross_val_score(clf,
                                                 x_train,
                                                 y_train,
                                                 cv=self.cv,
                                                 scoring=self.scoring)))
        if cv_score >= absolute_threshold:
            logger.debug("cv_score is above threshold (%.2f >= %.2f); keeping",
                         cv_score, absolute_threshold)
            self.classifier = clf
        else:
            logger.debug("cv_score is below threshold (%.2f < %.2f); discarding",
                         cv_score, absolute_threshold)
            self.classifier = DummyClassifier(strategy="constant",
                                              constant=self.constant)

        # Train chosen classifier
        self.classifier.fit(x_train, y_train)

    def predict_proba(self, x_test):
        """
        Delegates to the classifier chosen by the last call to `fit`.
        """
        return self.classifier.predict_proba(x_test)
|
||||
15
miplearn/classifiers/evaluator.py
Normal file
15
miplearn/classifiers/evaluator.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
|
||||
class ClassifierEvaluator:
    """
    Scores a trained classifier by the ROC AUC obtained from the second column
    of its `predict_proba` output.
    """

    def __init__(self):
        pass

    def evaluate(self, clf, x_train, y_train):
        """
        Returns `roc_auc_score` of `y_train` against the second column of
        `clf.predict_proba(x_train)`.
        """
        # FIXME: use cross-validation
        positive_scores = clf.predict_proba(x_train)[:, 1]
        return roc_auc_score(y_train, positive_scores)
|
||||
3
miplearn/classifiers/tests/__init__.py
Normal file
3
miplearn/classifiers/tests/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
18
miplearn/classifiers/tests/test_counting.py
Normal file
18
miplearn/classifiers/tests/test_counting.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
from miplearn.classifiers.counting import CountingClassifier
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
# Tolerance used when comparing probability arrays.
E = 0.1


def test_counting():
    """
    With 3 zeros and 5 ones as training labels, every predicted row should be
    [0.375, 0.625], whatever the features are.
    """
    labels = [0.0] * 3 + [1.0] * 5
    clf = CountingClassifier()
    clf.fit(np.zeros((8, 25)), labels)

    actual_proba = clf.predict_proba(np.zeros((2, 25)))
    expected_proba = np.array([
        [0.375, 0.625],
        [0.375, 0.625],
    ])
    assert norm(actual_proba - expected_proba) < E
|
||||
46
miplearn/classifiers/tests/test_cv.py
Normal file
46
miplearn/classifiers/tests/test_cv.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
import numpy as np
|
||||
from miplearn.classifiers.cv import CrossValidatedClassifier
|
||||
from numpy.linalg import norm
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.svm import SVC
|
||||
|
||||
# Tolerance used when comparing prediction vectors.
E = 0.1


def test_cv():
    """
    Checks that CrossValidatedClassifier falls back to the constant label
    when the wrapped classifier scores poorly, and keeps the wrapped
    classifier when it scores well.
    """
    # Training set: label is true if point is inside a 2D circle
    grid = [[x1, x2]
            for x1 in range(-10, 11)
            for x2 in range(-10, 11)]
    x_train = StandardScaler().fit_transform(np.array(grid))
    y_train = np.array([1.0 if x1*x1 + x2*x2 <= 100 else 0.0
                        for (x1, x2) in grid])
    n_samples = x_train.shape[0]

    # An SVC built without a `kernel` argument is expected to miss the
    # required cv-score on this data set, so the predictor should return
    # the given constant for every sample.
    weak_svc = SVC(probability=True,
                   random_state=42)
    clf = CrossValidatedClassifier(classifier=weak_svc,
                                   threshold=0.90,
                                   constant=0.0,
                                   cv=30)
    clf.fit(x_train, y_train)
    assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E

    # An SVC with a degree-2 polynomial kernel is expected to fit this data
    # set almost perfectly, so the predictor should return its predictions.
    quadratic_svc = SVC(probability=True,
                        kernel='poly',
                        degree=2,
                        random_state=42)
    clf = CrossValidatedClassifier(classifier=quadratic_svc,
                                   threshold=0.90,
                                   cv=30)
    clf.fit(x_train, y_train)
    print(y_train - clf.predict(x_train))
    assert norm(y_train - clf.predict(x_train)) < E
|
||||
20
miplearn/classifiers/tests/test_evaluator.py
Normal file
20
miplearn/classifiers/tests/test_evaluator.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
import numpy as np
|
||||
from miplearn.classifiers.evaluator import ClassifierEvaluator
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
|
||||
|
||||
def test_evaluator():
    """
    On a two-point training set, a 1-nearest-neighbor classifier should be
    scored 1.0 and a 2-nearest-neighbor classifier 0.5.
    """
    x_train = np.array([[0, 0], [1, 0]])
    y_train = np.array([0, 1])
    evaluator = ClassifierEvaluator()

    expected_by_neighbors = {1: 1.0, 2: 0.5}
    for n_neighbors, expected_score in expected_by_neighbors.items():
        clf = KNeighborsClassifier(n_neighbors=n_neighbors)
        clf.fit(x_train, y_train)
        assert evaluator.evaluate(clf, x_train, y_train) == expected_score
|
||||
|
||||
34
miplearn/classifiers/tests/test_threshold.py
Normal file
34
miplearn/classifiers/tests/test_threshold.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from unittest.mock import Mock
|
||||
|
||||
import numpy as np
|
||||
from miplearn.classifiers import Classifier
|
||||
from miplearn.classifiers.threshold import MinPrecisionThreshold
|
||||
|
||||
|
||||
def test_threshold_dynamic():
    """
    Checks the threshold MinPrecisionThreshold selects for several minimum
    precision levels, using a mocked classifier with fixed probabilities.
    """
    fixed_proba = np.array([
        [0.10, 0.90],
        [0.10, 0.90],
        [0.20, 0.80],
        [0.30, 0.70],
    ])
    clf = Mock(spec=Classifier)
    clf.predict_proba = Mock(return_value=fixed_proba)
    x_train = np.array([0, 1, 2, 3])
    y_train = np.array([1, 1, 0, 0])

    # Pairs of (minimum precision, expected threshold).
    cases = [
        (1.0, 0.90),
        (0.65, 0.80),
        (0.50, 0.70),
        (0.00, 0.70),
    ]
    for min_precision, expected in cases:
        threshold = MinPrecisionThreshold(min_precision=min_precision)
        assert threshold.find(clf, x_train, y_train) == expected
|
||||
|
||||
45
miplearn/classifiers/threshold.py
Normal file
45
miplearn/classifiers/threshold.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
|
||||
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from abc import abstractmethod, ABC
|
||||
|
||||
import numpy as np
|
||||
from sklearn.metrics._ranking import _binary_clf_curve
|
||||
|
||||
|
||||
class DynamicThreshold(ABC):
    """
    Base class for strategies that choose a probability threshold from a
    trained classifier and a training data set.
    """

    @abstractmethod
    def find(self, clf, x_train, y_train):
        """
        Given a trained binary classifier `clf` and a training data set,
        returns the numerical threshold (float) satisfying some criteria.
        """
        pass


class MinPrecisionThreshold(DynamicThreshold):
    """
    The smallest possible threshold satisfying a minimum acceptable precision,
    i.e. the fraction of true positives among the samples whose score is at
    or above the threshold.
    """

    def __init__(self, min_precision):
        self.min_precision = min_precision

    def find(self, clf, x_train, y_train):
        """
        Returns the smallest score in the second column of
        `clf.predict_proba(x_train)` such that the samples scoring at least
        that much reach `self.min_precision`. Samples are counted as positive
        when their label in `y_train` equals 1.

        Returns 2.0 when no candidate threshold reaches the required
        precision, or when the training set is empty.
        """
        proba = clf.predict_proba(x_train)

        assert isinstance(proba, np.ndarray), \
            "classifier should return numpy array"
        assert proba.shape == (x_train.shape[0], 2), \
            "classifier should return (%d,%d)-shaped array, not %s" % (
                x_train.shape[0], 2, str(proba.shape))

        scores = proba[:, 1]
        if scores.size == 0:
            return 2.0

        # Sort samples by decreasing score, so that a cumulative sum over the
        # labels gives the number of true positives for each cut-off.
        order = np.argsort(scores, kind="mergesort")[::-1]
        scores = scores[order]
        is_positive = (np.asarray(y_train).ravel() == 1)[order]

        # Only the last sample of each run of equal scores is a valid cut-off:
        # a threshold cannot separate samples that share the same score.
        cutoffs = np.r_[np.flatnonzero(np.diff(scores)), scores.size - 1]
        thresholds = scores[cutoffs]
        tps = np.cumsum(is_positive)[cutoffs]

        # `cutoffs + 1` is the number of samples at or above each threshold,
        # that is, true positives plus false positives. It is never zero.
        precision = tps / (cutoffs + 1)

        # Thresholds are in decreasing order, so the last acceptable entry is
        # the smallest acceptable threshold.
        acceptable = np.flatnonzero(precision >= self.min_precision)
        if acceptable.size == 0:
            return 2.0
        return thresholds[acceptable[-1]]
|
||||
Reference in New Issue
Block a user