Move python files to root folder; remove built docs

2020-08-29 11:42:02 -05:00
parent 741af8506b
commit 5663ced0be
116 changed files with 8 additions and 12408 deletions

miplearn/classifiers/__init__.py View File

@@ -0,0 +1,33 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod

import numpy as np


class Classifier(ABC):
    @abstractmethod
    def fit(self, x_train, y_train):
        pass

    @abstractmethod
    def predict_proba(self, x_test):
        pass

    def predict(self, x_test):
        proba = self.predict_proba(x_test)
        assert isinstance(proba, np.ndarray)
        assert proba.shape == (x_test.shape[0], 2)
        return (proba[:, 1] > 0.5).astype(float)


class Regressor(ABC):
    @abstractmethod
    def fit(self, x_train, y_train):
        pass

    @abstractmethod
    def predict(self, x_test):
        pass
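Below is a minimal sketch of the contract this base class defines (the ConstantClassifier name is hypothetical, for illustration only): a subclass implements fit and predict_proba, and inherits a predict that thresholds the positive-class probability at 0.5.

import numpy as np

from miplearn.classifiers import Classifier


class ConstantClassifier(Classifier):
    """Toy subclass: always predicts the positive class with probability 0.9."""

    def fit(self, x_train, y_train):
        pass  # nothing to learn

    def predict_proba(self, x_test):
        # Classifier.predict asserts that an (n_samples, 2) ndarray is returned.
        return np.array([[0.1, 0.9]] * x_test.shape[0])


clf = ConstantClassifier()
print(clf.predict(np.zeros((3, 5))))  # [1. 1. 1.], since 0.9 > 0.5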

miplearn/classifiers/adaptive.py View File

@@ -0,0 +1,66 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from copy import deepcopy

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.evaluator import ClassifierEvaluator

logger = logging.getLogger(__name__)


class AdaptiveClassifier(Classifier):
    """
    A meta-classifier which dynamically selects which concrete classifier to
    use, based on the evaluator's score for each candidate on a particular
    training data set. Candidates are skipped when the training set has fewer
    samples than their "min samples" requirement.
    """

    def __init__(self,
                 candidates=None,
                 evaluator=ClassifierEvaluator()):
        """
        Initializes the meta-classifier.
        """
        if candidates is None:
            candidates = {
                "knn(100)": {
                    "classifier": KNeighborsClassifier(n_neighbors=100),
                    "min samples": 100,
                },
                "logistic": {
                    "classifier": make_pipeline(StandardScaler(),
                                                LogisticRegression()),
                    "min samples": 30,
                },
                "counting": {
                    "classifier": CountingClassifier(),
                    "min samples": 0,
                },
            }
        self.candidates = candidates
        self.evaluator = evaluator
        self.classifier = None

    def fit(self, x_train, y_train):
        best_name, best_clf, best_score = None, None, -float("inf")
        n_samples = x_train.shape[0]
        for (name, clf_dict) in self.candidates.items():
            if n_samples < clf_dict["min samples"]:
                continue
            clf = deepcopy(clf_dict["classifier"])
            clf.fit(x_train, y_train)
            score = self.evaluator.evaluate(clf, x_train, y_train)
            if score > best_score:
                best_name, best_clf, best_score = name, clf, score
        logger.debug("Best classifier: %s (score=%.3f)", best_name, best_score)
        self.classifier = best_clf

    def predict_proba(self, x_test):
        return self.classifier.predict_proba(x_test)
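A hedged usage sketch (synthetic data; the import path and variable names are assumptions based on how the other modules in this commit are laid out):

import numpy as np

from miplearn.classifiers.adaptive import AdaptiveClassifier

x_train = np.random.rand(200, 3)
y_train = (x_train[:, 0] > 0.5).astype(float)

clf = AdaptiveClassifier()
clf.fit(x_train, y_train)            # trains knn(100), logistic and counting,
                                     # then keeps the best-scoring candidate
proba = clf.predict_proba(x_train)   # delegates to the winning candidate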

miplearn/classifiers/counting.py View File

@@ -0,0 +1,28 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import numpy as np

from miplearn.classifiers import Classifier


class CountingClassifier(Classifier):
    """
    A classifier that generates constant predictions, based only on the
    frequency of the training labels. For example, if y_train is
    [1.0, 0.0, 0.0], this classifier always returns [0.67, 0.33] for any
    x_test. It essentially counts how many times each label appeared,
    hence the name.
    """

    def __init__(self):
        self.mean = None

    def fit(self, x_train, y_train):
        self.mean = np.mean(y_train)

    def predict_proba(self, x_test):
        return np.array([[1 - self.mean, self.mean]
                         for _ in range(x_test.shape[0])])

    def __repr__(self):
        return "CountingClassifier(mean=%s)" % self.mean

miplearn/classifiers/cv.py View File

@@ -0,0 +1,71 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from copy import deepcopy

import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

from miplearn.classifiers import Classifier

logger = logging.getLogger(__name__)


class CrossValidatedClassifier(Classifier):
    """
    A meta-classifier that, upon training, evaluates the performance of
    another classifier on the training data set using k-fold cross
    validation, then either adopts that classifier, if its cv-score is high
    enough, or returns a constant label for every x_test otherwise.

    The threshold is specified relative to a dummy classifier trained on the
    same dataset. A threshold of 0.0 indicates that any classifier as good as
    the dummy predictor is acceptable; a threshold of 1.0 indicates that only
    classifiers with a perfect cross-validation score are acceptable. Other
    values interpolate linearly between these two extremes. For example, if
    60% of the training labels are positive, the dummy score is 0.6, and a
    threshold of 0.75 requires a cv-score of at least
    0.75 * 1.0 + 0.25 * 0.6 = 0.9.
    """

    def __init__(self,
                 classifier=LogisticRegression(),
                 threshold=0.75,
                 constant=0.0,
                 cv=5,
                 scoring='accuracy'):
        self.classifier = None
        self.classifier_prototype = classifier
        self.constant = constant
        self.threshold = threshold
        self.cv = cv
        self.scoring = scoring

    def fit(self, x_train, y_train):
        # Calculate the dummy score (achievable by always predicting the
        # majority label) and the absolute score threshold
        y_train_avg = np.average(y_train)
        dummy_score = max(y_train_avg, 1 - y_train_avg)
        absolute_threshold = 1. * self.threshold + dummy_score * (1 - self.threshold)

        # Calculate cross-validation score and decide which classifier to use
        clf = deepcopy(self.classifier_prototype)
        cv_score = float(np.mean(cross_val_score(clf,
                                                 x_train,
                                                 y_train,
                                                 cv=self.cv,
                                                 scoring=self.scoring)))
        if cv_score >= absolute_threshold:
            logger.debug("cv_score is above threshold (%.2f >= %.2f); keeping",
                         cv_score, absolute_threshold)
            self.classifier = clf
        else:
            logger.debug("cv_score is below threshold (%.2f < %.2f); discarding",
                         cv_score, absolute_threshold)
            self.classifier = DummyClassifier(strategy="constant",
                                              constant=self.constant)

        # Train the chosen classifier
        self.classifier.fit(x_train, y_train)

    def predict_proba(self, x_test):
        return self.classifier.predict_proba(x_test)
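A usage sketch under stated assumptions (synthetic, linearly separable labels, so the default LogisticRegression prototype should clear the interpolated threshold):

import numpy as np

from miplearn.classifiers.cv import CrossValidatedClassifier

x_train = np.random.rand(100, 2)
y_train = (x_train[:, 0] > 0.5).astype(float)

clf = CrossValidatedClassifier(threshold=0.75, cv=5)
clf.fit(x_train, y_train)     # adopts LogisticRegression if its cv-score is
                              # high enough; otherwise a constant DummyClassifier
print(clf.predict(x_train)[:5])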

miplearn/classifiers/evaluator.py View File

@@ -0,0 +1,15 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from sklearn.metrics import roc_auc_score


class ClassifierEvaluator:
    def __init__(self):
        pass

    def evaluate(self, clf, x_train, y_train):
        # FIXME: use cross-validation
        # Returns the ROC AUC of the classifier on its own training set.
        proba = clf.predict_proba(x_train)
        return roc_auc_score(y_train, proba[:, 1])
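For instance, a classifier that ranks its own training set perfectly scores 1.0 (a sketch; the data is illustrative):

import numpy as np
from sklearn.linear_model import LogisticRegression

from miplearn.classifiers.evaluator import ClassifierEvaluator

x = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
clf = LogisticRegression().fit(x, y)
print(ClassifierEvaluator().evaluate(clf, x, y))  # 1.0, since the predicted
                                                  # probabilities increase with x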

View File

@@ -0,0 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

View File

@@ -0,0 +1,18 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import numpy as np
from numpy.linalg import norm

from miplearn.classifiers.counting import CountingClassifier

E = 0.1


def test_counting():
    clf = CountingClassifier()
    clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
    expected_proba = np.array([[0.375, 0.625],
                               [0.375, 0.625]])
    actual_proba = clf.predict_proba(np.zeros((2, 25)))
    assert norm(actual_proba - expected_proba) < E

View File

@@ -0,0 +1,46 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import numpy as np
from numpy.linalg import norm
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from miplearn.classifiers.cv import CrossValidatedClassifier

E = 0.1


def test_cv():
    # Training set: label is true if the point is inside a 2D circle
    x_train = np.array([[x1, x2]
                        for x1 in range(-10, 11)
                        for x2 in range(-10, 11)])
    x_train = StandardScaler().fit_transform(x_train)
    n_samples = x_train.shape[0]
    y_train = np.array([1.0 if x1*x1 + x2*x2 <= 100 else 0.0
                        for x1 in range(-10, 11)
                        for x2 in range(-10, 11)])

    # Support vector machines with linear kernels do not perform well on this
    # data set, so the predictor should return the given constant.
    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
                                                  kernel='linear',
                                                  random_state=42),
                                   threshold=0.90,
                                   constant=0.0,
                                   cv=30)
    clf.fit(x_train, y_train)
    assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E

    # Support vector machines with quadratic kernels perform almost perfectly
    # on this data set, so the predictor should return their prediction.
    clf = CrossValidatedClassifier(classifier=SVC(probability=True,
                                                  kernel='poly',
                                                  degree=2,
                                                  random_state=42),
                                   threshold=0.90,
                                   cv=30)
    clf.fit(x_train, y_train)
    assert norm(y_train - clf.predict(x_train)) < E

View File

@@ -0,0 +1,20 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from miplearn.classifiers.evaluator import ClassifierEvaluator


def test_evaluator():
    clf_a = KNeighborsClassifier(n_neighbors=1)
    clf_b = KNeighborsClassifier(n_neighbors=2)
    x_train = np.array([[0, 0], [1, 0]])
    y_train = np.array([0, 1])
    clf_a.fit(x_train, y_train)
    clf_b.fit(x_train, y_train)
    ev = ClassifierEvaluator()
    assert ev.evaluate(clf_a, x_train, y_train) == 1.0
    assert ev.evaluate(clf_b, x_train, y_train) == 0.5
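The second assertion holds because, with n_neighbors=2 and only two training points, every query averages both labels: predict_proba returns [0.5, 0.5] everywhere, so the ROC AUC degenerates to 0.5.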

View File

@@ -0,0 +1,34 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from unittest.mock import Mock

import numpy as np

from miplearn.classifiers import Classifier
from miplearn.classifiers.threshold import MinPrecisionThreshold


def test_threshold_dynamic():
    clf = Mock(spec=Classifier)
    clf.predict_proba = Mock(return_value=np.array([
        [0.10, 0.90],
        [0.10, 0.90],
        [0.20, 0.80],
        [0.30, 0.70],
    ]))
    x_train = np.array([0, 1, 2, 3])
    y_train = np.array([1, 1, 0, 0])

    threshold = MinPrecisionThreshold(min_precision=1.0)
    assert threshold.find(clf, x_train, y_train) == 0.90

    threshold = MinPrecisionThreshold(min_precision=0.65)
    assert threshold.find(clf, x_train, y_train) == 0.80

    threshold = MinPrecisionThreshold(min_precision=0.50)
    assert threshold.find(clf, x_train, y_train) == 0.70

    threshold = MinPrecisionThreshold(min_precision=0.00)
    assert threshold.find(clf, x_train, y_train) == 0.70
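The expected values follow from the precision curve: with positive-class scores [0.90, 0.90, 0.80, 0.70] against labels [1, 1, 0, 0], a cutoff of 0.90 yields precision 1.0, a cutoff of 0.80 yields 2/3, and a cutoff of 0.70 yields 0.5; find returns the smallest cutoff whose precision meets the requested minimum.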

miplearn/classifiers/threshold.py View File

@@ -0,0 +1,45 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod

import numpy as np
# Note: _binary_clf_curve is a private scikit-learn helper and may change
# between scikit-learn releases.
from sklearn.metrics._ranking import _binary_clf_curve


class DynamicThreshold(ABC):
    @abstractmethod
    def find(self, clf, x_train, y_train):
        """
        Given a trained binary classifier `clf` and a training data set,
        returns the numerical threshold (float) satisfying some criteria.
        """
        pass


class MinPrecisionThreshold(DynamicThreshold):
    """
    The smallest possible threshold satisfying a minimum acceptable
    precision (the fraction of predicted positives that are actually
    positive).
    """

    def __init__(self, min_precision):
        self.min_precision = min_precision

    def find(self, clf, x_train, y_train):
        proba = clf.predict_proba(x_train)
        assert isinstance(proba, np.ndarray), \
            "classifier should return numpy array"
        assert proba.shape == (x_train.shape[0], 2), \
            "classifier should return (%d,%d)-shaped array, not %s" % (
                x_train.shape[0], 2, str(proba.shape))
        fps, tps, thresholds = _binary_clf_curve(y_train, proba[:, 1])
        precision = tps / (tps + fps)
        for k in reversed(range(len(precision))):
            if precision[k] >= self.min_precision:
                return thresholds[k]
        # No cutoff achieves the requested precision; return a sentinel above
        # any probability, so that nothing is ever predicted positive.
        return 2.0
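A brief usage sketch (toy separable data; names are illustrative):

import numpy as np
from sklearn.linear_model import LogisticRegression

from miplearn.classifiers.threshold import MinPrecisionThreshold

x_train = np.array([[0.0], [1.0], [2.0], [3.0]])
y_train = np.array([0, 0, 1, 1])
clf = LogisticRegression().fit(x_train, y_train)
t = MinPrecisionThreshold(min_precision=1.0).find(clf, x_train, y_train)
print(t)  # predict positive only when proba[:, 1] >= t; find returns the
          # sentinel 2.0 when no cutoff reaches the requested precision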