Move python files to root folder; remove built docs

2025-12-06 09:28:51 -06:00 · 2020-08-29 11:42:02 -05:00
parent 741af8506b
commit 5663ced0be
116 changed files with 8 additions and 12408 deletions
--- a/miplearn/classifiers/init.py
+++ b/miplearn/classifiers/init.py
@@ -0,0 +1,33 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+
+class Classifier(ABC):
+    """
+    Abstract base class for binary classifiers.
+
+    Subclasses implement `fit` and `predict_proba`. The concrete `predict`
+    method is derived from `predict_proba` by thresholding the second
+    probability column at 0.5.
+    """
+
+    @abstractmethod
+    def fit(self, x_train, y_train):
+        """
+        Trains the classifier on the given training features and labels.
+        """
+
+    @abstractmethod
+    def predict_proba(self, x_test):
+        """
+        Returns the class probabilities for `x_test`.
+
+        `predict` requires the result to be a numpy array with one row per
+        row of `x_test` and exactly two columns.
+        """
+
+    def predict(self, x_test):
+        """
+        Returns a float array holding 1.0 wherever the second column of
+        `predict_proba(x_test)` is strictly greater than 0.5, and 0.0
+        everywhere else.
+        """
+        proba = self.predict_proba(x_test)
+        # Sanity checks on what the subclass returned.
+        assert isinstance(proba, np.ndarray)
+        assert proba.shape == (x_test.shape[0], 2)
+        second_column = proba[:, 1]
+        is_positive = second_column > 0.5
+        return is_positive.astype(float)
+
+
+class Regressor(ABC):
+    """
+    Abstract base class for regressors.
+
+    Subclasses implement `fit` and `predict`.
+    """
+
+    @abstractmethod
+    def fit(self, x_train, y_train):
+        """
+        Trains the regressor on the given training features and targets.
+        """
+        pass
+
+    @abstractmethod
+    def predict(self, x_test=None):
+        """
+        Returns the predictions for `x_test`.
+
+        The previous signature took no input at all, which left a regressor
+        with nothing to predict on and disagreed with
+        `Classifier.predict_proba(self, x_test)`. The parameter defaults to
+        None only so that existing zero-argument overrides and calls keep
+        working; new subclasses should accept and use `x_test`.
+        """
+        pass
--- a/miplearn/classifiers/adaptive.py
+++ b/miplearn/classifiers/adaptive.py
@@ -0,0 +1,66 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+import logging
+from copy import deepcopy
+
+from miplearn.classifiers import Classifier
+from miplearn.classifiers.counting import CountingClassifier
+from miplearn.classifiers.evaluator import ClassifierEvaluator
+from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+
+logger = logging.getLogger(__name__)
+
+
+class AdaptiveClassifier(Classifier):
+    """
+    A meta-classifier which dynamically selects what actual classifier to use
+    based on the score that the evaluator assigns to each candidate on a
+    particular training data set.
+    """
+
+    def __init__(self,
+                 candidates=None,
+                 evaluator=None):
+        """
+        Initializes the meta-classifier.
+
+        Parameters
+        ----------
+        candidates: dict, optional
+            Maps a candidate name to a dict with two keys: "classifier", the
+            prototype that is deep-copied and trained during `fit`, and
+            "min samples", the smallest number of training rows for which
+            the candidate is considered. If None, a default set (k-nearest
+            neighbors, scaled logistic regression and CountingClassifier) is
+            used.
+        evaluator: optional
+            Object providing `evaluate(clf, x_train, y_train)`, returning a
+            score where higher is better. If None, a new ClassifierEvaluator
+            is created for this instance.
+        """
+        if candidates is None:
+            candidates = {
+                "knn(100)": {
+                    "classifier": KNeighborsClassifier(n_neighbors=100),
+                    "min samples": 100,
+                },
+                "logistic": {
+                    "classifier": make_pipeline(StandardScaler(),
+                                                LogisticRegression()),
+                    "min samples": 30,
+                },
+                "counting": {
+                    "classifier": CountingClassifier(),
+                    "min samples": 0,
+                }
+            }
+        # Build the default evaluator per instance. A default written in the
+        # signature would be created once, at definition time, and shared by
+        # every AdaptiveClassifier.
+        if evaluator is None:
+            evaluator = ClassifierEvaluator()
+        self.candidates = candidates
+        self.evaluator = evaluator
+        self.classifier = None
+
+    def fit(self, x_train, y_train):
+        """
+        Trains a copy of every eligible candidate and keeps the one with the
+        highest evaluator score in `self.classifier`.
+
+        A candidate is eligible when the number of rows of `x_train` is at
+        least its "min samples" value. If no candidate is selected,
+        `self.classifier` is set to None and a warning is logged.
+        """
+        best_name, best_clf, best_score = None, None, -float("inf")
+        n_samples = x_train.shape[0]
+        for name, clf_dict in self.candidates.items():
+            if n_samples < clf_dict["min samples"]:
+                continue
+            # Train a copy so that the prototype stored in `candidates`
+            # remains untouched and can be reused by later calls.
+            clf = deepcopy(clf_dict["classifier"])
+            clf.fit(x_train, y_train)
+            score = self.evaluator.evaluate(clf, x_train, y_train)
+            if score > best_score:
+                best_name, best_clf, best_score = name, clf, score
+        if best_clf is None:
+            logger.warning("No candidate classifier could be selected "
+                           "for %d training samples", n_samples)
+        else:
+            logger.debug("Best classifier: %s (score=%.3f)",
+                         best_name, best_score)
+        self.classifier = best_clf
+
+    def predict_proba(self, x_test):
+        """
+        Delegates to the classifier selected by `fit`. Fails if `fit` has not
+        been called or did not select any classifier.
+        """
+        return self.classifier.predict_proba(x_test)
--- a/miplearn/classifiers/counting.py
+++ b/miplearn/classifiers/counting.py
@@ -0,0 +1,28 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from miplearn.classifiers import Classifier
+import numpy as np
+
+
+class CountingClassifier(Classifier):
+    """
+    A classifier that generates constant predictions, based only on the
+    frequency of the training labels. For example, if y_train is
+    [1.0, 0.0, 0.0], this classifier returns approximately [0.67, 0.33] for
+    every row of x_test, that is, [1 - mean(y_train), mean(y_train)]. It
+    essentially counts how many times each label appeared, hence the name.
+    """
+
+    def __init__(self):
+        # Mean of the training labels; None until `fit` is called.
+        self.mean = None
+
+    def fit(self, x_train, y_train):
+        """
+        Stores the mean of `y_train`. `x_train` is not used.
+        """
+        self.mean = np.mean(y_train)
+
+    def predict_proba(self, x_test):
+        """
+        Returns an array of shape (x_test.shape[0], 2) in which every row is
+        [1 - mean, mean].
+        """
+        # np.tile keeps the result two-dimensional even when x_test has zero
+        # rows. Building the array from an empty list of rows would produce
+        # shape (0,), which fails the (n, 2) shape check performed by
+        # Classifier.predict.
+        row = [1 - self.mean, self.mean]
+        return np.tile(row, (x_test.shape[0], 1))
+
+    def __repr__(self):
+        return "CountingClassifier(mean=%s)" % self.mean
--- a/miplearn/classifiers/cv.py
+++ b/miplearn/classifiers/cv.py
@@ -0,0 +1,71 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from copy import deepcopy
+
+import numpy as np
+from miplearn.classifiers import Classifier
+from sklearn.dummy import DummyClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import cross_val_score
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+class CrossValidatedClassifier(Classifier):
+    """
+    A meta-classifier that, upon training, evaluates the performance of another
+    classifier on the training data set using k-fold cross validation, then
+    either adopts the other classifier if its cv-score is high enough, or
+    returns a constant label for every x_test otherwise.
+
+    The threshold is specified in comparison to a dummy classifier trained
+    on the same dataset. For example, a threshold of 0.0 indicates that any
+    classifier as good as the dummy predictor is acceptable. A threshold of 1.0
+    indicates that only classifiers with a perfect cross-validation score are
+    acceptable. Other numbers are a linear interpolation of these two extremes.
+    """
+
+    def __init__(self,
+                 classifier=LogisticRegression(),
+                 threshold=0.75,
+                 constant=0.0,
+                 cv=5,
+                 scoring='accuracy'):
+        # The prototype is never trained directly; `fit` works on a deep copy.
+        self.classifier_prototype = classifier
+        # Classifier actually used for predictions; chosen by `fit`.
+        self.classifier = None
+        self.threshold = threshold
+        self.constant = constant
+        self.scoring = scoring
+        self.cv = cv
+
+    def fit(self, x_train, y_train):
+        """
+        Cross-validates a copy of the prototype and keeps it if its mean
+        score reaches the absolute threshold; otherwise falls back to a
+        constant-label DummyClassifier. The chosen classifier is then
+        trained on the full training set.
+        """
+        # Score of the dummy predictor: frequency of the most common label.
+        label_avg = np.average(y_train)
+        dummy_score = max(label_avg, 1 - label_avg)
+
+        # Interpolate between the dummy score (threshold=0) and a perfect
+        # score of 1 (threshold=1).
+        absolute_threshold = (1. * self.threshold
+                              + dummy_score * (1 - self.threshold))
+
+        # Mean cross-validation score of the candidate classifier.
+        clf = deepcopy(self.classifier_prototype)
+        fold_scores = cross_val_score(clf,
+                                      x_train,
+                                      y_train,
+                                      cv=self.cv,
+                                      scoring=self.scoring)
+        cv_score = float(np.mean(fold_scores))
+
+        if cv_score >= absolute_threshold:
+            logger.debug("cv_score is above threshold (%.2f >= %.2f); keeping" %
+                         (cv_score, absolute_threshold))
+            chosen = clf
+        else:
+            logger.debug("cv_score is below threshold (%.2f < %.2f); discarding" %
+                         (cv_score, absolute_threshold))
+            chosen = DummyClassifier(strategy="constant",
+                                     constant=self.constant)
+        self.classifier = chosen
+
+        # Train chosen classifier
+        self.classifier.fit(x_train, y_train)
+
+    def predict_proba(self, x_test):
+        """
+        Delegates to the classifier chosen by `fit`.
+        """
+        return self.classifier.predict_proba(x_test)
--- a/miplearn/classifiers/evaluator.py
+++ b/miplearn/classifiers/evaluator.py
@@ -0,0 +1,15 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from sklearn.metrics import roc_auc_score
+
+
+class ClassifierEvaluator:
+    """
+    Scores a trained classifier by the ROC AUC of its predicted probabilities.
+    """
+
+    def __init__(self):
+        """Nothing to configure."""
+
+    def evaluate(self, clf, x_train, y_train):
+        """
+        Returns `roc_auc_score` of `y_train` against the second column of
+        `clf.predict_proba(x_train)`.
+        """
+        # FIXME: use cross-validation
+        second_column = clf.predict_proba(x_train)[:, 1]
+        return roc_auc_score(y_train, second_column)
--- a/miplearn/classifiers/tests/init.py
+++ b/miplearn/classifiers/tests/init.py
@@ -0,0 +1,3 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
--- a/miplearn/classifiers/tests/test_counting.py
+++ b/miplearn/classifiers/tests/test_counting.py
@@ -0,0 +1,18 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+from miplearn.classifiers.counting import CountingClassifier
+
+import numpy as np
+from numpy.linalg import norm
+
+E = 0.1
+
+
+def test_counting():
+    """
+    With 3 zeros and 5 ones as training labels, every predicted row should
+    be [0.375, 0.625].
+    """
+    y_train = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+    x_train = np.zeros((8, 25))
+    x_test = np.zeros((2, 25))
+
+    clf = CountingClassifier()
+    clf.fit(x_train, y_train)
+    actual_proba = clf.predict_proba(x_test)
+
+    expected_row = [0.375, 0.625]
+    expected_proba = np.array([expected_row, expected_row])
+    assert norm(actual_proba - expected_proba) < E
--- a/miplearn/classifiers/tests/test_cv.py
+++ b/miplearn/classifiers/tests/test_cv.py
@@ -0,0 +1,46 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+import numpy as np
+from miplearn.classifiers.cv import CrossValidatedClassifier
+from numpy.linalg import norm
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+
+E = 0.1
+
+
+def test_cv():
+    # Training set: all integer points of the [-10, 10] x [-10, 10] grid.
+    # The label is 1.0 if the (unscaled) point lies inside a circle of
+    # radius 10 centered at the origin, and 0.0 otherwise.
+    grid = [(x1, x2)
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)]
+    x_train = StandardScaler().fit_transform(np.array(grid))
+    y_train = np.array([1.0 if x1*x1 + x2*x2 <= 100 else 0.0
+                        for (x1, x2) in grid])
+    n_samples = x_train.shape[0]
+
+    # The first SVC is expected to score below the threshold on this data
+    # set, so the predictor should return the given constant.
+    # NOTE(review): this case was originally described as "linear kernel",
+    # but no kernel argument is passed to SVC here -- confirm the intent.
+    weak_svc = SVC(probability=True,
+                   random_state=42)
+    clf = CrossValidatedClassifier(classifier=weak_svc,
+                                   threshold=0.90,
+                                   constant=0.0,
+                                   cv=30)
+    clf.fit(x_train, y_train)
+    assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E
+
+    # Support vector machines with quadratic kernels perform almost perfectly
+    # on this data set, so predictor should return their prediction.
+    quadratic_svc = SVC(probability=True,
+                        kernel='poly',
+                        degree=2,
+                        random_state=42)
+    clf = CrossValidatedClassifier(classifier=quadratic_svc,
+                                   threshold=0.90,
+                                   cv=30)
+    clf.fit(x_train, y_train)
+    print(y_train - clf.predict(x_train))
+    assert norm(y_train - clf.predict(x_train)) < E
--- a/miplearn/classifiers/tests/test_evaluator.py
+++ b/miplearn/classifiers/tests/test_evaluator.py
@@ -0,0 +1,20 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+import numpy as np
+from miplearn.classifiers.evaluator import ClassifierEvaluator
+from sklearn.neighbors import KNeighborsClassifier
+
+
+def test_evaluator():
+    """
+    On a two-point data set, 1-NN is expected to score 1.0 and 2-NN 0.5.
+    """
+    x_train = np.array([[0, 0], [1, 0]])
+    y_train = np.array([0, 1])
+    ev = ClassifierEvaluator()
+
+    # (number of neighbors, expected score)
+    cases = [(1, 1.0), (2, 0.5)]
+    for n_neighbors, expected_score in cases:
+        clf = KNeighborsClassifier(n_neighbors=n_neighbors)
+        clf.fit(x_train, y_train)
+        assert ev.evaluate(clf, x_train, y_train) == expected_score
+
--- a/miplearn/classifiers/tests/test_threshold.py
+++ b/miplearn/classifiers/tests/test_threshold.py
@@ -0,0 +1,34 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from unittest.mock import Mock
+
+import numpy as np
+from miplearn.classifiers import Classifier
+from miplearn.classifiers.threshold import MinPrecisionThreshold
+
+
+def test_threshold_dynamic():
+    """
+    Checks the threshold chosen by MinPrecisionThreshold for several minimum
+    precision levels, using a mocked classifier with fixed probabilities.
+    """
+    fixed_proba = np.array([
+        [0.10, 0.90],
+        [0.10, 0.90],
+        [0.20, 0.80],
+        [0.30, 0.70],
+    ])
+    clf = Mock(spec=Classifier)
+    clf.predict_proba = Mock(return_value=fixed_proba)
+    x_train = np.array([0, 1, 2, 3])
+    y_train = np.array([1, 1, 0, 0])
+
+    # (minimum precision, expected threshold)
+    cases = [
+        (1.0, 0.90),
+        (0.65, 0.80),
+        (0.50, 0.70),
+        (0.00, 0.70),
+    ]
+    for min_precision, expected in cases:
+        threshold = MinPrecisionThreshold(min_precision=min_precision)
+        assert threshold.find(clf, x_train, y_train) == expected
+
--- a/miplearn/classifiers/threshold.py
+++ b/miplearn/classifiers/threshold.py
@@ -0,0 +1,45 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+
+from abc import abstractmethod, ABC
+
+import numpy as np
+from sklearn.metrics._ranking import _binary_clf_curve
+
+
+class DynamicThreshold(ABC):
+    """
+    Abstract base class for strategies that derive a decision threshold from
+    a trained classifier and a training data set.
+    """
+
+    @abstractmethod
+    def find(self, clf, x_train, y_train):
+        """
+        Given a trained binary classifier `clf` and a training data set,
+        returns the numerical threshold (float) satisfying some criteria.
+        """
+
+
+class MinPrecisionThreshold(DynamicThreshold):
+    """
+    The smallest possible threshold satisfying a minimum acceptable
+    precision, where precision is computed on the training data as
+    tps / (tps + fps) for each candidate threshold.
+    """
+
+    def __init__(self, min_precision):
+        self.min_precision = min_precision
+
+    def find(self, clf, x_train, y_train):
+        """
+        Returns the threshold at the last position of the curve computed by
+        `_binary_clf_curve` whose precision is at least `min_precision`, or
+        2.0 if no position qualifies.
+        """
+        proba = clf.predict_proba(x_train)
+
+        assert isinstance(proba, np.ndarray), \
+            "classifier should return numpy array"
+        assert proba.shape == (x_train.shape[0], 2), \
+            "classifier should return (%d,%d)-shaped array, not %s" % (
+                x_train.shape[0], 2, str(proba.shape))
+
+        fps, tps, thresholds = _binary_clf_curve(y_train, proba[:, 1])
+        precision = tps / (tps + fps)
+
+        # Walk the curve from its last position to its first and stop at
+        # the first position that is precise enough.
+        for value, threshold in zip(precision[::-1], thresholds[::-1]):
+            if value >= self.min_precision:
+                return threshold
+        return 2.0