mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 01:18:52 -06:00
MIPLearn v0.3
@@ -1,163 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
-# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
+# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

from abc import ABC, abstractmethod
from typing import Optional

import numpy as np


class Classifier(ABC):
    """
    A Classifier decides which class each sample belongs to, based on historical
    data.
    """

    def __init__(self) -> None:
        self.n_features: Optional[int] = None
        self.n_classes: Optional[int] = None

    @abstractmethod
    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Trains the classifier.

        Parameters
        ----------
        x_train: np.ndarray
            An array of features with shape (`n_samples`, `n_features`). Each entry
            must be a float.
        y_train: np.ndarray
            An array of labels with shape (`n_samples`, `n_classes`). Each entry
            must be a bool, and there must be exactly one True element in each row.
        """
        assert isinstance(x_train, np.ndarray)
        assert isinstance(y_train, np.ndarray)
        assert x_train.dtype in [
            np.float16,
            np.float32,
            np.float64,
        ], f"x_train.dtype should be float. Found {x_train.dtype} instead."
        assert y_train.dtype == np.bool_
        assert len(x_train.shape) == 2
        assert len(y_train.shape) == 2
        (n_samples_x, n_features) = x_train.shape
        (n_samples_y, n_classes) = y_train.shape
        assert n_samples_y == n_samples_x
        self.n_features = n_features
        self.n_classes = n_classes

    @abstractmethod
    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Predicts the probability of each sample belonging to each class. Must be
        called after fit.

        Parameters
        ----------
        x_test: np.ndarray
            An array of features with shape (`n_samples`, `n_features`). The number
            of features in `x_test` must match the number of features in `x_train`
            provided to `fit`.

        Returns
        -------
        np.ndarray
            An array of predicted probabilities with shape (`n_samples`,
            `n_classes`), where `n_classes` is the number of columns in `y_train`
            provided to `fit`.
        """
        assert self.n_features is not None
        assert isinstance(x_test, np.ndarray)
        assert len(x_test.shape) == 2
        (n_samples, n_features_x) = x_test.shape
        assert n_features_x == self.n_features, (
            f"Test and training data have different number of "
            f"features: {n_features_x} != {self.n_features}"
        )
        return np.ndarray([])

    @abstractmethod
    def clone(self) -> "Classifier":
        """
        Returns an unfitted copy of this classifier with the same hyperparameters.
        """
        pass


class Regressor(ABC):
    """
    A Regressor tries to predict the values of some continuous variables, given
    the values of other variables.
    """

    def __init__(self) -> None:
        self.n_inputs: Optional[int] = None

    @abstractmethod
    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Trains the regressor.

        Parameters
        ----------
        x_train: np.ndarray
            An array of inputs with shape (`n_samples`, `n_inputs`). Each entry
            must be a float.
        y_train: np.ndarray
            An array of outputs with shape (`n_samples`, `n_outputs`). Each entry
            must be a float.
        """
        assert isinstance(x_train, np.ndarray)
        assert isinstance(y_train, np.ndarray)
        assert x_train.dtype in [np.float16, np.float32, np.float64]
        assert y_train.dtype in [np.float16, np.float32, np.float64]
        assert len(x_train.shape) == 2, (
            f"Parameter x_train should be a two-dimensional ndarray. "
            f"Found {x_train.shape} ndarray instead."
        )
        assert len(y_train.shape) == 2, (
            f"Parameter y_train should be a two-dimensional ndarray. "
            f"Found {y_train.shape} ndarray instead."
        )
        (n_samples_x, n_inputs) = x_train.shape
        (n_samples_y, n_outputs) = y_train.shape
        assert n_samples_y == n_samples_x
        self.n_inputs = n_inputs

    @abstractmethod
    def predict(self, x_test: np.ndarray) -> np.ndarray:
        """
        Predicts the values of the output variables. Must be called after fit.

        Parameters
        ----------
        x_test: np.ndarray
            An array of inputs with shape (`n_samples`, `n_inputs`), where
            `n_inputs` must match the number of columns in `x_train` provided
            to `fit`.

        Returns
        -------
        np.ndarray
            An array of outputs with shape (`n_samples`, `n_outputs`), where
            `n_outputs` is the number of columns in `y_train` provided to `fit`.
        """
        assert self.n_inputs is not None
        assert isinstance(x_test, np.ndarray), (
            f"Parameter x_test must be np.ndarray. "
            f"Found {x_test.__class__.__name__} instead."
        )
        assert len(x_test.shape) == 2
        (n_samples, n_inputs_x) = x_test.shape
        assert n_inputs_x == self.n_inputs, (
            f"Test and training data have different number of "
            f"inputs: {n_inputs_x} != {self.n_inputs}"
        )
        return np.ndarray([])

    @abstractmethod
    def clone(self) -> "Regressor":
        """
        Returns an unfitted copy of this regressor with the same hyperparameters.
        """
        pass
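For illustration, here is a minimal sketch of a concrete subclass of the `Classifier` ABC above. It is not part of this commit; the name `MajorityClassifier` is hypothetical, and it assumes the pre-0.3 ABC is importable as `miplearn.classifiers.Classifier`:

```python
# Hypothetical illustration (not in the repository): a minimal Classifier
# subclass that always predicts the empirical class frequencies.
from typing import Optional

import numpy as np

from miplearn.classifiers import Classifier  # pre-0.3 ABC shown above


class MajorityClassifier(Classifier):
    def __init__(self) -> None:
        super().__init__()
        self.mean: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)  # runs the shape/dtype assertions
        self.mean = y_train.astype(float).mean(axis=0)

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)  # validates x_test against n_features
        assert self.mean is not None
        return np.tile(self.mean, (x_test.shape[0], 1))

    def clone(self) -> "MajorityClassifier":
        return MajorityClassifier()
```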
@@ -1,135 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

import logging
from typing import Dict, Optional

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier

logger = logging.getLogger(__name__)


class CandidateClassifierSpecs:
    """
    Specifications describing how to construct a certain classifier, and under
    which circumstances it can be used.

    Parameters
    ----------
    min_samples: int
        Minimum number of samples for this classifier to be considered.
    classifier: Classifier
        Prototype classifier; an unfitted copy is created (via `clone`) whenever
        this candidate is evaluated.
    """

    def __init__(
        self,
        classifier: Classifier,
        min_samples: int = 0,
    ) -> None:
        self.min_samples = min_samples
        self.classifier = classifier


class AdaptiveClassifier(Classifier):
    """
    A meta-classifier which dynamically selects which actual classifier to use
    based on its cross-validation score on a particular training data set.

    Parameters
    ----------
    candidates: Dict[str, CandidateClassifierSpecs]
        A dictionary of candidate classifiers to consider, mapping the name of the
        candidate to its specs, which describe how to construct it and under what
        scenarios it may be used. If no candidates are provided, uses a fixed set
        of defaults, which includes `CountingClassifier`, `KNeighborsClassifier`
        and `LogisticRegression`.
    """

    def __init__(
        self,
        candidates: Optional[Dict[str, CandidateClassifierSpecs]] = None,
    ) -> None:
        super().__init__()
        if candidates is None:
            candidates = {
                "forest(5,10)": CandidateClassifierSpecs(
                    classifier=ScikitLearnClassifier(
                        RandomForestClassifier(
                            n_estimators=5,
                            min_samples_split=10,
                        ),
                    ),
                    min_samples=100,
                ),
                "knn(100)": CandidateClassifierSpecs(
                    classifier=ScikitLearnClassifier(
                        KNeighborsClassifier(n_neighbors=100)
                    ),
                    min_samples=100,
                ),
                "logistic": CandidateClassifierSpecs(
                    classifier=ScikitLearnClassifier(
                        make_pipeline(
                            StandardScaler(),
                            LogisticRegression(),
                        )
                    ),
                    min_samples=30,
                ),
                "counting": CandidateClassifierSpecs(
                    classifier=CountingClassifier(),
                ),
            }
        self.candidates = candidates
        self.classifier: Optional[Classifier] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        n_samples = x_train.shape[0]
        assert y_train.shape == (n_samples, 2)

        # If almost all samples belong to the same class, return a fixed
        # prediction and skip all the other steps.
        if y_train[:, 0].mean() > 0.99 or y_train[:, 1].mean() > 0.99:
            self.classifier = CountingClassifier()
            self.classifier.fit(x_train, y_train)
            return

        best_name, best_clf, best_score = None, None, -float("inf")
        for (name, specs) in self.candidates.items():
            if n_samples < specs.min_samples:
                continue
            clf = specs.classifier.clone()
            if isinstance(clf, ScikitLearnClassifier):
                # Estimate out-of-sample probabilities via cross-validation.
                proba = cross_val_predict(
                    clf.inner_clf,
                    x_train,
                    y_train[:, 1],
                    method="predict_proba",
                )[:, 1]
            else:
                clf.fit(x_train, y_train)
                proba = clf.predict_proba(x_train)[:, 1]
            score = roc_auc_score(y_train[:, 1], proba)
            if score > best_score:
                best_name, best_clf, best_score = name, clf, score
        logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score))
        if isinstance(best_clf, ScikitLearnClassifier):
            best_clf.fit(x_train, y_train)
        self.classifier = best_clf

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)
        assert self.classifier is not None
        return self.classifier.predict_proba(x_test)

    def clone(self) -> "AdaptiveClassifier":
        return AdaptiveClassifier(self.candidates)
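A usage sketch of `AdaptiveClassifier` with a custom candidate set. It is not part of this commit, targets the pre-0.3 API removed above, and the synthetic data and `DecisionTreeClassifier` candidate are illustrative assumptions:

```python
# Hypothetical usage sketch: AdaptiveClassifier picks the candidate with the
# best cross-validated AUC on this particular training set.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

from miplearn.classifiers.adaptive import AdaptiveClassifier, CandidateClassifierSpecs
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier

rng = np.random.default_rng(42)
x_train = rng.random((200, 5))
labels = x_train[:, 0] > 0.5
y_train = np.column_stack([~labels, labels])  # one-hot bool, shape (200, 2)

clf = AdaptiveClassifier(
    candidates={
        "tree": CandidateClassifierSpecs(
            classifier=ScikitLearnClassifier(DecisionTreeClassifier(max_depth=3)),
            min_samples=50,
        ),
        "counting": CandidateClassifierSpecs(classifier=CountingClassifier()),
    }
)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)  # shape (200, 2)
```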
@@ -1,45 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, cast

import numpy as np

from miplearn.classifiers import Classifier


class CountingClassifier(Classifier):
    """
    A classifier that generates constant predictions, based only on the frequency
    of the training labels. For example, suppose `y_train` is given by:
    ```python
    y_train = np.array([
        [True, False],
        [False, True],
        [False, True],
    ])
    ```
    Then `predict_proba` always returns `[0.33 0.67]` for every sample, regardless
    of `x_test`. It essentially counts how many times each label appeared, hence
    the name.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mean: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        self.mean = cast(np.ndarray, np.mean(y_train, axis=0))

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        return np.array([self.mean for _ in range(n_samples)])

    def __repr__(self) -> str:
        return "CountingClassifier(mean=%s)" % self.mean

    def clone(self) -> "CountingClassifier":
        return CountingClassifier()
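The docstring example can be reproduced directly. This check is not part of the commit and targets the pre-0.3 `miplearn.classifiers.counting` module removed above:

```python
# Reproducing the CountingClassifier docstring example.
import numpy as np

from miplearn.classifiers.counting import CountingClassifier

x_train = np.array([[0.0], [1.0], [2.0]])
y_train = np.array([
    [True, False],
    [False, True],
    [False, True],
])
clf = CountingClassifier()
clf.fit(x_train, y_train)
print(clf.predict_proba(np.array([[5.0]])))  # [[0.333... 0.666...]]
```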
@@ -1,132 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

import logging
from typing import Optional, List

import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

from miplearn.classifiers import Classifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier

logger = logging.getLogger(__name__)


class CrossValidatedClassifier(Classifier):
    """
    A meta-classifier that, upon training, evaluates the performance of another
    candidate classifier on the training data set, using k-fold cross-validation,
    then either adopts the candidate, if its cross-validation score is high
    enough, or returns constant predictions for every `x_test` otherwise.

    Parameters
    ----------
    classifier: ScikitLearnClassifier
        Prototype of the candidate classifier; an unfitted copy is created via
        `clone` before training.
    threshold: float
        Number from zero to one indicating how well the candidate classifier must
        perform to be adopted. The threshold is specified in comparison to a dummy
        classifier trained on the same dataset. For example, a threshold of 0.0
        indicates that any classifier as good as the dummy predictor is
        acceptable. A threshold of 1.0 indicates that only classifiers with
        perfect cross-validation scores are acceptable. Other numbers are a linear
        interpolation of these two extremes.
    constant: Optional[List[bool]]
        If the candidate classifier fails to meet the threshold, use a dummy
        classifier which always returns this prediction instead. The list should
        have exactly as many elements as the number of columns of `y_train`
        provided to `fit`.
    cv: int
        Number of folds.
    scoring: str
        Scoring function.
    """

    def __init__(
        self,
        classifier: ScikitLearnClassifier = ScikitLearnClassifier(
            LogisticRegression()
        ),
        threshold: float = 0.75,
        constant: Optional[List[bool]] = None,
        cv: int = 5,
        scoring: str = "accuracy",
    ):
        super().__init__()
        if constant is None:
            constant = [True, False]
        self.n_classes = len(constant)
        self.classifier: Optional[ScikitLearnClassifier] = None
        self.classifier_prototype = classifier
        self.constant: List[bool] = constant
        self.threshold = threshold
        self.cv = cv
        self.scoring = scoring

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        (n_samples, n_classes) = y_train.shape
        assert n_classes == self.n_classes

        # Calculate dummy score and absolute score threshold
        y_train_avg = np.average(y_train)
        dummy_score = max(y_train_avg, 1 - y_train_avg)
        absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)

        # Calculate cross-validation score and decide which classifier to use
        clf = self.classifier_prototype.clone()
        assert clf is not None
        assert isinstance(clf, ScikitLearnClassifier), (
            f"The provided classifier must be a ScikitLearnClassifier. "
            f"Found {clf.__class__.__name__} instead. If this is a scikit-learn "
            f"classifier, you must wrap it with ScikitLearnClassifier."
        )

        cv_score = float(
            np.mean(
                cross_val_score(
                    clf.inner_clf,
                    x_train,
                    y_train[:, 1],
                    cv=self.cv,
                    scoring=self.scoring,
                )
            )
        )
        if cv_score >= absolute_threshold:
            logger.debug(
                "cv_score is above threshold (%.2f >= %.2f); keeping"
                % (cv_score, absolute_threshold)
            )
            self.classifier = clf
        else:
            logger.debug(
                "cv_score is below threshold (%.2f < %.2f); discarding"
                % (cv_score, absolute_threshold)
            )
            self.classifier = ScikitLearnClassifier(
                DummyClassifier(
                    strategy="constant",
                    constant=self.constant[1],
                )
            )

        # Train chosen classifier
        assert self.classifier is not None
        assert isinstance(self.classifier, ScikitLearnClassifier)
        self.classifier.fit(x_train, y_train)

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)
        assert self.classifier is not None
        return self.classifier.predict_proba(x_test)

    def clone(self) -> "CrossValidatedClassifier":
        return CrossValidatedClassifier(
            classifier=self.classifier_prototype,
            threshold=self.threshold,
            constant=self.constant,
            cv=self.cv,
            scoring=self.scoring,
        )
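To make the threshold interpolation in `fit` concrete, here is a small worked sketch with assumed numbers (not part of the commit): with the default relative `threshold` of 0.75 and a dummy accuracy of 0.70, the candidate must reach a cross-validated accuracy of 0.925 to be adopted.

```python
# Illustration of the absolute-threshold interpolation (assumed numbers).
threshold = 0.75     # user-specified, relative
y_train_avg = 0.30   # assume 30% of samples belong to class 1
dummy_score = max(y_train_avg, 1 - y_train_avg)  # 0.70
absolute_threshold = 1.0 * threshold + dummy_score * (1 - threshold)
print(absolute_threshold)  # 0.925
```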
miplearn/classifiers/minprob.py (new file, 61 lines)
@@ -0,0 +1,61 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import List, Any, Callable, Optional

import numpy as np
import sklearn
from sklearn.base import BaseEstimator
from sklearn.utils.multiclass import unique_labels


class MinProbabilityClassifier(BaseEstimator):
    """
    Meta-classifier that returns NaN for predictions whose probability, as
    reported by the base classifier, falls below a given threshold. More
    specifically, this meta-classifier calls base_clf.predict_proba and compares
    the result against the provided thresholds. If the probability for one of the
    classes is above its threshold, the meta-classifier returns that prediction.
    Otherwise, it returns NaN.
    """

    def __init__(
        self,
        base_clf: Any,
        thresholds: List[float],
        clone_fn: Callable[[Any], Any] = sklearn.base.clone,
    ) -> None:
        assert len(thresholds) == 2
        self.base_clf = base_clf
        self.thresholds = thresholds
        self.clone_fn = clone_fn
        self.clf_: Optional[Any] = None
        self.classes_: Optional[List[Any]] = None

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        assert len(y.shape) == 1
        assert len(x.shape) == 2
        classes = unique_labels(y)
        assert len(classes) == len(self.thresholds)

        self.clf_ = self.clone_fn(self.base_clf)
        self.clf_.fit(x, y)
        self.classes_ = self.clf_.classes_

    def predict(self, x: np.ndarray) -> np.ndarray:
        assert self.clf_ is not None
        assert self.classes_ is not None

        y_proba = self.clf_.predict_proba(x)
        assert len(y_proba.shape) == 2
        assert y_proba.shape[0] == x.shape[0]
        assert y_proba.shape[1] == 2
        n_samples = x.shape[0]

        y_pred = []
        for sample_idx in range(n_samples):
            yi = float("nan")
            for (class_idx, class_val) in enumerate(self.classes_):
                if y_proba[sample_idx, class_idx] >= self.thresholds[class_idx]:
                    yi = class_val
            y_pred.append(yi)
        return np.array(y_pred)
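A usage sketch of `MinProbabilityClassifier` (not part of this commit; the data is illustrative):

```python
# Hypothetical usage sketch: abstain (NaN) on low-confidence predictions.
import numpy as np
from sklearn.linear_model import LogisticRegression

from miplearn.classifiers.minprob import MinProbabilityClassifier

x = np.array([[0.0], [0.1], [0.9], [1.0]])
y = np.array([0, 0, 1, 1])
clf = MinProbabilityClassifier(
    base_clf=LogisticRegression(),
    thresholds=[0.9, 0.9],
)
clf.fit(x, y)
# Samples where neither class reaches 90% probability come back as NaN.
print(clf.predict(np.array([[0.0], [0.5], [1.0]])))
```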
miplearn/classifiers/singleclass.py (new file, 51 lines)
@@ -0,0 +1,51 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Callable, Optional

import numpy as np
import sklearn.base
from sklearn.base import BaseEstimator
from sklearn.utils.multiclass import unique_labels


class SingleClassFix(BaseEstimator):
    """
    Some sklearn classifiers, such as logistic regression, have issues with
    datasets that contain a single class. This meta-classifier fixes the issue.
    If the training data contains a single class, this meta-classifier always
    returns that class as a prediction. Otherwise, it fits the provided base
    classifier, and returns its predictions instead.
    """

    def __init__(
        self,
        base_clf: BaseEstimator,
        clone_fn: Callable = sklearn.base.clone,
    ):
        self.base_clf = base_clf
        self.clf_: Optional[BaseEstimator] = None
        self.constant_ = None
        self.classes_ = None
        self.clone_fn = clone_fn

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        classes = unique_labels(y)
        if len(classes) == 1:
            assert classes[0] is not None
            self.clf_ = None
            self.constant_ = classes[0]
            self.classes_ = classes
        else:
            self.clf_ = self.clone_fn(self.base_clf)
            assert self.clf_ is not None
            self.clf_.fit(x, y)
            self.constant_ = None
            self.classes_ = self.clf_.classes_

    def predict(self, x: np.ndarray) -> np.ndarray:
        if self.constant_ is not None:
            return np.full(x.shape[0], self.constant_)
        else:
            assert self.clf_ is not None
            return self.clf_.predict(x)
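A usage sketch of `SingleClassFix` (not part of this commit; the data is illustrative):

```python
# Hypothetical usage sketch: training data that contains only one class.
import numpy as np
from sklearn.linear_model import LogisticRegression

from miplearn.classifiers.singleclass import SingleClassFix

x = np.array([[0.0], [1.0], [2.0]])
y = np.array([1, 1, 1])  # only one class present
clf = SingleClassFix(LogisticRegression())
clf.fit(x, y)            # plain LogisticRegression would raise here
print(clf.predict(np.array([[3.0]])))  # [1]
```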
@@ -1,93 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

from typing import Optional, Any, cast

import numpy as np
import sklearn.base

from miplearn.classifiers import Classifier, Regressor


class ScikitLearnClassifier(Classifier):
    """
    Wrapper for scikit-learn classifiers, which makes sure inputs and outputs have
    the correct dimensions and types.
    """

    def __init__(self, clf: Any) -> None:
        super().__init__()
        self.inner_clf = clf
        self.constant: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        (n_samples, n_classes) = y_train.shape
        assert n_classes == 2, (
            f"Scikit-learn classifiers must have exactly two classes. "
            f"{n_classes} classes were provided instead."
        )

        # When all samples belong to the same class, sklearn's predict_proba
        # returns an array with a single column. The following check avoids this
        # strange behavior.
        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
        if mean.max() == 1.0:
            self.constant = mean
            return

        self.inner_clf.fit(x_train, y_train[:, 1])

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        if self.constant is not None:
            return np.array([self.constant for _ in range(n_samples)])
        sklearn_proba = self.inner_clf.predict_proba(x_test)
        if isinstance(sklearn_proba, list):
            assert len(sklearn_proba) == self.n_classes
            for pb in sklearn_proba:
                assert isinstance(pb, np.ndarray)
                assert pb.dtype in [np.float16, np.float32, np.float64]
                assert pb.shape == (n_samples, 2)
            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
            assert proba.shape == (n_samples, self.n_classes)
            return proba
        else:
            assert isinstance(sklearn_proba, np.ndarray)
            assert sklearn_proba.shape == (n_samples, 2)
            return sklearn_proba

    def clone(self) -> "ScikitLearnClassifier":
        return ScikitLearnClassifier(
            clf=sklearn.base.clone(self.inner_clf),
        )


class ScikitLearnRegressor(Regressor):
    """
    Wrapper for scikit-learn regressors, which makes sure inputs and outputs have
    the correct dimensions and types.
    """

    def __init__(self, reg: Any) -> None:
        super().__init__()
        self.inner_reg = reg

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        self.inner_reg.fit(x_train, y_train)

    def predict(self, x_test: np.ndarray) -> np.ndarray:
        super().predict(x_test)
        n_samples = x_test.shape[0]
        sklearn_pred = self.inner_reg.predict(x_test)
        assert isinstance(sklearn_pred, np.ndarray)
        assert sklearn_pred.shape[0] == n_samples
        return sklearn_pred

    def clone(self) -> "ScikitLearnRegressor":
        return ScikitLearnRegressor(
            reg=sklearn.base.clone(self.inner_reg),
        )
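A usage sketch of `ScikitLearnClassifier` (not part of this commit; it targets the pre-0.3 module removed above, with synthetic data):

```python
# Hypothetical usage sketch: wrapping a scikit-learn model so that it consumes
# one-hot boolean labels and always returns an (n_samples, 2) probability array.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from miplearn.classifiers.sklearn import ScikitLearnClassifier

rng = np.random.default_rng(0)
x_train = rng.random((50, 3))
labels = x_train[:, 0] > 0.5
y_train = np.column_stack([~labels, labels])  # bool, shape (50, 2)

clf = ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=5))
clf.fit(x_train, y_train)           # internally trains on y_train[:, 1]
proba = clf.predict_proba(x_train)  # shape (50, 2)
```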
@@ -1,143 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

from abc import abstractmethod, ABC
from typing import Optional, List

import numpy as np
from sklearn.metrics._ranking import _binary_clf_curve
from sklearn.model_selection import cross_val_predict

from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.adaptive import AdaptiveClassifier
from miplearn.classifiers import Classifier


class Threshold(ABC):
    """
    Solver components ask the machine learning models how confident they are in
    each prediction they make, then automatically discard all predictions that
    have low confidence. A Threshold specifies how confident the ML models should
    be for a prediction to be considered trustworthy.

    To model dynamic thresholds, which automatically adjust themselves during
    training to reach some desired target (such as minimum precision, or minimum
    recall), thresholds behave somewhat similarly to ML models themselves, with
    `fit` and `predict` methods.
    """

    @abstractmethod
    def fit(
        self,
        clf: Classifier,
        x_train: np.ndarray,
        y_train: np.ndarray,
    ) -> None:
        """
        Given a trained binary classifier `clf`, calibrates itself based on the
        classifier's performance on the given training data set.
        """
        assert isinstance(clf, Classifier)
        assert isinstance(x_train, np.ndarray)
        assert isinstance(y_train, np.ndarray)
        n_samples = x_train.shape[0]
        assert y_train.shape[0] == n_samples

    @abstractmethod
    def predict(self, x_test: np.ndarray) -> List[float]:
        """
        Returns the minimum probability for a machine learning prediction to be
        considered trustworthy. There is one value for each label.
        """
        pass

    @abstractmethod
    def clone(self) -> "Threshold":
        """
        Returns an unfitted copy of this threshold with the same hyperparameters.
        """
        pass


class MinProbabilityThreshold(Threshold):
    """
    A threshold which considers predictions trustworthy if their probability of
    being correct, as computed by the machine learning models, is above a fixed
    value.
    """

    def __init__(self, min_probability: List[float]):
        self.min_probability = min_probability

    def fit(self, clf: Classifier, x_train: np.ndarray, y_train: np.ndarray) -> None:
        pass

    def predict(self, x_test: np.ndarray) -> List[float]:
        return self.min_probability

    def clone(self) -> "MinProbabilityThreshold":
        return MinProbabilityThreshold(self.min_probability)


class MinPrecisionThreshold(Threshold):
    """
    A dynamic threshold which automatically adjusts itself during training to
    ensure that the component achieves at least a given precision `p` on the
    training data set. Note that increasing a component's minimum precision may
    reduce its recall.
    """

    def __init__(self, min_precision: List[float]) -> None:
        self.min_precision = min_precision
        self._computed_threshold: Optional[List[float]] = None

    def fit(
        self,
        clf: Classifier,
        x_train: np.ndarray,
        y_train: np.ndarray,
    ) -> None:
        super().fit(clf, x_train, y_train)
        (n_samples, n_classes) = y_train.shape
        if isinstance(clf, AdaptiveClassifier) and isinstance(
            clf.classifier, ScikitLearnClassifier
        ):
            proba = cross_val_predict(
                clf.classifier.inner_clf,
                x_train,
                y_train[:, 1],
                method="predict_proba",
            )
        else:
            proba = clf.predict_proba(x_train)
        self._computed_threshold = [
            self._compute(
                y_train[:, i],
                proba[:, i],
                self.min_precision[i],
            )
            for i in range(n_classes)
        ]

    def predict(self, x_test: np.ndarray) -> List[float]:
        assert self._computed_threshold is not None
        return self._computed_threshold

    @staticmethod
    def _compute(
        y_actual: np.ndarray,
        y_prob: np.ndarray,
        min_precision: float,
        min_recall: float = 0.1,
    ) -> float:
        fps, tps, thresholds = _binary_clf_curve(y_actual, y_prob)
        precision = tps / (tps + fps)
        recall = tps / tps[-1]
        for k in reversed(range(len(precision))):
            if precision[k] >= min_precision and recall[k] >= min_recall:
                return float(thresholds[k])
        return float("inf")

    def clone(self) -> "MinPrecisionThreshold":
        return MinPrecisionThreshold(
            min_precision=self.min_precision,
        )
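A sketch of calibrating a `MinPrecisionThreshold` against a trained classifier. It is not part of this commit, targets the pre-0.3 API removed above, and the module path `miplearn.classifiers.threshold` is an assumption, since the diff does not name this file:

```python
# Hypothetical sketch: fit a classifier, then calibrate per-label probability
# cutoffs so that trusted predictions reach at least 95% precision.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.threshold import MinPrecisionThreshold  # assumed path

rng = np.random.default_rng(1)
x_train = rng.random((200, 4))
labels = x_train[:, 0] > 0.5
y_train = np.column_stack([~labels, labels])

clf = ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=5))
clf.fit(x_train, y_train)

t = MinPrecisionThreshold(min_precision=[0.95, 0.95])
t.fit(clf, x_train, y_train)
print(t.predict(x_train))  # per-label cutoffs; float("inf") if unattainable
```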