MIPLearn v0.3

2023-06-08 11:25:39 -05:00
parent 6cc253a903
commit 1ea989d48a
172 changed files with 10495 additions and 24812 deletions

View File

@@ -1,163 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod
from typing import Optional
import numpy as np
class Classifier(ABC):
"""
A Classifier decides which class each sample belongs to, based on historical
data.
"""
def __init__(self) -> None:
self.n_features: Optional[int] = None
self.n_classes: Optional[int] = None
@abstractmethod
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
"""
Trains the classifier.
Parameters
----------
x_train: np.ndarray
An array of features with shape (`n_samples`, `n_features`). Each entry
must be a float.
y_train: np.ndarray
An array of labels with shape (`n_samples`, `n_classes`). Each entry must be
a bool, and there must be exactly one True element in each row.
"""
assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray)
assert x_train.dtype in [
np.float16,
np.float32,
np.float64,
], f"x_train.dtype should be float. Found {x_train.dtype} instead."
assert y_train.dtype == np.bool_
assert len(x_train.shape) == 2
assert len(y_train.shape) == 2
(n_samples_x, n_features) = x_train.shape
(n_samples_y, n_classes) = y_train.shape
assert n_samples_y == n_samples_x
self.n_features = n_features
self.n_classes = n_classes
@abstractmethod
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
"""
Predicts the probability of each sample belonging to each class. Must be called
after fit.
Parameters
----------
x_test: np.ndarray
An array of features with shape (`n_samples`, `n_features`). The number of
features in `x_test` must match the number of features in `x_train` provided
to `fit`.
Returns
-------
np.ndarray
An array of predicted probabilities with shape (`n_samples`, `n_classes`),
where `n_classes` is the number of columns in `y_train` provided to `fit`.
"""
assert self.n_features is not None
assert isinstance(x_test, np.ndarray)
assert len(x_test.shape) == 2
(n_samples, n_features_x) = x_test.shape
assert n_features_x == self.n_features, (
f"Test and training data have different number of "
f"features: {n_features_x} != {self.n_features}"
)
return np.ndarray([])
@abstractmethod
def clone(self) -> "Classifier":
"""
Returns an unfitted copy of this classifier with the same hyperparameters.
"""
pass
class Regressor(ABC):
"""
A Regressor tries to predict the values of some continuous variables, given the
values of other variables.
"""
def __init__(self) -> None:
self.n_inputs: Optional[int] = None
@abstractmethod
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
"""
Trains the regressor.
Parameters
----------
x_train: np.ndarray
An array of inputs with shape (`n_samples`, `n_inputs`). Each entry must be
a float.
y_train: np.ndarray
An array of outputs with shape (`n_samples`, `n_outputs`). Each entry must
be a float.
"""
assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray)
assert x_train.dtype in [np.float16, np.float32, np.float64]
assert y_train.dtype in [np.float16, np.float32, np.float64]
assert len(x_train.shape) == 2, (
f"Parameter x_train should be a square matrix. "
f"Found {x_train.shape} ndarray instead."
)
assert len(y_train.shape) == 2, (
f"Parameter y_train should be a square matrix. "
f"Found {y_train.shape} ndarray instead."
)
(n_samples_x, n_inputs) = x_train.shape
(n_samples_y, n_outputs) = y_train.shape
assert n_samples_y == n_samples_x
self.n_inputs = n_inputs
@abstractmethod
def predict(self, x_test: np.ndarray) -> np.ndarray:
"""
Predicts the values of the output variables. Must be called after fit.
Parameters
----------
x_test: np.ndarray
An array of inputs with shape (`n_samples`, `n_inputs`), where `n_inputs`
must match the number of columns in `x_train` provided to `fit`.
Returns
-------
np.ndarray
An array of outputs with shape (`n_samples`, `n_outputs`), where
`n_outputs` is the number of columns in `y_train` provided to `fit`.
"""
assert self.n_inputs is not None
assert isinstance(x_test, np.ndarray), (
f"Parameter x_train must be np.ndarray. "
f"Found {x_test.__class__.__name__} instead."
)
assert len(x_test.shape) == 2
(n_samples, n_inputs_x) = x_test.shape
assert n_inputs_x == self.n_inputs, (
f"Test and training data have different number of "
f"inputs: {n_inputs_x} != {self.n_inputs}"
)
return np.ndarray([])
@abstractmethod
def clone(self) -> "Regressor":
"""
Returns an unfitted copy of this regressor with the same hyperparameters.
"""
pass
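
The contract these base classes enforce is easiest to see with concrete arrays. A minimal sketch (synthetic data, not part of this commit) of the shapes and dtypes the assertions above expect:

```python
import numpy as np

# Features: float matrix of shape (n_samples, n_features).
x_train = np.array([[0.0, 1.0], [1.0, 0.0], [2.0, 1.0]])

# Classifier labels: bool matrix of shape (n_samples, n_classes),
# with exactly one True entry per row.
y_clf = np.array([[True, False], [False, True], [False, True]])

# Regressor targets: float matrix of shape (n_samples, n_outputs).
y_reg = np.array([[1.5], [0.2], [3.0]])
```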

View File

@@ -1,135 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Dict, Optional
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier
logger = logging.getLogger(__name__)
class CandidateClassifierSpecs:
"""
Specifications describing how to construct a certain classifier, and under
which circumstances it can be used.
Parameters
----------
min_samples: int
Minimum number of samples for this classifier to be considered.
classifier: Classifier
Prototype classifier; it is cloned before being fitted.
"""
def __init__(
self,
classifier: Classifier,
min_samples: int = 0,
) -> None:
self.min_samples = min_samples
self.classifier = classifier
class AdaptiveClassifier(Classifier):
"""
A meta-classifier which dynamically selects what actual classifier to use
based on its cross-validation score on a particular training data set.
Parameters
----------
candidates: Dict[str, CandidateClassifierSpecs]
A dictionary of candidate classifiers to consider, mapping the name of each
candidate to its specs, which describe how to construct it and under what
circumstances it may be used. If no candidates are provided, a fixed set of
defaults is used, which includes `CountingClassifier`, `KNeighborsClassifier`,
`LogisticRegression` and `RandomForestClassifier`.
"""
def __init__(
self,
candidates: Optional[Dict[str, CandidateClassifierSpecs]] = None,
) -> None:
super().__init__()
if candidates is None:
candidates = {
"forest(5,10)": CandidateClassifierSpecs(
classifier=ScikitLearnClassifier(
RandomForestClassifier(
n_estimators=5,
min_samples_split=10,
),
),
min_samples=100,
),
"knn(100)": CandidateClassifierSpecs(
classifier=ScikitLearnClassifier(
KNeighborsClassifier(n_neighbors=100)
),
min_samples=100,
),
"logistic": CandidateClassifierSpecs(
classifier=ScikitLearnClassifier(
make_pipeline(
StandardScaler(),
LogisticRegression(),
)
),
min_samples=30,
),
"counting": CandidateClassifierSpecs(
classifier=CountingClassifier(),
),
}
self.candidates = candidates
self.classifier: Optional[Classifier] = None
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
n_samples = x_train.shape[0]
assert y_train.shape == (n_samples, 2)
# If almost all samples belong to the same class, return a fixed prediction and
# skip all the other steps.
if y_train[:, 0].mean() > 0.99 or y_train[:, 1].mean() > 0.99:
self.classifier = CountingClassifier()
self.classifier.fit(x_train, y_train)
return
best_name, best_clf, best_score = None, None, -float("inf")
for (name, specs) in self.candidates.items():
if n_samples < specs.min_samples:
continue
clf = specs.classifier.clone()
if isinstance(clf, ScikitLearnClassifier):
proba = cross_val_predict(
clf.inner_clf,
x_train,
y_train[:, 1],
method="predict_proba",
)[:, 1]
else:
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)[:, 1]
score = roc_auc_score(y_train[:, 1], proba)
if score > best_score:
best_name, best_clf, best_score = name, clf, score
logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score))
if isinstance(best_clf, ScikitLearnClassifier):
best_clf.fit(x_train, y_train)
self.classifier = best_clf
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
assert self.classifier is not None
return self.classifier.predict_proba(x_test)
def clone(self) -> "AdaptiveClassifier":
return AdaptiveClassifier(self.candidates)
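
A usage sketch of AdaptiveClassifier on synthetic data (the import path matches the file being removed by this commit):

```python
import numpy as np
from miplearn.classifiers.adaptive import AdaptiveClassifier

rng = np.random.default_rng(42)
x = rng.normal(size=(200, 3))
labels = x[:, 0] > 0
y = np.column_stack([~labels, labels])  # bool labels, one True per row

clf = AdaptiveClassifier()
clf.fit(x, y)                 # cross-validates each candidate, keeps the best AUC
proba = clf.predict_proba(x)  # shape (200, 2)
```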

View File

@@ -1,45 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, cast
import numpy as np
from miplearn.classifiers import Classifier
class CountingClassifier(Classifier):
"""
A classifier that generates constant predictions, based only on the frequency of
the training labels. For example, suppose `y_train` is given by:
```python
y_train = np.array([
    [True, False],
    [False, True],
    [False, True],
])
```
Then `predict_proba` always returns `[0.33, 0.67]` for every sample, regardless of
`x_test`. It essentially counts how many times each label appeared, hence the name.
"""
def __init__(self) -> None:
super().__init__()
self.mean: Optional[np.ndarray] = None
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
self.mean = cast(np.ndarray, np.mean(y_train, axis=0))
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
n_samples = x_test.shape[0]
return np.array([self.mean for _ in range(n_samples)])
def __repr__(self) -> str:
return "CountingClassifier(mean=%s)" % self.mean
def clone(self) -> "CountingClassifier":
return CountingClassifier()
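
A quick sketch reproducing the docstring example (synthetic data):

```python
import numpy as np
from miplearn.classifiers.counting import CountingClassifier

x_train = np.zeros((3, 1))  # features are ignored by this classifier
y_train = np.array([[True, False], [False, True], [False, True]])

clf = CountingClassifier()
clf.fit(x_train, y_train)
print(clf.predict_proba(np.zeros((2, 1))))
# [[0.33333333 0.66666667]
#  [0.33333333 0.66666667]]
```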

View File

@@ -1,132 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Optional, List
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from miplearn.classifiers import Classifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier
logger = logging.getLogger(__name__)
class CrossValidatedClassifier(Classifier):
"""
A meta-classifier that, upon training, evaluates the performance of another
candidate classifier on the training data set using k-fold cross validation,
then either adopts the candidate, if its cv score is high enough, or falls
back to constant predictions for every `x_test` otherwise.
Parameters
----------
classifier: ScikitLearnClassifier
Prototype candidate classifier; it is cloned before being fitted.
threshold: float
Number from zero to one indicating how well the candidate classifier must
perform to be adopted. The threshold is specified relative to a dummy
classifier trained on the same dataset. A threshold of 0.0 indicates that
any classifier as good as the dummy predictor is acceptable; a threshold of
1.0 indicates that only classifiers with perfect cross-validation scores are
acceptable. Other values interpolate linearly between these two extremes:
for example, if the dummy classifier scores 0.8, a threshold of 0.75
requires a cross-validation score of at least 0.75 * 1.0 + 0.25 * 0.8 = 0.95.
constant: Optional[List[bool]]
If the candidate classifier fails to meet the threshold, use a dummy classifier
which always returns this prediction instead. The list should have exactly as
many elements as the number of columns of `y_train` provided to `fit`.
cv: int
Number of folds.
scoring: str
Scoring function.
"""
def __init__(
self,
classifier: ScikitLearnClassifier = ScikitLearnClassifier(LogisticRegression()),
threshold: float = 0.75,
constant: Optional[List[bool]] = None,
cv: int = 5,
scoring: str = "accuracy",
):
super().__init__()
if constant is None:
constant = [True, False]
self.n_classes = len(constant)
self.classifier: Optional[ScikitLearnClassifier] = None
self.classifier_prototype = classifier
self.constant: List[bool] = constant
self.threshold = threshold
self.cv = cv
self.scoring = scoring
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
(n_samples, n_classes) = y_train.shape
assert n_classes == self.n_classes
# Calculate dummy score and absolute score threshold
y_train_avg = np.average(y_train[:, 1])
dummy_score = max(y_train_avg, 1 - y_train_avg)
absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)
# Calculate cross validation score and decide which classifier to use
clf = self.classifier_prototype.clone()
assert clf is not None
assert isinstance(clf, ScikitLearnClassifier), (
f"The provided classifier callable must return a ScikitLearnClassifier. "
f"Found {clf.__class__.__name__} instead. If this is a scikit-learn "
f"classifier, you must wrap it with ScikitLearnClassifier."
)
cv_score = float(
np.mean(
cross_val_score(
clf.inner_clf,
x_train,
y_train[:, 1],
cv=self.cv,
scoring=self.scoring,
)
)
)
if cv_score >= absolute_threshold:
logger.debug(
"cv_score is above threshold (%.2f >= %.2f); keeping"
% (cv_score, absolute_threshold)
)
self.classifier = clf
else:
logger.debug(
"cv_score is below threshold (%.2f < %.2f); discarding"
% (cv_score, absolute_threshold)
)
self.classifier = ScikitLearnClassifier(
DummyClassifier(
strategy="constant",
constant=self.constant[1],
)
)
# Train chosen classifier
assert self.classifier is not None
assert isinstance(self.classifier, ScikitLearnClassifier)
self.classifier.fit(x_train, y_train)
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
assert self.classifier is not None
return self.classifier.predict_proba(x_test)
def clone(self) -> "CrossValidatedClassifier":
return CrossValidatedClassifier(
classifier=self.classifier_prototype,
threshold=self.threshold,
constant=self.constant,
cv=self.cv,
scoring=self.scoring,
)
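
A usage sketch on synthetic data; the module path is an assumption, since file names are not shown in this view:

```python
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from miplearn.classifiers.cv import CrossValidatedClassifier  # path assumed
from miplearn.classifiers.sklearn import ScikitLearnClassifier

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 2))
labels = x[:, 0] + x[:, 1] > 0
y = np.column_stack([~labels, labels])

clf = CrossValidatedClassifier(
    classifier=ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=5)),
    threshold=0.9,  # close 90% of the gap between dummy and perfect accuracy
)
clf.fit(x, y)  # adopts the KNN candidate only if its cv accuracy clears the bar
```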

View File

@@ -0,0 +1,61 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import List, Any, Callable, Optional
import numpy as np
import sklearn
from sklearn.base import BaseEstimator
from sklearn.utils.multiclass import unique_labels
class MinProbabilityClassifier(BaseEstimator):
"""
Meta-classifier that returns NaN for predictions of a base classifier whose
probability falls below a given threshold. More specifically, this meta-classifier
calls base_clf.predict_proba and compares the result against the provided
thresholds. If the probability for one of the classes is above its threshold,
the meta-classifier returns that prediction. Otherwise, it returns NaN.
"""
def __init__(
self,
base_clf: Any,
thresholds: List[float],
clone_fn: Callable[[Any], Any] = sklearn.base.clone,
) -> None:
assert len(thresholds) == 2
self.base_clf = base_clf
self.thresholds = thresholds
self.clone_fn = clone_fn
self.clf_: Optional[Any] = None
self.classes_: Optional[List[Any]] = None
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
assert len(y.shape) == 1
assert len(x.shape) == 2
classes = unique_labels(y)
assert len(classes) == len(self.thresholds)
self.clf_ = self.clone_fn(self.base_clf)
self.clf_.fit(x, y)
self.classes_ = self.clf_.classes_
def predict(self, x: np.ndarray) -> np.ndarray:
assert self.clf_ is not None
assert self.classes_ is not None
y_proba = self.clf_.predict_proba(x)
assert len(y_proba.shape) == 2
assert y_proba.shape[0] == x.shape[0]
assert y_proba.shape[1] == 2
n_samples = x.shape[0]
y_pred = []
for sample_idx in range(n_samples):
yi = float("nan")
for (class_idx, class_val) in enumerate(self.classes_):
if y_proba[sample_idx, class_idx] >= self.thresholds[class_idx]:
yi = class_val
y_pred.append(yi)
return np.array(y_pred)
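
A usage sketch (synthetic data; the module path is an assumption):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from miplearn.classifiers.minprob import MinProbabilityClassifier  # path assumed

x = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

clf = MinProbabilityClassifier(
    base_clf=LogisticRegression(),
    thresholds=[0.9, 0.9],  # require 90% confidence for either class
)
clf.fit(x, y)
print(clf.predict(x))  # confident samples yield 0 or 1; the rest yield nan
```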

View File

@@ -0,0 +1,51 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Callable, Optional
import numpy as np
import sklearn.base
from sklearn.base import BaseEstimator
from sklearn.utils.multiclass import unique_labels
class SingleClassFix(BaseEstimator):
"""
Some sklearn classifiers, such as logistic regression, have issues with datasets
that contain a single class. This meta-classifier fixes the issue. If the
training data contains a single class, this meta-classifier always returns that
class as a prediction. Otherwise, it fits the provided base classifier,
and returns its predictions instead.
"""
def __init__(
self,
base_clf: BaseEstimator,
clone_fn: Callable = sklearn.base.clone,
):
self.base_clf = base_clf
self.clf_: Optional[BaseEstimator] = None
self.constant_ = None
self.classes_ = None
self.clone_fn = clone_fn
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
classes = unique_labels(y)
if len(classes) == 1:
assert classes[0] is not None
self.clf_ = None
self.constant_ = classes[0]
self.classes_ = classes
else:
self.clf_ = self.clone_fn(self.base_clf)
assert self.clf_ is not None
self.clf_.fit(x, y)
self.constant_ = None
self.classes_ = self.clf_.classes_
def predict(self, x: np.ndarray) -> np.ndarray:
if self.constant_ is not None:
return np.full(x.shape[0], self.constant_)
else:
assert self.clf_ is not None
return self.clf_.predict(x)
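
A usage sketch (synthetic data; the module path is an assumption):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from miplearn.classifiers.singleclass import SingleClassFix  # path assumed

x = np.array([[0.0], [1.0], [2.0]])
y = np.array([1, 1, 1])  # a single class; bare LogisticRegression would raise

clf = SingleClassFix(LogisticRegression())
clf.fit(x, y)
print(clf.predict(x))  # -> [1 1 1]
```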

View File

@@ -1,93 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, Any, cast
import numpy as np
import sklearn
from miplearn.classifiers import Classifier, Regressor
class ScikitLearnClassifier(Classifier):
"""
Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
correct dimensions and types.
"""
def __init__(self, clf: Any) -> None:
super().__init__()
self.inner_clf = clf
self.constant: Optional[np.ndarray] = None
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
(n_samples, n_classes) = y_train.shape
assert n_classes == 2, (
f"Scikit-learn classifiers must have exactly two classes. "
f"{n_classes} classes were provided instead."
)
# When all samples belong to the same class, sklearn's predict_proba returns
# an array with a single column. The following check avoids this strange
# behavior.
mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
if mean.max() == 1.0:
self.constant = mean
return
self.inner_clf.fit(x_train, y_train[:, 1])
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
n_samples = x_test.shape[0]
if self.constant is not None:
return np.array([self.constant for n in range(n_samples)])
sklearn_proba = self.inner_clf.predict_proba(x_test)
if isinstance(sklearn_proba, list):
assert len(sklearn_proba) == self.n_classes
for pb in sklearn_proba:
assert isinstance(pb, np.ndarray)
assert pb.dtype in [np.float16, np.float32, np.float64]
assert pb.shape == (n_samples, 2)
proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
assert proba.shape == (n_samples, self.n_classes)
return proba
else:
assert isinstance(sklearn_proba, np.ndarray)
assert sklearn_proba.shape == (n_samples, 2)
return sklearn_proba
def clone(self) -> "ScikitLearnClassifier":
return ScikitLearnClassifier(
clf=sklearn.base.clone(self.inner_clf),
)
class ScikitLearnRegressor(Regressor):
"""
Wrapper for ScikitLearn regressors, which makes sure inputs and outputs have the
correct dimensions and types.
"""
def __init__(self, reg: Any) -> None:
super().__init__()
self.inner_reg = reg
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
self.inner_reg.fit(x_train, y_train)
def predict(self, x_test: np.ndarray) -> np.ndarray:
super().predict(x_test)
n_samples = x_test.shape[0]
sklearn_pred = self.inner_reg.predict(x_test)
assert isinstance(sklearn_pred, np.ndarray)
assert sklearn_pred.shape[0] == n_samples
return sklearn_pred
def clone(self) -> "ScikitLearnRegressor":
return ScikitLearnRegressor(
reg=sklearn.base.clone(self.inner_reg),
)
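
A usage sketch of the wrapper on synthetic data (the import path appears elsewhere in this commit):

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier

rng = np.random.default_rng(1)
x = rng.normal(size=(50, 4))
labels = x[:, 0] > 0
y = np.column_stack([~labels, labels])  # exactly two bool columns required

clf = ScikitLearnClassifier(RandomForestClassifier(n_estimators=10))
clf.fit(x, y)                 # internally fits the inner model on y[:, 1]
proba = clf.predict_proba(x)  # guaranteed shape (50, 2)
```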

View File

@@ -1,143 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import abstractmethod, ABC
from typing import Optional, List
import numpy as np
from sklearn.metrics._ranking import _binary_clf_curve
from sklearn.model_selection import cross_val_predict
from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.adaptive import AdaptiveClassifier
from miplearn.classifiers import Classifier
class Threshold(ABC):
"""
Solver components ask the machine learning models how confident they are in each
prediction they make, then automatically discard all predictions that have low
confidence. A Threshold specifies how confident the ML models must be for a
prediction to be considered trustworthy.
To model dynamic thresholds, which automatically adjust themselves during
training to reach some desired target (such as minimum precision or minimum
recall), thresholds behave much like ML models themselves, with `fit`
and `predict` methods.
"""
@abstractmethod
def fit(
self,
clf: Classifier,
x_train: np.ndarray,
y_train: np.ndarray,
) -> None:
"""
Given a trained binary classifier `clf`, calibrates itself based on the
classifier's performance on the given training data set.
"""
assert isinstance(clf, Classifier)
assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray)
n_samples = x_train.shape[0]
assert y_train.shape[0] == n_samples
@abstractmethod
def predict(self, x_test: np.ndarray) -> List[float]:
"""
Returns the minimum probability for a machine learning prediction to be
considered trustworthy. There is one value for each label.
"""
pass
@abstractmethod
def clone(self) -> "Threshold":
"""
Returns an unfitted copy of this threshold with the same hyperparameters.
"""
pass
class MinProbabilityThreshold(Threshold):
"""
A threshold which considers a prediction trustworthy if its probability of being
correct, as computed by the machine learning models, is above a fixed value.
"""
def __init__(self, min_probability: List[float]):
self.min_probability = min_probability
def fit(self, clf: Classifier, x_train: np.ndarray, y_train: np.ndarray) -> None:
pass
def predict(self, x_test: np.ndarray) -> List[float]:
return self.min_probability
def clone(self) -> "MinProbabilityThreshold":
return MinProbabilityThreshold(self.min_probability)
class MinPrecisionThreshold(Threshold):
"""
A dynamic threshold which automatically adjusts itself during training to ensure
that the component achieves at least a given precision `p` on the training data
set. Note that increasing a component's minimum precision may reduce its recall.
"""
def __init__(self, min_precision: List[float]) -> None:
self.min_precision = min_precision
self._computed_threshold: Optional[List[float]] = None
def fit(
self,
clf: Classifier,
x_train: np.ndarray,
y_train: np.ndarray,
) -> None:
super().fit(clf, x_train, y_train)
(n_samples, n_classes) = y_train.shape
if isinstance(clf, AdaptiveClassifier) and isinstance(
clf.classifier, ScikitLearnClassifier
):
proba = cross_val_predict(
clf.classifier.inner_clf,
x_train,
y_train[:, 1],
method="predict_proba",
)
else:
proba = clf.predict_proba(x_train)
self._computed_threshold = [
self._compute(
y_train[:, i],
proba[:, i],
self.min_precision[i],
)
for i in range(n_classes)
]
def predict(self, x_test: np.ndarray) -> List[float]:
assert self._computed_threshold is not None
return self._computed_threshold
@staticmethod
def _compute(
y_actual: np.ndarray,
y_prob: np.ndarray,
min_precision: float,
min_recall: float = 0.1,
) -> float:
fps, tps, thresholds = _binary_clf_curve(y_actual, y_prob)
precision = tps / (tps + fps)
recall = tps / tps[-1]
for k in reversed(range(len(precision))):
if precision[k] >= min_precision and recall[k] >= min_recall:
return thresholds[k]
return float("inf")
def clone(self) -> "MinPrecisionThreshold":
return MinPrecisionThreshold(
min_precision=self.min_precision,
)
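
A usage sketch combining a wrapped classifier with a dynamic threshold (synthetic data; the threshold module path is an assumption):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.threshold import MinPrecisionThreshold  # path assumed

rng = np.random.default_rng(2)
x = rng.normal(size=(200, 2))
labels = x[:, 0] > 0
y = np.column_stack([~labels, labels])

clf = ScikitLearnClassifier(LogisticRegression())
clf.fit(x, y)

# For each label, find the smallest probability cutoff achieving 95%
# precision on the training data (or +inf if no cutoff does).
threshold = MinPrecisionThreshold(min_precision=[0.95, 0.95])
threshold.fit(clf, x, y)
print(threshold.predict(x))  # e.g. [0.87..., 0.91...] (values depend on the data)
```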