Mirror of https://github.com/ANL-CEEESA/MIPLearn.git (synced 2025-12-06 09:28:51 -06:00)

Commit: Start refactoring of classifiers
Makefile

@@ -43,6 +43,6 @@ reformat:
 test:
 	$(MYPY) -p miplearn
 	$(MYPY) -p tests
-	$(PYTEST) $(PYTEST_ARGS)
+	$(PYTEST) $(PYTEST_ARGS) tests/classifiers

 .PHONY: test test-watch docs install
@@ -152,25 +152,18 @@ dtype: float64

 ### Using customized ML classifiers and regressors

-By default, given a training set of instantes, MIPLearn trains a fixed set of ML classifiers and regressors, then
-selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component
-should use through the `classifier` or `regressor` contructor parameters. The provided classifiers and regressors must
-follow the sklearn API. In particular, classifiers must provide the methods `fit`, `predict_proba` and `predict`,
-while regressors must provide the methods `fit` and `predict`
+By default, given a training set of instances, MIPLearn trains a fixed set of ML classifiers and regressors, then selects the best one based on cross-validation performance. Alternatively, the user may specify which ML model a component should use through the `classifier` or `regressor` constructor parameters. Scikit-learn classifiers and regressors are currently supported. A future version of the package will add compatibility with Keras models.

-!!! danger
-    MIPLearn must be able to generate a copy of any custom ML classifiers and regressors through
-    the standard `copy.deepcopy` method. This currently makes it incompatible with Keras and TensorFlow
-    predictors. This is a known limitation, which will be addressed in a future version.
-
-The example below shows how to construct a `PrimalSolutionComponent` which internally uses
-sklearn's `KNeighborsClassifiers`. Any other sklearn classifier or pipeline can be used.
+The example below shows how to construct a `PrimalSolutionComponent` which internally uses scikit-learn's `KNeighborsClassifier`. Any other scikit-learn classifier or pipeline can be used. The classifier must be provided as a lambda function, because the component may need to create multiple copies of it, and it must be wrapped in `ScikitLearnClassifier` to ensure that the proper data transformations are applied.

 ```python
-from miplearn import PrimalSolutionComponent
+from miplearn import PrimalSolutionComponent, ScikitLearnClassifier
 from sklearn.neighbors import KNeighborsClassifier

-comp = PrimalSolutionComponent(classifier=KNeighborsClassifier(n_neighbors=5))
+comp = PrimalSolutionComponent(
+    classifier=lambda: ScikitLearnClassifier(
+        KNeighborsClassifier(n_neighbors=5),
+    ),
+)
 comp.fit(train_instances)
 ```
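Any other estimator can be wrapped the same way, including scikit-learn pipelines. A minimal sketch (illustrative, not part of this commit), assuming the same `train_instances` as above:

```python
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from miplearn import PrimalSolutionComponent, ScikitLearnClassifier

# Illustrative: standardize features, then fit a logistic regression. The
# lambda lets the component build a fresh copy of the pipeline when needed.
comp = PrimalSolutionComponent(
    classifier=lambda: ScikitLearnClassifier(
        make_pipeline(StandardScaler(), LogisticRegression()),
    ),
)
comp.fit(train_instances)
```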
miplearn/__init__.py

@@ -3,7 +3,11 @@
 # Released under the modified BSD license. See COPYING.md for more details.

 from .benchmark import BenchmarkRunner
-from .classifiers import Classifier, Regressor
+from .classifiers import (
+    Classifier,
+    Regressor,
+    ScikitLearnClassifier,
+)
 from .classifiers.adaptive import AdaptiveClassifier
 from .classifiers.threshold import MinPrecisionThreshold
 from .components.component import Component
miplearn/classifiers/__init__.py

@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.

 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Optional, Any

 import numpy as np

@@ -14,7 +14,7 @@ class Classifier(ABC):
     data.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         self.n_features: Optional[int] = None
         self.n_classes: Optional[int] = None

@@ -77,7 +77,7 @@ class Regressor(ABC):
     values of other variables.
     """

-    def __init__(self):
+    def __init__(self) -> None:
         self.n_inputs: Optional[int] = None

     @abstractmethod
@@ -128,3 +128,38 @@ class Regressor(ABC):
         (n_samples, n_inputs_x) = x_test.shape
         assert n_inputs_x == self.n_inputs
         return np.ndarray([])
+
+
+class ScikitLearnClassifier(Classifier):
+    """
+    Wrapper for scikit-learn classifiers, which makes sure inputs and outputs have
+    the correct dimensions and types.
+    """
+
+    def __init__(self, clf: Any) -> None:
+        super().__init__()
+        self.inner_clf = clf
+
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        (n_samples, n_classes) = y_train.shape
+        assert n_classes == 2, "scikit-learn classifiers must have exactly two classes"
+        self.inner_clf.fit(x_train, y_train[:, 1])
+
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        n_samples = x_test.shape[0]
+        sklearn_proba = self.inner_clf.predict_proba(x_test)
+        if isinstance(sklearn_proba, list):
+            assert len(sklearn_proba) == self.n_classes
+            for pb in sklearn_proba:
+                assert isinstance(pb, np.ndarray)
+                assert pb.dtype in [np.float16, np.float32, np.float64]
+                assert pb.shape == (n_samples, 2)
+            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
+            assert proba.shape == (n_samples, self.n_classes)
+            return proba
+        else:
+            assert isinstance(sklearn_proba, np.ndarray)
+            assert sklearn_proba.shape == (n_samples, 2)
+            return sklearn_proba
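A usage sketch of the wrapper added above (illustrative; the data and classifier choice are assumptions, not from the commit): labels are passed one-hot, the inner classifier is trained on the positive column only, and `predict_proba` always returns an `(n_samples, 2)` array.

```python
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from miplearn import ScikitLearnClassifier

# One-hot labels: column 0 is the negative class, column 1 the positive class.
x_train = np.array([[0.0], [0.1], [0.9], [1.0]])
y_train = np.array(
    [
        [True, False],
        [True, False],
        [False, True],
        [False, True],
    ]
)

clf = ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=1))
clf.fit(x_train, y_train)  # the inner classifier sees only y_train[:, 1]

proba = clf.predict_proba(np.array([[0.05], [0.95]]))
assert proba.shape == (2, 2)  # always (n_samples, 2)
```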
miplearn/classifiers/adaptive.py

@@ -29,9 +29,6 @@ class AdaptiveClassifier(Classifier):
         candidates: Dict[str, Any] = None,
         evaluator: ClassifierEvaluator = ClassifierEvaluator(),
     ) -> None:
-        """
-        Initializes the meta-classifier.
-        """
         if candidates is None:
             candidates = {
                 "knn(100)": {
miplearn/classifiers/counting.py

@@ -1,6 +1,7 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
+from typing import Optional, cast

 import numpy as np

@@ -9,20 +10,33 @@ from miplearn.classifiers import Classifier

 class CountingClassifier(Classifier):
     """
-    A classifier that generates constant predictions, based only on the
-    frequency of the training labels. For example, if y_train is [1.0, 0.0, 0.0]
-    this classifier always returns [0.66 0.33] for any x_test. It essentially
-    counts how many times each label appeared, hence the name.
+    A classifier that generates constant predictions, based only on the frequency of
+    the training labels. For example, suppose `y_train` is given by:
+    ```python
+    y_train = np.array([
+        [True, False],
+        [False, True],
+        [False, True],
+    ])
+    ```
+    Then `predict_proba` always returns `[0.33 0.66]` for every sample, regardless of
+    `x_test`. It essentially counts how many times each label appeared, hence the name.
+
     """

     def __init__(self) -> None:
-        self.mean = None
+        super().__init__()
+        self.mean: Optional[np.ndarray] = None

-    def fit(self, x_train, y_train):
-        self.mean = np.mean(y_train)
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        self.mean = cast(np.ndarray, np.mean(y_train, axis=0))

-    def predict_proba(self, x_test):
-        return np.array([[1 - self.mean, self.mean] for _ in range(x_test.shape[0])])
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        n_samples = x_test.shape[0]
+        return np.array([self.mean for _ in range(n_samples)])

     def __repr__(self):
         return "CountingClassifier(mean=%s)" % self.mean
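The frequencies in the docstring are just the column-wise means of the one-hot label matrix; a quick illustrative check:

```python
import numpy as np

y_train = np.array(
    [
        [True, False],
        [False, True],
        [False, True],
    ]
)
# Column-wise mean over one-hot labels = frequency of each label.
print(np.mean(y_train, axis=0))  # [0.3333... 0.6666...]
```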
miplearn/classifiers/cv.py

@@ -3,14 +3,14 @@
 # Released under the modified BSD license. See COPYING.md for more details.

 import logging
-from copy import deepcopy
+from typing import Optional, Callable, List

 import numpy as np
 from sklearn.dummy import DummyClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_val_score

-from miplearn.classifiers import Classifier
+from miplearn.classifiers import Classifier, ScikitLearnClassifier

 logger = logging.getLogger(__name__)

@@ -18,35 +18,58 @@ logger = logging.getLogger(__name__)
 class CrossValidatedClassifier(Classifier):
     """
     A meta-classifier that, upon training, evaluates the performance of another
-    classifier on the training data set using k-fold cross validation, then
-    either adopts the other classifier it if the cv-score is high enough, or
-    returns a constant label for every x_test otherwise.
+    candidate classifier on the training data set, using k-fold cross validation,
+    then either adopts it, if its cv-score is high enough, or returns constant
+    predictions for every x_test, otherwise.

-    The threshold is specified in comparison to a dummy classifier trained
-    on the same dataset. For example, a threshold of 0.0 indicates that any
-    classifier as good as the dummy predictor is acceptable. A threshold of 1.0
-    indicates that only classifier with a perfect cross-validation score are
-    acceptable. Other numbers are a linear interpolation of these two extremes.
+    Parameters
+    ----------
+    classifier: Callable[[], ScikitLearnClassifier]
+        A callable that constructs the candidate classifier.
+    threshold: float
+        Number from zero to one indicating how well the candidate classifier must
+        perform to be adopted. The threshold is specified in comparison to a dummy
+        classifier trained on the same dataset. For example, a threshold of 0.0
+        indicates that any classifier as good as the dummy predictor is acceptable.
+        A threshold of 1.0 indicates that only classifiers with perfect
+        cross-validation scores are acceptable. Other numbers are a linear
+        interpolation of these two extremes.
+    constant: Optional[List[bool]]
+        If the candidate classifier fails to meet the threshold, use a dummy
+        classifier which always returns this prediction instead. The list should
+        have exactly as many elements as the number of columns of `y_train`
+        provided to `fit`.
+    cv: int
+        Number of folds.
+    scoring: str
+        Scoring function.
     """

     def __init__(
         self,
-        classifier=LogisticRegression(),
-        threshold=0.75,
-        constant=0.0,
-        cv=5,
-        scoring="accuracy",
+        classifier: Callable[[], ScikitLearnClassifier] = (
+            lambda: ScikitLearnClassifier(LogisticRegression())
+        ),
+        threshold: float = 0.75,
+        constant: Optional[List[bool]] = None,
+        cv: int = 5,
+        scoring: str = "accuracy",
     ):
         super().__init__()
-        self.classifier = None
-        self.classifier_prototype = classifier
-        self.constant = constant
+        if constant is None:
+            constant = [True, False]
+        self.n_classes = len(constant)
+        self.classifier: Optional[ScikitLearnClassifier] = None
+        self.classifier_factory = classifier
+        self.constant: List[bool] = constant
         self.threshold = threshold
         self.cv = cv
         self.scoring = scoring

-    def fit(self, x_train, y_train):
-        # super().fit(x_train, y_train)
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        super().fit(x_train, y_train)
+        (n_samples, n_classes) = y_train.shape
+        assert n_classes == self.n_classes

         # Calculate dummy score and absolute score threshold
         y_train_avg = np.average(y_train)
@@ -54,13 +77,20 @@ class CrossValidatedClassifier(Classifier):
         absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)

         # Calculate cross validation score and decide which classifier to use
-        clf = deepcopy(self.classifier_prototype)
+        clf = self.classifier_factory()
+        assert clf is not None
+        assert isinstance(clf, ScikitLearnClassifier), (
+            f"The provided classifier callable must return a ScikitLearnClassifier. "
+            f"Found {clf.__class__.__name__} instead. If this is a scikit-learn "
+            f"classifier, you must wrap it with ScikitLearnClassifier."
+        )
         cv_score = float(
             np.mean(
                 cross_val_score(
-                    clf,
+                    clf.inner_clf,
                     x_train,
-                    y_train,
+                    y_train[:, 1],
                     cv=self.cv,
                     scoring=self.scoring,
                 )
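A worked example of the threshold interpolation used above (the numbers are illustrative):

```python
# With threshold=0.75 and a dummy cv-score of 0.60, the candidate classifier
# must reach a cv-score of at least 0.90 to be adopted:
threshold = 0.75
dummy_score = 0.60
absolute_threshold = 1.0 * threshold + dummy_score * (1 - threshold)
assert abs(absolute_threshold - 0.90) < 1e-9
```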
@@ -77,14 +107,19 @@ class CrossValidatedClassifier(Classifier):
                 "cv_score is below threshold (%.2f < %.2f); discarding"
                 % (cv_score, absolute_threshold)
             )
-            self.classifier = DummyClassifier(
-                strategy="constant",
-                constant=self.constant,
+            self.classifier = ScikitLearnClassifier(
+                DummyClassifier(
+                    strategy="constant",
+                    constant=self.constant[1],
+                )
             )

         # Train chosen classifier
+        assert self.classifier is not None
+        assert isinstance(self.classifier, ScikitLearnClassifier)
         self.classifier.fit(x_train, y_train)

-    def predict_proba(self, x_test):
-        # super().predict_proba(x_test)
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        super().predict_proba(x_test)
+        assert self.classifier is not None
         return self.classifier.predict_proba(x_test)
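A minimal end-to-end sketch of the refactored interface (illustrative; the random data and `DecisionTreeClassifier` are assumptions, not part of this commit):

```python
import numpy as np
from sklearn.tree import DecisionTreeClassifier

from miplearn.classifiers import ScikitLearnClassifier
from miplearn.classifiers.cv import CrossValidatedClassifier

# Random features with one-hot labels; on noise like this the candidate is
# unlikely to clear the threshold, so the constant prediction is used instead.
rng = np.random.RandomState(42)
x_train = rng.rand(100, 5)
positive = rng.rand(100) < 0.5
y_train = np.column_stack([~positive, positive])

clf = CrossValidatedClassifier(
    classifier=lambda: ScikitLearnClassifier(DecisionTreeClassifier()),
    threshold=0.90,
    constant=[True, False],
)
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)
assert proba.shape == (100, 2)
```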
tests/classifiers/test_counting.py

@@ -12,7 +12,27 @@ E = 0.1

 def test_counting():
     clf = CountingClassifier()
-    clf.fit(np.zeros((8, 25)), [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])
-    expected_proba = np.array([[0.375, 0.625], [0.375, 0.625]])
-    actual_proba = clf.predict_proba(np.zeros((2, 25)))
-    assert norm(actual_proba - expected_proba) < E
+    n_features = 25
+    x_train = np.zeros((8, n_features))
+    y_train = np.array(
+        [
+            [True, False, False],
+            [True, False, False],
+            [False, True, False],
+            [True, False, False],
+            [False, True, False],
+            [False, True, False],
+            [False, True, False],
+            [False, False, True],
+        ]
+    )
+    x_test = np.zeros((2, n_features))
+    y_expected = np.array(
+        [
+            [3 / 8.0, 4 / 8.0, 1 / 8.0],
+            [3 / 8.0, 4 / 8.0, 1 / 8.0],
+        ]
+    )
+    clf.fit(x_train, y_train)
+    y_actual = clf.predict_proba(x_test)
+    assert norm(y_actual - y_expected) < E
tests/classifiers/test_cv.py

@@ -7,20 +7,37 @@ from numpy.linalg import norm
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC

+from miplearn.classifiers import ScikitLearnClassifier
 from miplearn.classifiers.cv import CrossValidatedClassifier

 E = 0.1


-def test_cv():
+def test_cv() -> None:
     # Training set: label is true if point is inside a 2D circle
-    x_train = np.array([[x1, x2] for x1 in range(-10, 11) for x2 in range(-10, 11)])
+    x_train = np.array(
+        [
+            [
+                x1,
+                x2,
+            ]
+            for x1 in range(-10, 11)
+            for x2 in range(-10, 11)
+        ]
+    )
     x_train = StandardScaler().fit_transform(x_train)
     n_samples = x_train.shape[0]

     y_train = np.array(
         [
-            1.0 if x1 * x1 + x2 * x2 <= 100 else 0.0
+            [
+                False,
+                True,
+            ]
+            if x1 * x1 + x2 * x2 <= 100
+            else [
+                True,
+                False,
+            ]
             for x1 in range(-10, 11)
             for x2 in range(-10, 11)
         ]
@@ -29,24 +46,39 @@ def test_cv():
     # Support vector machines with linear kernels do not perform well on this
     # data set, so predictor should return the given constant.
     clf = CrossValidatedClassifier(
-        classifier=SVC(probability=True, random_state=42),
+        classifier=lambda: ScikitLearnClassifier(
+            SVC(
+                probability=True,
+                random_state=42,
+            )
+        ),
         threshold=0.90,
-        constant=0.0,
+        constant=[True, False],
         cv=30,
     )
     clf.fit(x_train, y_train)
     proba = clf.predict_proba(x_train)
+    assert isinstance(proba, np.ndarray)
+    assert proba.shape == (n_samples, 2)

     y_pred = (proba[:, 1] > 0.5).astype(float)
     assert norm(np.zeros(n_samples) - y_pred) < E

     # Support vector machines with quadratic kernels perform almost perfectly
     # on this data set, so predictor should return their prediction.
     clf = CrossValidatedClassifier(
-        classifier=SVC(probability=True, kernel="poly", degree=2, random_state=42),
+        classifier=lambda: ScikitLearnClassifier(
+            SVC(
+                probability=True,
+                kernel="poly",
+                degree=2,
+                random_state=42,
+            )
+        ),
         threshold=0.90,
         cv=30,
     )
     clf.fit(x_train, y_train)
     proba = clf.predict_proba(x_train)
     y_pred = (proba[:, 1] > 0.5).astype(float)
-    assert norm(y_train - y_pred) < E
+    assert norm(y_train[:, 1] - y_pred) < E