mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 09:28:51 -06:00
Document and simplify Classifier and Regressor
This commit is contained in:
1
Makefile
1
Makefile
@@ -42,6 +42,7 @@ reformat:
|
|||||||
|
|
||||||
test:
|
test:
|
||||||
$(MYPY) -p miplearn
|
$(MYPY) -p miplearn
|
||||||
|
$(MYPY) -p tests
|
||||||
$(PYTEST) $(PYTEST_ARGS)
|
$(PYTEST) $(PYTEST_ARGS)
|
||||||
|
|
||||||
.PHONY: test test-watch docs install
|
.PHONY: test test-watch docs install
|
||||||
|
|||||||
@@ -3,31 +3,128 @@
|
|||||||
# Released under the modified BSD license. See COPYING.md for more details.
|
# Released under the modified BSD license. See COPYING.md for more details.
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class Classifier(ABC):
|
class Classifier(ABC):
|
||||||
@abstractmethod
|
"""
|
||||||
def fit(self, x_train, y_train):
|
A Classifier decides which class each sample belongs to, based on historical
|
||||||
pass
|
data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.n_features: Optional[int] = None
|
||||||
|
self.n_classes: Optional[int] = None
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def predict_proba(self, x_test):
|
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
|
||||||
pass
|
"""
|
||||||
|
Trains the classifier.
|
||||||
|
|
||||||
def predict(self, x_test):
|
Parameters
|
||||||
proba = self.predict_proba(x_test)
|
----------
|
||||||
assert isinstance(proba, np.ndarray)
|
x_train: np.ndarray
|
||||||
assert proba.shape == (x_test.shape[0], 2)
|
An array of features with shape (`n_samples`, `n_features`). Each entry
|
||||||
return (proba[:, 1] > 0.5).astype(float)
|
must be a float.
|
||||||
|
y_train: np.ndarray
|
||||||
|
An array of labels with shape (`n_samples`, `n_classes`). Each entry must be
|
||||||
|
a bool, and there must be exactly one True element in each row.
|
||||||
|
"""
|
||||||
|
assert isinstance(x_train, np.ndarray)
|
||||||
|
assert isinstance(y_train, np.ndarray)
|
||||||
|
assert x_train.dtype in [np.float16, np.float32, np.float64]
|
||||||
|
assert y_train.dtype == np.bool8
|
||||||
|
assert len(x_train.shape) == 2
|
||||||
|
assert len(y_train.shape) == 2
|
||||||
|
(n_samples_x, n_features) = x_train.shape
|
||||||
|
(n_samples_y, n_classes) = y_train.shape
|
||||||
|
assert n_samples_y == n_samples_x
|
||||||
|
self.n_features = n_features
|
||||||
|
self.n_classes = n_classes
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
|
||||||
|
"""
|
||||||
|
Predicts the probability of each sample belonging to each class. Must be called
|
||||||
|
after fit.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
x_test: np.ndarray
|
||||||
|
An array of features with shape (`n_samples`, `n_features`). The number of
|
||||||
|
features in `x_test` must match the number of features in `x_train` provided
|
||||||
|
to `fit`.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
np.ndarray
|
||||||
|
An array of predicted probabilities with shape (`n_samples`, `n_classes`),
|
||||||
|
where `n_classes` is the number of columns in `y_train` provided to `fit`.
|
||||||
|
"""
|
||||||
|
assert self.n_features is not None
|
||||||
|
assert isinstance(x_test, np.ndarray)
|
||||||
|
assert len(x_test.shape) == 2
|
||||||
|
(n_samples, n_features_x) = x_test.shape
|
||||||
|
assert n_features_x == self.n_features
|
||||||
|
return np.ndarray([])
|
||||||
|
|
||||||
|
|
||||||
class Regressor(ABC):
|
class Regressor(ABC):
|
||||||
@abstractmethod
|
"""
|
||||||
def fit(self, x_train, y_train):
|
A Regressor tries to predict the values of some continuous variables, given the
|
||||||
pass
|
values of other variables.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.n_inputs: Optional[int] = None
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def predict(self):
|
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
|
||||||
pass
|
"""
|
||||||
|
Trains the regressor.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
x_train: np.ndarray
|
||||||
|
An array of inputs with shape (`n_samples`, `n_inputs`). Each entry must be
|
||||||
|
a float.
|
||||||
|
y_train: np.ndarray
|
||||||
|
An array of outputs with shape (`n_samples`, `n_outputs`). Each entry must
|
||||||
|
be a float.
|
||||||
|
"""
|
||||||
|
assert isinstance(x_train, np.ndarray)
|
||||||
|
assert isinstance(y_train, np.ndarray)
|
||||||
|
assert x_train.dtype in [np.float16, np.float32, np.float64]
|
||||||
|
assert y_train.dtype in [np.float16, np.float32, np.float64]
|
||||||
|
assert len(x_train.shape) == 2
|
||||||
|
assert len(y_train.shape) == 2
|
||||||
|
(n_samples_x, n_inputs) = x_train.shape
|
||||||
|
(n_samples_y, n_outputs) = y_train.shape
|
||||||
|
assert n_samples_y == n_samples_x
|
||||||
|
self.n_inputs = n_inputs
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def predict(self, x_test: np.ndarray) -> np.ndarray:
|
||||||
|
"""
|
||||||
|
Predicts the values of the output variables. Must be called after fit.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
x_test: np.ndarray
|
||||||
|
An array of inputs with shape (`n_samples`, `n_inputs`), where `n_inputs`
|
||||||
|
must match the number of columns in `x_train` provided to `fit`.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
np.ndarray
|
||||||
|
An array of outputs with shape (`n_samples`, `n_outputs`), where
|
||||||
|
`n_outputs` is the number of columns in `y_train` provided to `fit`.
|
||||||
|
"""
|
||||||
|
assert self.n_inputs is not None
|
||||||
|
assert isinstance(x_test, np.ndarray)
|
||||||
|
assert len(x_test.shape) == 2
|
||||||
|
(n_samples, n_inputs_x) = x_test.shape
|
||||||
|
assert n_inputs_x == self.n_inputs
|
||||||
|
return np.ndarray([])
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ class CrossValidatedClassifier(Classifier):
|
|||||||
cv=5,
|
cv=5,
|
||||||
scoring="accuracy",
|
scoring="accuracy",
|
||||||
):
|
):
|
||||||
|
super().__init__()
|
||||||
self.classifier = None
|
self.classifier = None
|
||||||
self.classifier_prototype = classifier
|
self.classifier_prototype = classifier
|
||||||
self.constant = constant
|
self.constant = constant
|
||||||
@@ -45,6 +46,8 @@ class CrossValidatedClassifier(Classifier):
|
|||||||
self.scoring = scoring
|
self.scoring = scoring
|
||||||
|
|
||||||
def fit(self, x_train, y_train):
|
def fit(self, x_train, y_train):
|
||||||
|
# super().fit(x_train, y_train)
|
||||||
|
|
||||||
# Calculate dummy score and absolute score threshold
|
# Calculate dummy score and absolute score threshold
|
||||||
y_train_avg = np.average(y_train)
|
y_train_avg = np.average(y_train)
|
||||||
dummy_score = max(y_train_avg, 1 - y_train_avg)
|
dummy_score = max(y_train_avg, 1 - y_train_avg)
|
||||||
@@ -83,4 +86,5 @@ class CrossValidatedClassifier(Classifier):
|
|||||||
self.classifier.fit(x_train, y_train)
|
self.classifier.fit(x_train, y_train)
|
||||||
|
|
||||||
def predict_proba(self, x_test):
|
def predict_proba(self, x_test):
|
||||||
|
# super().predict_proba(x_test)
|
||||||
return self.classifier.predict_proba(x_test)
|
return self.classifier.predict_proba(x_test)
|
||||||
|
|||||||
@@ -35,7 +35,9 @@ def test_cv():
|
|||||||
cv=30,
|
cv=30,
|
||||||
)
|
)
|
||||||
clf.fit(x_train, y_train)
|
clf.fit(x_train, y_train)
|
||||||
assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E
|
proba = clf.predict_proba(x_train)
|
||||||
|
y_pred = (proba[:, 1] > 0.5).astype(float)
|
||||||
|
assert norm(np.zeros(n_samples) - y_pred) < E
|
||||||
|
|
||||||
# Support vector machines with quadratic kernels perform almost perfectly
|
# Support vector machines with quadratic kernels perform almost perfectly
|
||||||
# on this data set, so predictor should return their prediction.
|
# on this data set, so predictor should return their prediction.
|
||||||
@@ -45,5 +47,6 @@ def test_cv():
|
|||||||
cv=30,
|
cv=30,
|
||||||
)
|
)
|
||||||
clf.fit(x_train, y_train)
|
clf.fit(x_train, y_train)
|
||||||
print(y_train - clf.predict(x_train))
|
proba = clf.predict_proba(x_train)
|
||||||
assert norm(y_train - clf.predict(x_train)) < E
|
y_pred = (proba[:, 1] > 0.5).astype(float)
|
||||||
|
assert norm(y_train - y_pred) < E
|
||||||
|
|||||||
Reference in New Issue
Block a user