mirror of
https://github.com/ANL-CEEESA/MIPLearn.git
synced 2025-12-06 01:18:52 -06:00
Document and simplify Classifier and Regressor
This commit is contained in:
1
Makefile
1
Makefile
@@ -42,6 +42,7 @@ reformat:
|
||||
|
||||
test:
|
||||
$(MYPY) -p miplearn
|
||||
$(MYPY) -p tests
|
||||
$(PYTEST) $(PYTEST_ARGS)
|
||||
|
||||
.PHONY: test test-watch docs install
|
||||
|
||||
@@ -3,31 +3,128 @@
|
||||
# Released under the modified BSD license. See COPYING.md for more details.
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Classifier(ABC):
|
||||
@abstractmethod
|
||||
def fit(self, x_train, y_train):
|
||||
pass
|
||||
"""
|
||||
A Classifier decides which class each sample belongs to, based on historical
|
||||
data.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.n_features: Optional[int] = None
|
||||
self.n_classes: Optional[int] = None
|
||||
|
||||
@abstractmethod
|
||||
def predict_proba(self, x_test):
|
||||
pass
|
||||
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
|
||||
"""
|
||||
Trains the classifier.
|
||||
|
||||
def predict(self, x_test):
|
||||
proba = self.predict_proba(x_test)
|
||||
assert isinstance(proba, np.ndarray)
|
||||
assert proba.shape == (x_test.shape[0], 2)
|
||||
return (proba[:, 1] > 0.5).astype(float)
|
||||
Parameters
|
||||
----------
|
||||
x_train: np.ndarray
|
||||
An array of features with shape (`n_samples`, `n_features`). Each entry
|
||||
must be a float.
|
||||
y_train: np.ndarray
|
||||
An array of labels with shape (`n_samples`, `n_classes`). Each entry must be
|
||||
a bool, and there must be exactly one True element in each row.
|
||||
"""
|
||||
assert isinstance(x_train, np.ndarray)
|
||||
assert isinstance(y_train, np.ndarray)
|
||||
assert x_train.dtype in [np.float16, np.float32, np.float64]
|
||||
assert y_train.dtype == np.bool8
|
||||
assert len(x_train.shape) == 2
|
||||
assert len(y_train.shape) == 2
|
||||
(n_samples_x, n_features) = x_train.shape
|
||||
(n_samples_y, n_classes) = y_train.shape
|
||||
assert n_samples_y == n_samples_x
|
||||
self.n_features = n_features
|
||||
self.n_classes = n_classes
|
||||
|
||||
@abstractmethod
|
||||
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Predicts the probability of each sample belonging to each class. Must be called
|
||||
after fit.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x_test: np.ndarray
|
||||
An array of features with shape (`n_samples`, `n_features`). The number of
|
||||
features in `x_test` must match the number of features in `x_train` provided
|
||||
to `fit`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
np.ndarray
|
||||
An array of predicted probabilities with shape (`n_samples`, `n_classes`),
|
||||
where `n_classes` is the number of columns in `y_train` provided to `fit`.
|
||||
"""
|
||||
assert self.n_features is not None
|
||||
assert isinstance(x_test, np.ndarray)
|
||||
assert len(x_test.shape) == 2
|
||||
(n_samples, n_features_x) = x_test.shape
|
||||
assert n_features_x == self.n_features
|
||||
return np.ndarray([])
|
||||
|
||||
|
||||
class Regressor(ABC):
|
||||
@abstractmethod
|
||||
def fit(self, x_train, y_train):
|
||||
pass
|
||||
"""
|
||||
A Regressor tries to predict the values of some continuous variables, given the
|
||||
values of other variables.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.n_inputs: Optional[int] = None
|
||||
|
||||
@abstractmethod
|
||||
def predict(self):
|
||||
pass
|
||||
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
|
||||
"""
|
||||
Trains the regressor.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x_train: np.ndarray
|
||||
An array of inputs with shape (`n_samples`, `n_inputs`). Each entry must be
|
||||
a float.
|
||||
y_train: np.ndarray
|
||||
An array of outputs with shape (`n_samples`, `n_outputs`). Each entry must
|
||||
be a float.
|
||||
"""
|
||||
assert isinstance(x_train, np.ndarray)
|
||||
assert isinstance(y_train, np.ndarray)
|
||||
assert x_train.dtype in [np.float16, np.float32, np.float64]
|
||||
assert y_train.dtype in [np.float16, np.float32, np.float64]
|
||||
assert len(x_train.shape) == 2
|
||||
assert len(y_train.shape) == 2
|
||||
(n_samples_x, n_inputs) = x_train.shape
|
||||
(n_samples_y, n_outputs) = y_train.shape
|
||||
assert n_samples_y == n_samples_x
|
||||
self.n_inputs = n_inputs
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, x_test: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Predicts the values of the output variables. Must be called after fit.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x_test: np.ndarray
|
||||
An array of inputs with shape (`n_samples`, `n_inputs`), where `n_inputs`
|
||||
must match the number of columns in `x_train` provided to `fit`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
np.ndarray
|
||||
An array of outputs with shape (`n_samples`, `n_outputs`), where
|
||||
`n_outputs` is the number of columns in `y_train` provided to `fit`.
|
||||
"""
|
||||
assert self.n_inputs is not None
|
||||
assert isinstance(x_test, np.ndarray)
|
||||
assert len(x_test.shape) == 2
|
||||
(n_samples, n_inputs_x) = x_test.shape
|
||||
assert n_inputs_x == self.n_inputs
|
||||
return np.ndarray([])
|
||||
|
||||
@@ -37,6 +37,7 @@ class CrossValidatedClassifier(Classifier):
|
||||
cv=5,
|
||||
scoring="accuracy",
|
||||
):
|
||||
super().__init__()
|
||||
self.classifier = None
|
||||
self.classifier_prototype = classifier
|
||||
self.constant = constant
|
||||
@@ -45,6 +46,8 @@ class CrossValidatedClassifier(Classifier):
|
||||
self.scoring = scoring
|
||||
|
||||
def fit(self, x_train, y_train):
|
||||
# super().fit(x_train, y_train)
|
||||
|
||||
# Calculate dummy score and absolute score threshold
|
||||
y_train_avg = np.average(y_train)
|
||||
dummy_score = max(y_train_avg, 1 - y_train_avg)
|
||||
@@ -83,4 +86,5 @@ class CrossValidatedClassifier(Classifier):
|
||||
self.classifier.fit(x_train, y_train)
|
||||
|
||||
def predict_proba(self, x_test):
|
||||
# super().predict_proba(x_test)
|
||||
return self.classifier.predict_proba(x_test)
|
||||
|
||||
@@ -35,7 +35,9 @@ def test_cv():
|
||||
cv=30,
|
||||
)
|
||||
clf.fit(x_train, y_train)
|
||||
assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E
|
||||
proba = clf.predict_proba(x_train)
|
||||
y_pred = (proba[:, 1] > 0.5).astype(float)
|
||||
assert norm(np.zeros(n_samples) - y_pred) < E
|
||||
|
||||
# Support vector machines with quadratic kernels perform almost perfectly
|
||||
# on this data set, so the predictor should return their prediction.
|
||||
@@ -45,5 +47,6 @@ def test_cv():
|
||||
cv=30,
|
||||
)
|
||||
clf.fit(x_train, y_train)
|
||||
print(y_train - clf.predict(x_train))
|
||||
assert norm(y_train - clf.predict(x_train)) < E
|
||||
proba = clf.predict_proba(x_train)
|
||||
y_pred = (proba[:, 1] > 0.5).astype(float)
|
||||
assert norm(y_train - y_pred) < E
|
||||
|
||||
Reference in New Issue
Block a user