|
|
@@ -3,31 +3,128 @@
|
|
|
|
# Released under the modified BSD license. See COPYING.md for more details.
|
|
|
|
# Released under the modified BSD license. See COPYING.md for more details.
|
|
|
|
|
|
|
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Classifier(ABC):
|
|
|
|
class Classifier(ABC):
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
A Classifier decides which class each sample belongs to, based on historical
|
|
|
|
|
|
|
|
data.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self):
|
|
|
|
|
|
|
|
self.n_features: Optional[int] = None
|
|
|
|
|
|
|
|
self.n_classes: Optional[int] = None
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
@abstractmethod
|
|
|
|
def fit(self, x_train, y_train):
|
|
|
|
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
|
|
|
|
pass
|
|
|
|
"""
|
|
|
|
|
|
|
|
Trains the classifier.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
|
|
|
----------
|
|
|
|
|
|
|
|
x_train: np.ndarray
|
|
|
|
|
|
|
|
An array of features with shape (`n_samples`, `n_features`). Each entry
|
|
|
|
|
|
|
|
must be a float.
|
|
|
|
|
|
|
|
y_train: np.ndarray
|
|
|
|
|
|
|
|
An array of labels with shape (`n_samples`, `n_classes`). Each entry must be
|
|
|
|
|
|
|
|
a bool, and there must be exactly one True element in each row.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
assert isinstance(x_train, np.ndarray)
|
|
|
|
|
|
|
|
assert isinstance(y_train, np.ndarray)
|
|
|
|
|
|
|
|
assert x_train.dtype in [np.float16, np.float32, np.float64]
|
|
|
|
|
|
|
|
assert y_train.dtype == np.bool8
|
|
|
|
|
|
|
|
assert len(x_train.shape) == 2
|
|
|
|
|
|
|
|
assert len(y_train.shape) == 2
|
|
|
|
|
|
|
|
(n_samples_x, n_features) = x_train.shape
|
|
|
|
|
|
|
|
(n_samples_y, n_classes) = y_train.shape
|
|
|
|
|
|
|
|
assert n_samples_y == n_samples_x
|
|
|
|
|
|
|
|
self.n_features = n_features
|
|
|
|
|
|
|
|
self.n_classes = n_classes
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
@abstractmethod
|
|
|
|
def predict_proba(self, x_test):
|
|
|
|
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
|
|
|
|
pass
|
|
|
|
"""
|
|
|
|
|
|
|
|
Predicts the probability of each sample belonging to each class. Must be called
|
|
|
|
|
|
|
|
after fit.
|
|
|
|
|
|
|
|
|
|
|
|
def predict(self, x_test):
|
|
|
|
Parameters
|
|
|
|
proba = self.predict_proba(x_test)
|
|
|
|
----------
|
|
|
|
assert isinstance(proba, np.ndarray)
|
|
|
|
x_test: np.ndarray
|
|
|
|
assert proba.shape == (x_test.shape[0], 2)
|
|
|
|
An array of features with shape (`n_samples`, `n_features`). The number of
|
|
|
|
return (proba[:, 1] > 0.5).astype(float)
|
|
|
|
features in `x_test` must match the number of features in `x_train` provided
|
|
|
|
|
|
|
|
to `fit`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
|
|
|
-------
|
|
|
|
|
|
|
|
np.ndarray
|
|
|
|
|
|
|
|
An array of predicted probabilities with shape (`n_samples`, `n_classes`),
|
|
|
|
|
|
|
|
where `n_classes` is the number of columns in `y_train` provided to `fit`.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
assert self.n_features is not None
|
|
|
|
|
|
|
|
assert isinstance(x_test, np.ndarray)
|
|
|
|
|
|
|
|
assert len(x_test.shape) == 2
|
|
|
|
|
|
|
|
(n_samples, n_features_x) = x_test.shape
|
|
|
|
|
|
|
|
assert n_features_x == self.n_features
|
|
|
|
|
|
|
|
return np.ndarray([])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Regressor(ABC):
|
|
|
|
class Regressor(ABC):
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
A Regressor tries to predict the values of some continuous variables, given the
|
|
|
|
|
|
|
|
values of other variables.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self):
|
|
|
|
|
|
|
|
self.n_inputs: Optional[int] = None
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
@abstractmethod
|
|
|
|
def fit(self, x_train, y_train):
|
|
|
|
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
|
|
|
|
pass
|
|
|
|
"""
|
|
|
|
|
|
|
|
Trains the regressor.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
|
|
|
----------
|
|
|
|
|
|
|
|
x_train: np.ndarray
|
|
|
|
|
|
|
|
An array of inputs with shape (`n_samples`, `n_inputs`). Each entry must be
|
|
|
|
|
|
|
|
a float.
|
|
|
|
|
|
|
|
y_train: np.ndarray
|
|
|
|
|
|
|
|
An array of outputs with shape (`n_samples`, `n_outputs`). Each entry must
|
|
|
|
|
|
|
|
be a float.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
assert isinstance(x_train, np.ndarray)
|
|
|
|
|
|
|
|
assert isinstance(y_train, np.ndarray)
|
|
|
|
|
|
|
|
assert x_train.dtype in [np.float16, np.float32, np.float64]
|
|
|
|
|
|
|
|
assert y_train.dtype in [np.float16, np.float32, np.float64]
|
|
|
|
|
|
|
|
assert len(x_train.shape) == 2
|
|
|
|
|
|
|
|
assert len(y_train.shape) == 2
|
|
|
|
|
|
|
|
(n_samples_x, n_inputs) = x_train.shape
|
|
|
|
|
|
|
|
(n_samples_y, n_outputs) = y_train.shape
|
|
|
|
|
|
|
|
assert n_samples_y == n_samples_x
|
|
|
|
|
|
|
|
self.n_inputs = n_inputs
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
@abstractmethod
|
|
|
|
def predict(self):
|
|
|
|
def predict(self, x_test: np.ndarray) -> np.ndarray:
|
|
|
|
pass
|
|
|
|
"""
|
|
|
|
|
|
|
|
Predicts the values of the output variables. Must be called after fit.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
|
|
|
|
----------
|
|
|
|
|
|
|
|
x_test: np.ndarray
|
|
|
|
|
|
|
|
An array of inputs with shape (`n_samples`, `n_inputs`), where `n_inputs`
|
|
|
|
|
|
|
|
must match the number of columns in `x_train` provided to `fit`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
|
|
|
|
-------
|
|
|
|
|
|
|
|
np.ndarray
|
|
|
|
|
|
|
|
An array of outputs with shape (`n_samples`, `n_outputs`), where
|
|
|
|
|
|
|
|
`n_outputs` is the number of columns in `y_train` provided to `fit`.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
assert self.n_inputs is not None
|
|
|
|
|
|
|
|
assert isinstance(x_test, np.ndarray)
|
|
|
|
|
|
|
|
assert len(x_test.shape) == 2
|
|
|
|
|
|
|
|
(n_samples, n_inputs_x) = x_test.shape
|
|
|
|
|
|
|
|
assert n_inputs_x == self.n_inputs
|
|
|
|
|
|
|
|
return np.ndarray([])
|
|
|
|