diff --git a/Makefile b/Makefile
index 0968210..2ed3f1c 100644
--- a/Makefile
+++ b/Makefile
@@ -42,6 +42,7 @@ reformat:
 
 test:
 	$(MYPY) -p miplearn
+	$(MYPY) -p tests
 	$(PYTEST) $(PYTEST_ARGS)
 
 .PHONY: test test-watch docs install
diff --git a/miplearn/classifiers/__init__.py b/miplearn/classifiers/__init__.py
index a8dc64b..f46c41d 100644
--- a/miplearn/classifiers/__init__.py
+++ b/miplearn/classifiers/__init__.py
@@ -3,31 +3,128 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 
 from abc import ABC, abstractmethod
+from typing import Optional
 
 import numpy as np
 
 
 class Classifier(ABC):
+    """
+    A Classifier decides which class each sample belongs to, based on historical
+    data.
+    """
+
+    def __init__(self) -> None:
+        self.n_features: Optional[int] = None  # columns of x_train, set by fit
+        self.n_classes: Optional[int] = None  # columns of y_train, set by fit
+
     @abstractmethod
-    def fit(self, x_train, y_train):
-        pass
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        """
+        Trains the classifier.
+
+        Parameters
+        ----------
+        x_train: np.ndarray
+            An array of features with shape (`n_samples`, `n_features`). Each entry
+            must be a float.
+        y_train: np.ndarray
+            An array of labels with shape (`n_samples`, `n_classes`). Each entry must be
+            a bool, and there must be exactly one True element in each row.
+        """
+        assert isinstance(x_train, np.ndarray)
+        assert isinstance(y_train, np.ndarray)
+        assert x_train.dtype in [np.float16, np.float32, np.float64]
+        assert y_train.dtype == np.bool_  # np.bool8 alias was removed in NumPy 2.0
+        assert len(x_train.shape) == 2
+        assert len(y_train.shape) == 2
+        (n_samples_x, n_features) = x_train.shape
+        (n_samples_y, n_classes) = y_train.shape
+        assert n_samples_y == n_samples_x
+        self.n_features = n_features
+        self.n_classes = n_classes
 
     @abstractmethod
-    def predict_proba(self, x_test):
-        pass
+    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
+        """
+        Predicts the probability of each sample belonging to each class. Must be called
+        after fit.
 
-    def predict(self, x_test):
-        proba = self.predict_proba(x_test)
-        assert isinstance(proba, np.ndarray)
-        assert proba.shape == (x_test.shape[0], 2)
-        return (proba[:, 1] > 0.5).astype(float)
+        Parameters
+        ----------
+        x_test: np.ndarray
+            An array of features with shape (`n_samples`, `n_features`). The number of
+            features in `x_test` must match the number of features in `x_train` provided
+            to `fit`.
+
+        Returns
+        -------
+        np.ndarray
+            An array of predicted probabilities with shape (`n_samples`, `n_classes`),
+            where `n_classes` is the number of columns in `y_train` provided to `fit`.
+        """
+        assert self.n_features is not None
+        assert isinstance(x_test, np.ndarray)
+        assert len(x_test.shape) == 2
+        (n_samples, n_features_x) = x_test.shape
+        assert n_features_x == self.n_features
+        return np.array([])  # empty placeholder; overridden by subclasses
 
 
 class Regressor(ABC):
+    """
+    A Regressor tries to predict the values of some continuous variables, given the
+    values of other variables.
+    """
+
+    def __init__(self) -> None:
+        self.n_inputs: Optional[int] = None  # columns of x_train, set by fit
+
     @abstractmethod
-    def fit(self, x_train, y_train):
-        pass
+    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
+        """
+        Trains the regressor. This base implementation only validates the
+        arrays and records the number of inputs.
+
+        Parameters
+        ----------
+        x_train: np.ndarray
+            A 2D float array of inputs with shape (`n_samples`, `n_inputs`).
+        y_train: np.ndarray
+            A 2D float array of outputs with shape (`n_samples`, `n_outputs`).
+        """
+        float_dtypes = [np.float16, np.float32, np.float64]
+        assert isinstance(x_train, np.ndarray)
+        assert isinstance(y_train, np.ndarray)
+        assert x_train.dtype in float_dtypes
+        assert y_train.dtype in float_dtypes
+        assert x_train.ndim == 2
+        assert y_train.ndim == 2
+        # Both arrays must have the same number of samples (rows).
+        assert y_train.shape[0] == x_train.shape[0]
+        # Remember the input width so predict can check x_test against it.
+        self.n_inputs = x_train.shape[1]
 
     @abstractmethod
-    def predict(self):
-        pass
+    def predict(self, x_test: np.ndarray) -> np.ndarray:
+        """
+        Predicts the values of the output variables. Must be called after fit.
+
+        Parameters
+        ----------
+        x_test: np.ndarray
+            An array of inputs with shape (`n_samples`, `n_inputs`), where `n_inputs`
+            must match the number of columns in `x_train` provided to `fit`.
+
+        Returns
+        -------
+        np.ndarray
+            An array of outputs with shape (`n_samples`, `n_outputs`), where
+            `n_outputs` is the number of columns in `y_train` provided to `fit`.
+        """
+        assert self.n_inputs is not None
+        assert isinstance(x_test, np.ndarray)
+        assert len(x_test.shape) == 2
+        (n_samples, n_inputs_x) = x_test.shape
+        assert n_inputs_x == self.n_inputs
+        return np.array([])  # empty placeholder; overridden by subclasses
diff --git a/miplearn/classifiers/cv.py b/miplearn/classifiers/cv.py
index 3a4cc1f..e8c8b42 100644
--- a/miplearn/classifiers/cv.py
+++ b/miplearn/classifiers/cv.py
@@ -37,6 +37,7 @@ class CrossValidatedClassifier(Classifier):
         cv=5,
         scoring="accuracy",
     ):
+        super().__init__()
         self.classifier = None
         self.classifier_prototype = classifier
         self.constant = constant
@@ -45,6 +46,8 @@ class CrossValidatedClassifier(Classifier):
         self.scoring = scoring
 
     def fit(self, x_train, y_train):
+        # super().fit(x_train, y_train)
+
         # Calculate dummy score and absolute score threshold
         y_train_avg = np.average(y_train)
         dummy_score = max(y_train_avg, 1 - y_train_avg)
@@ -83,4 +86,5 @@ class CrossValidatedClassifier(Classifier):
         self.classifier.fit(x_train, y_train)
 
     def predict_proba(self, x_test):
+        # TODO: call super().predict_proba(x_test) once fit() runs super().fit()
         return self.classifier.predict_proba(x_test)
diff --git a/tests/classifiers/test_cv.py b/tests/classifiers/test_cv.py
index 545ce41..fa6baa6 100644
--- a/tests/classifiers/test_cv.py
+++ b/tests/classifiers/test_cv.py
@@ -35,7 +35,9 @@ def test_cv():
         cv=30,
     )
     clf.fit(x_train, y_train)
-    assert norm(np.zeros(n_samples) - clf.predict(x_train)) < E
+    proba = clf.predict_proba(x_train)
+    y_pred = (proba[:, 1] > 0.5).astype(float)
+    assert norm(np.zeros(n_samples) - y_pred) < E
 
     # Support vector machines with quadratic kernels perform almost perfectly
     # on this data set, so predictor should return their prediction.
@@ -45,5 +47,6 @@ def test_cv():
         cv=30,
     )
     clf.fit(x_train, y_train)
-    print(y_train - clf.predict(x_train))
-    assert norm(y_train - clf.predict(x_train)) < E
+    proba = clf.predict_proba(x_train)
+    y_pred = (proba[:, 1] > 0.5).astype(float)
+    assert norm(y_train - y_pred) < E