diff --git a/Makefile b/Makefile
index 0927569..0968210 100644
--- a/Makefile
+++ b/Makefile
@@ -20,10 +20,11 @@ dist-upload:
 	$(PYTHON) -m twine upload dist/*
 
 docs:
+	rm -rf ../docs/$(VERSION) html
 	mkdocs build -d ../docs/$(VERSION)/
+	pdoc3 --html miplearn
+	mv -v html ../docs/$(VERSION)/api
 
-docs-dev:
-	mkdocs build -d ../docs/dev/
 
 install-deps:
 	$(PIP) install -i https://pypi.gurobi.com gurobipy
diff --git a/docs/benchmark.md b/docs/benchmark.md
deleted file mode 100644
index c8a0aef..0000000
--- a/docs/benchmark.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Benchmarks Utilities
-
-
-### Using `BenchmarkRunner`
-
-MIPLearn provides the utility class `BenchmarkRunner`, which simplifies the task of comparing the performance of different solvers. The snippet below shows its basic usage:
-
-```python
-from miplearn import BenchmarkRunner, LearningSolver
-
-# Create train and test instances
-train_instances = [...]
-test_instances  = [...]
-
-# Training phase...
-training_solver = LearningSolver(...)
-training_solver.parallel_solve(train_instances, n_jobs=10)
-
-# Test phase...
-test_solvers = {
-    "Baseline": LearningSolver(...), # each solver may have different parameters
-    "Strategy A": LearningSolver(...), 
-    "Strategy B": LearningSolver(...),
-    "Strategy C": LearningSolver(...),
-}
-benchmark = BenchmarkRunner(test_solvers)
-benchmark.fit(train_instances)
-benchmark.parallel_solve(test_instances, n_jobs=2)
-print(benchmark.raw_results())
-```
-
-The method `fit` trains the ML models for each individual solver. The method `parallel_solve` solves the test instances in parallel, and collects solver statistics such as running time and optimal value. Finally, `raw_results` produces a table of results (Pandas DataFrame) with the following columns:
-
-* **Solver,** the name of the solver.
-* **Instance,** the sequence number identifying the instance.
-* **Wallclock Time,** the wallclock running time (in seconds) spent by the solver;
-* **Lower Bound,** the best lower bound obtained by the solver;
-* **Upper Bound,** the best upper bound obtained by the solver;
-* **Gap,** the relative MIP integrality gap at the end of the optimization;
-* **Nodes,** the number of explored branch-and-bound nodes.
-
-In addition to the above, there is also a "Relative" version of most columns, where the raw number is compared to the solver which provided the best performance. The *Relative Wallclock Time* for example, indicates how many times slower this run was when compared to the best time achieved by any solver when processing this instance. For example, if this run took 10 seconds, but the fastest solver took only 5 seconds to solve the same instance, the relative wallclock time would be 2.
-
-
-### Saving and loading benchmark results
-
-When iteratively exploring new formulations, encoding and solver parameters, it is often desirable to avoid repeating parts of the benchmark suite. For example, if the baseline solver has not been changed, there is no need to evaluate its performance again and again when making small changes to the remaining solvers. `BenchmarkRunner` provides the methods `save_results` and `load_results`, which can be used to avoid this repetition, as the next example shows:
-
-```python
-# Benchmark baseline solvers and save results to a file.
-benchmark = BenchmarkRunner(baseline_solvers)
-benchmark.parallel_solve(test_instances)
-benchmark.save_results("baseline_results.csv")
-
-# Benchmark remaining solvers, loading baseline results from file.
-benchmark = BenchmarkRunner(alternative_solvers)
-benchmark.load_results("baseline_results.csv")
-benchmark.fit(training_instances)
-benchmark.parallel_solve(test_instances)
-```
-
diff --git a/docs/usage.md b/docs/usage.md
index 4d17d48..04c6426 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -2,12 +2,12 @@
 
 ## 1. Installation
 
-In these docs, we describe the Python/Pyomo version of the package, although a [Julia/JuMP version](https://github.com/ANL-CEEESA/MIPLearn.jl) is also available. A mixed-integer solver is also required and its Python bindings must be properly installed. Supported solvers are currently CPLEX and Gurobi.
+In these docs, we describe the Python/Pyomo version of the package, although a [Julia/JuMP version](https://github.com/ANL-CEEESA/MIPLearn.jl) is also available. A mixed-integer solver is also required and its Python bindings must be properly installed. Supported solvers are currently CPLEX, Gurobi and XPRESS.
 
 To install MIPLearn, run: 
 
 ```bash
-pip3 install miplearn
+pip3 install --upgrade miplearn==0.2.*
 ```
 
 After installation, the package `miplearn` should become available to Python. It can be imported
@@ -176,11 +176,12 @@ Instance files must be pickled instance objects. The method `solve` loads at mos
 
 
 ```python
+import pickle
 from miplearn import LearningSolver
 
 # Construct and pickle 600 problem instances
 for i in range(600):
-    instance = CustomInstance([...])
+    instance = MyProblemInstance([...])
     with open("instance_%03d.pkl" % i, "w") as file:
         pickle.dump(instance, obj)
         
@@ -202,22 +203,50 @@ solver.parallel_solve(test_instances, n_jobs=4)
 ```
 
 
-By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To write to an alternative file instead, the argument `output` may be used. In `solve`, this argument should be a single filename. In `parallel_solve`, it should be a list, containing exactly as many filenames as instances. If `output` is `None`, the modifications are simply discarded. This can be useful, for example, during benchmarks.
+By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To write to an alternative file instead, use the arguments `output_filename` (in `solve`) and `output_filenames` (in `parallel_solve`). To discard the modifications instead, use `discard_output=True` (in `solve`) or `discard_outputs=True` (in `parallel_solve`). This can be useful, for example, during benchmarks.
 
 ```python
-# Solve a single instance file and store the output to another file
-solver.solve("knapsack_1.orig.pkl", output="knapsack_1.solved.pkl")
+# Solve a single instance file and write the output to another file
+solver.solve("knapsack_1.orig.pkl", output_filename="knapsack_1.solved.pkl")
 
 # Solve a list of instance files
 instances = ["knapsack_%03d.orig.pkl" % i for i in range(100)]
 output = ["knapsack_%03d.solved.pkl" % i for i in range(100)]
-solver.parallel_solve(instances, output=output)
+solver.parallel_solve(instances, output_filenames=output)
 
 # Solve instances and discard solutions and training data
-solver.parallel_solve(instances, output=None)
+solver.parallel_solve(instances, discard_outputs=True)
+```
+
+## 7. Running benchmarks
+
+MIPLearn provides the utility class `BenchmarkRunner`, which simplifies the task of comparing the performance of different solvers. The snippet below shows its basic usage:
+
+```python
+from miplearn import BenchmarkRunner, LearningSolver
+
+# Create train and test instances
+train_instances = [...]
+test_instances  = [...]
+
+# Training phase...
+training_solver = LearningSolver(...)
+training_solver.parallel_solve(train_instances, n_jobs=10)
+
+# Test phase...
+benchmark = BenchmarkRunner({
+    "Baseline": LearningSolver(...),
+    "Strategy A": LearningSolver(...),
+    "Strategy B": LearningSolver(...),
+    "Strategy C": LearningSolver(...),
+})
+benchmark.fit(train_instances)
+benchmark.parallel_solve(test_instances, n_jobs=5)
+benchmark.write_csv("results.csv")
 ```
 
+The method `fit` trains the ML models for each individual solver. The method `parallel_solve` solves the test instances in parallel, and collects solver statistics such as running time and optimal value. Finally, `write_csv` produces a table of results. The columns in the CSV file depend on the components added to the solver.
 
-## 7. Current Limitations
+## 8. Current Limitations
 
-* Only binary and continuous decision variables are currently supported. General integer variables are not currently supported by all solver components.
+* Only binary and continuous decision variables are currently supported. General integer variables are not currently supported by some solver components.
diff --git a/miplearn/benchmark.py b/miplearn/benchmark.py
index 6303278..acbe895 100644
--- a/miplearn/benchmark.py
+++ b/miplearn/benchmark.py
@@ -4,40 +4,71 @@
 
 import logging
 import os
-from copy import deepcopy
+from typing import Dict, Union, List
 
 import pandas as pd
-from tqdm.auto import tqdm
 
+from miplearn.instance import Instance
 from miplearn.solvers.learning import LearningSolver
+from miplearn.types import LearningSolveStats
 
 
 class BenchmarkRunner:
-    def __init__(self, solvers):
-        assert isinstance(solvers, dict)
-        for solver in solvers.values():
-            assert isinstance(solver, LearningSolver)
-        self.solvers = solvers
-        self.results = None
-
-    def solve(self, instances, tee=False):
-        for (solver_name, solver) in self.solvers.items():
-            for i in tqdm(range(len((instances)))):
-                results = solver.solve(deepcopy(instances[i]), tee=tee)
-                self._push_result(
-                    results,
-                    solver=solver,
-                    solver_name=solver_name,
-                    instance=i,
-                )
+    """
+    Utility class that simplifies the task of comparing the performance of different
+    solvers.
+
+    Example
+    -------
+    ```python
+    benchmark = BenchmarkRunner({
+        "Baseline": LearningSolver(...),
+        "Strategy A": LearningSolver(...),
+        "Strategy B": LearningSolver(...),
+        "Strategy C": LearningSolver(...),
+    })
+    benchmark.fit(train_instances)
+    benchmark.parallel_solve(test_instances, n_jobs=5)
+    benchmark.write_csv("result.csv")
+    ```
+
+    Parameters
+    ----------
+    solvers: Dict[str, LearningSolver]
+        Dictionary containing the solvers to compare. Solvers may have different
+        arguments and components. The key should be the name of the solver. It
+        appears in the exported tables of results.
+    """
+
+    def __init__(self, solvers: Dict[str, LearningSolver]) -> None:
+        self.solvers: Dict[str, LearningSolver] = solvers
+        self.results = pd.DataFrame(
+            columns=[
+                "Solver",
+                "Instance",
+            ]
+        )
 
     def parallel_solve(
         self,
-        instances,
-        n_jobs=1,
-        n_trials=1,
-        index_offset=0,
-    ):
+        instances: Union[List[str], List[Instance]],
+        n_jobs: int = 1,
+        n_trials: int = 3,
+    ) -> None:
+        """
+        Solves the given instances in parallel and collects benchmark statistics.
+
+        Parameters
+        ----------
+        instances: Union[List[str], List[Instance]]
+            List of instances to solve. This can either be a list of instances
+            already loaded in memory, or a list of filenames pointing to pickled (and
+            optionally gzipped) files.
+        n_jobs: int
+            Number of instances to solve in parallel at a time.
+        n_trials: int
+            How many times each instance should be solved.
+        """
         self._silence_miplearn_logger()
         trials = instances * n_trials
         for (solver_name, solver) in self.solvers.items():
@@ -48,68 +79,44 @@ class BenchmarkRunner:
                 discard_outputs=True,
             )
             for i in range(len(trials)):
-                idx = (i % len(instances)) + index_offset
-                self._push_result(
-                    results[i],
-                    solver=solver,
-                    solver_name=solver_name,
-                    instance=idx,
-                )
+                idx = i % len(instances)
+                results[i]["Solver"] = solver_name
+                results[i]["Instance"] = idx
+                self.results = self.results.append(pd.DataFrame([results[i]]))
         self._restore_miplearn_logger()
 
-    def raw_results(self):
-        return self.results
+    def write_csv(self, filename: str) -> None:
+        """
+        Writes the collected results to a CSV file.
 
-    def save_results(self, filename):
+        Parameters
+        ----------
+        filename: str
+            The name of the file.
+        """
         os.makedirs(os.path.dirname(filename), exist_ok=True)
         self.results.to_csv(filename)
 
-    def load_results(self, filename):
-        self.results = pd.concat([self.results, pd.read_csv(filename, index_col=0)])
+    def fit(self, instances: Union[List[str], List[Instance]]) -> None:
+        """
+        Trains all solvers with the provided training instances.
 
-    def load_state(self, filename):
-        for (solver_name, solver) in self.solvers.items():
-            solver.load_state(filename)
+        Parameters
+        ----------
+        instances:  Union[List[str], List[Instance]]
+            List of training instances. This can either be a list of instances
+            already loaded in memory, or a list of filenames pointing to pickled (and
+            optionally gzipped) files.
 
-    def fit(self, training_instances):
+        """
         for (solver_name, solver) in self.solvers.items():
-            solver.fit(training_instances)
-
-    @staticmethod
-    def _compute_gap(ub, lb):
-        if lb is None or ub is None or lb * ub < 0:
-            # solver did not find a solution and/or bound, use maximum gap possible
-            return 1.0
-        elif abs(ub - lb) < 1e-6:
-            # avoid division by zero when ub = lb = 0
-            return 0.0
-        else:
-            # divide by max(abs(ub),abs(lb)) to ensure gap <= 1
-            return (ub - lb) / max(abs(ub), abs(lb))
-
-    def _push_result(self, result, solver, solver_name, instance):
-        if self.results is None:
-            self.results = pd.DataFrame(
-                # Show the following columns first in the CSV file
-                columns=[
-                    "Solver",
-                    "Instance",
-                ]
-            )
-        result["Solver"] = solver_name
-        result["Instance"] = instance
-        result["Gap"] = self._compute_gap(
-            ub=result["Upper bound"],
-            lb=result["Lower bound"],
-        )
-        result["Mode"] = solver.mode
-        self.results = self.results.append(pd.DataFrame([result]))
+            solver.fit(instances)
 
-    def _silence_miplearn_logger(self):
+    def _silence_miplearn_logger(self) -> None:
         miplearn_logger = logging.getLogger("miplearn")
         self.prev_log_level = miplearn_logger.getEffectiveLevel()
         miplearn_logger.setLevel(logging.WARNING)
 
-    def _restore_miplearn_logger(self):
+    def _restore_miplearn_logger(self) -> None:
         miplearn_logger = logging.getLogger("miplearn")
         miplearn_logger.setLevel(self.prev_log_level)
diff --git a/miplearn/components/component.py b/miplearn/components/component.py
index a51f936..054916f 100644
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from typing import Any, List, Union, TYPE_CHECKING
 
 from miplearn.instance import Instance
-from miplearn.types import MIPSolveStats, TrainingSample
+from miplearn.types import LearningSolveStats, TrainingSample
 
 if TYPE_CHECKING:
     from miplearn.solvers.learning import LearningSolver
@@ -47,7 +47,7 @@ class Component(ABC):
         solver: "LearningSolver",
         instance: Instance,
         model: Any,
-        stats: MIPSolveStats,
+        stats: LearningSolveStats,
         training_data: TrainingSample,
     ) -> None:
         """
@@ -61,13 +61,13 @@ class Component(ABC):
             The instance being solved.
         model: Any
             The concrete optimization model being solved.
-        stats: dict
+        stats: LearningSolveStats
             A dictionary containing statistics about the solution process, such as
             number of nodes explored and running time. Components are free to add
             their own statistics here. For example, PrimalSolutionComponent adds
             statistics regarding the number of predicted variables. All statistics in
             this dictionary are exported to the benchmark CSV file.
-        training_data: dict
+        training_data: TrainingSample
             A dictionary containing data that may be useful for training machine
             learning models and accelerating the solution process. Components are
             free to add their own training data here. For example,
diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py
index ecdc511..754bbd5 100644
--- a/miplearn/solvers/learning.py
+++ b/miplearn/solvers/learning.py
@@ -20,7 +20,7 @@ from miplearn.instance import Instance
 from miplearn.solvers import _RedirectOutput
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
-from miplearn.types import MIPSolveStats, TrainingSample
+from miplearn.types import MIPSolveStats, TrainingSample, LearningSolveStats
 
 logger = logging.getLogger(__name__)
 
@@ -127,7 +127,7 @@ class LearningSolver:
         output_filename: Optional[str] = None,
         discard_output: bool = False,
         tee: bool = False,
-    ) -> MIPSolveStats:
+    ) -> LearningSolveStats:
 
         # Load instance from file, if necessary
         filename = None
@@ -203,15 +203,24 @@ class LearningSolver:
 
         # Solve MILP
         logger.info("Solving MILP...")
-        stats = self.internal_solver.solve(
-            tee=tee,
-            iteration_cb=iteration_cb_wrapper,
-            lazy_cb=lazy_cb,
+        stats = cast(
+            LearningSolveStats,
+            self.internal_solver.solve(
+                tee=tee,
+                iteration_cb=iteration_cb_wrapper,
+                lazy_cb=lazy_cb,
+            ),
         )
         if "LP value" in training_sample.keys():
             stats["LP value"] = training_sample["LP value"]
+        stats["Solver"] = "default"
+        stats["Gap"] = self._compute_gap(
+            ub=stats["Upper bound"],
+            lb=stats["Lower bound"],
+        )
+        stats["Mode"] = self.mode
 
-        # Read MIP solution and bounds
+        # Add some information to training_sample
         training_sample["Lower bound"] = stats["Lower bound"]
         training_sample["Upper bound"] = stats["Upper bound"]
         training_sample["MIP log"] = stats["Log"]
@@ -242,7 +251,7 @@ class LearningSolver:
         output_filename: Optional[str] = None,
         discard_output: bool = False,
         tee: bool = False,
-    ) -> MIPSolveStats:
+    ) -> LearningSolveStats:
         """
         Solves the given instance. If trained machine-learning models are
         available, they will be used to accelerate the solution process.
@@ -275,7 +284,7 @@ class LearningSolver:
 
         Returns
         -------
-        MIPSolveStats
+        LearningSolveStats
             A dictionary of solver statistics containing at least the following
             keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
             "Sense", "Log", "Warm start value" and "LP value".
@@ -311,7 +320,7 @@ class LearningSolver:
         label: str = "Solve",
         output_filenames: Optional[List[str]] = None,
         discard_outputs: bool = False,
-    ) -> List[MIPSolveStats]:
+    ) -> List[LearningSolveStats]:
         """
         Solves multiple instances in parallel.
 
@@ -338,7 +347,7 @@ class LearningSolver:
 
         Returns
         -------
-        List[MIPSolveStats]
+        List[LearningSolveStats]
             List of solver statistics, with one entry for each provided instance.
             The list is the same you would obtain by calling
             `[solver.solve(p) for p in instances]`
@@ -384,3 +393,15 @@ class LearningSolver:
     def __getstate__(self) -> Dict:
         self.internal_solver = None
         return self.__dict__
+
+    @staticmethod
+    def _compute_gap(ub: Optional[float], lb: Optional[float]) -> Optional[float]:
+        if lb is None or ub is None or lb * ub < 0:
+            # solver did not find a solution and/or bound
+            return None
+        elif abs(ub - lb) < 1e-6:
+            # avoid division by zero when ub = lb = 0
+            return 0.0
+        else:
+            # divide by max(abs(ub),abs(lb)) to ensure gap <= 1
+            return (ub - lb) / max(abs(ub), abs(lb))
diff --git a/miplearn/solvers/tests/test_learning_solver.py b/miplearn/solvers/tests/test_learning_solver.py
index f706f49..b5b1838 100644
--- a/miplearn/solvers/tests/test_learning_solver.py
+++ b/miplearn/solvers/tests/test_learning_solver.py
@@ -130,3 +130,13 @@ def test_simulate_perfect():
         )
         stats = solver.solve(tmp.name)
         assert stats["Lower bound"] == stats["Predicted LB"]
+
+
+def test_gap():
+    assert LearningSolver._compute_gap(ub=0.0, lb=0.0) == 0.0
+    assert LearningSolver._compute_gap(ub=1.0, lb=0.5) == 0.5
+    assert LearningSolver._compute_gap(ub=1.0, lb=1.0) == 0.0
+    assert LearningSolver._compute_gap(ub=1.0, lb=-1.0) is None
+    assert LearningSolver._compute_gap(ub=1.0, lb=None) is None
+    assert LearningSolver._compute_gap(ub=None, lb=1.0) is None
+    assert LearningSolver._compute_gap(ub=None, lb=None) is None
diff --git a/miplearn/tests/test_benchmark.py b/miplearn/tests/test_benchmark.py
index 9e8011f..e2950c8 100644
--- a/miplearn/tests/test_benchmark.py
+++ b/miplearn/tests/test_benchmark.py
@@ -29,21 +29,7 @@ def test_benchmark():
     benchmark = BenchmarkRunner(test_solvers)
     benchmark.fit(train_instances)
     benchmark.parallel_solve(test_instances, n_jobs=2, n_trials=2)
-    assert benchmark.raw_results().values.shape == (12, 14)
+    assert benchmark.results.values.shape == (12, 14)
 
-    benchmark.save_results("/tmp/benchmark.csv")
+    benchmark.write_csv("/tmp/benchmark.csv")
     assert os.path.isfile("/tmp/benchmark.csv")
-
-    benchmark = BenchmarkRunner(test_solvers)
-    benchmark.load_results("/tmp/benchmark.csv")
-    assert benchmark.raw_results().values.shape == (12, 14)
-
-
-def test_gap():
-    assert BenchmarkRunner._compute_gap(ub=0.0, lb=0.0) == 0.0
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=0.5) == 0.5
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=1.0) == 0.0
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=-1.0) == 1.0
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=None) == 1.0
-    assert BenchmarkRunner._compute_gap(ub=None, lb=1.0) == 1.0
-    assert BenchmarkRunner._compute_gap(ub=None, lb=None) == 1.0
diff --git a/miplearn/types.py b/miplearn/types.py
index cd112ba..e7ec102 100644
--- a/miplearn/types.py
+++ b/miplearn/types.py
@@ -47,6 +47,25 @@ MIPSolveStats = TypedDict(
     },
 )
 
+LearningSolveStats = TypedDict(
+    "LearningSolveStats",
+    {
+        "Gap": Optional[float],
+        "Instance": Union[str, int],
+        "LP value": Optional[float],
+        "Log": str,
+        "Lower bound": Optional[float],
+        "Mode": str,
+        "Nodes": Optional[int],
+        "Sense": str,
+        "Solver": str,
+        "Upper bound": Optional[float],
+        "Wallclock time": float,
+        "Warm start value": Optional[float],
+    },
+    total=False,
+)
+
 IterationCallback = Callable[[], bool]
 
 LazyCallback = Callable[[Any, Any], None]
diff --git a/mkdocs.yml b/mkdocs.yml
index 97daca1..df6410f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -6,10 +6,10 @@ edit_uri: edit/dev/docs/
 nav:
     - Home: index.md
     - Usage: usage.md
-    - Benchmark: benchmark.md
     - Problems: problems.md
     - Customization: customization.md
     - About: about.md
+    - API: api/miplearn/index.html
 plugins:
   - search
 markdown_extensions:
diff --git a/requirements.txt b/requirements.txt
index 910f30e..a22ddab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,4 @@ tqdm~=4.54
 black==20.8b1
 pre-commit~=2.9
 mypy==0.790
+pdoc3==0.7.*