MIPLearn v0.3

2023-06-08 11:25:39 -05:00
parent 6cc253a903
commit 1ea989d48a
172 changed files with 10495 additions and 24812 deletions

View File

@@ -1,31 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from .benchmark import BenchmarkRunner
from .classifiers import Classifier, Regressor
from .classifiers.adaptive import AdaptiveClassifier
from .classifiers.sklearn import ScikitLearnRegressor, ScikitLearnClassifier
from .classifiers.threshold import MinPrecisionThreshold
from .components.component import Component
from .components.dynamic_lazy import DynamicLazyConstraintsComponent
from .components.dynamic_user_cuts import UserCutsComponent
from .components.objective import ObjectiveValueComponent
from .components.primal import PrimalSolutionComponent
from .components.static_lazy import StaticLazyConstraintsComponent
from .instance.base import Instance
from .instance.picklegz import (
PickleGzInstance,
write_pickle_gz,
read_pickle_gz,
write_pickle_gz_multiple,
save,
load,
)
from .log import setup_logger
from .solvers.gurobi import GurobiSolver
from .solvers.internal import InternalSolver
from .solvers.learning import LearningSolver
from .solvers.pyomo.base import BasePyomoSolver
from .solvers.pyomo.cplex import CplexPyomoSolver
from .solvers.pyomo.gurobi import GurobiPyomoSolver

View File

@@ -1,264 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
import os
from typing import Dict, List, Any, Optional, Callable
import pandas as pd
from miplearn.components.component import Component
from miplearn.instance.base import Instance
from miplearn.solvers.learning import LearningSolver, FileInstanceWrapper
from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
logger = logging.getLogger(__name__)
class BenchmarkRunner:
"""
Utility class that simplifies the task of comparing the performance of different
solvers.
Parameters
----------
solvers: Dict[str, LearningSolver]
Dictionary containing the solvers to compare. Solvers may have different
arguments and components. The key should be the name of the solver. It
appears in the exported tables of results.
"""
def __init__(self, solvers: Dict[str, LearningSolver]) -> None:
self.solvers: Dict[str, LearningSolver] = solvers
self.results = pd.DataFrame(
columns=[
"Solver",
"Instance",
]
)
def parallel_solve(
self,
filenames: List[str],
build_model: Callable,
n_jobs: int = 1,
n_trials: int = 1,
progress: bool = False,
) -> None:
self._silence_miplearn_logger()
trials = filenames * n_trials
for (solver_name, solver) in self.solvers.items():
results = solver.parallel_solve(
trials,
build_model,
n_jobs=n_jobs,
label="benchmark (%s)" % solver_name,
progress=progress,
)
for i in range(len(trials)):
idx = i % len(filenames)
results[i]["Solver"] = solver_name
results[i]["Instance"] = idx
self.results = pd.concat([self.results, pd.DataFrame([results[i]])])
self._restore_miplearn_logger()
def write_csv(self, filename: str) -> None:
"""
Writes the collected results to a CSV file.
Parameters
----------
filename: str
The name of the file.
"""
os.makedirs(os.path.dirname(filename), exist_ok=True)
self.results.to_csv(filename)
def fit(
self,
filenames: List[str],
build_model: Callable,
progress: bool = False,
n_jobs: int = 1,
) -> None:
components = []
instances: List[Instance] = [
FileInstanceWrapper(f, build_model, mode="r") for f in filenames
]
for (solver_name, solver) in self.solvers.items():
if solver_name == "baseline":
continue
components += solver.components.values()
Component.fit_multiple(
components,
instances,
n_jobs=n_jobs,
progress=progress,
)
def _silence_miplearn_logger(self) -> None:
miplearn_logger = logging.getLogger("miplearn")
self.prev_log_level = miplearn_logger.getEffectiveLevel()
miplearn_logger.setLevel(logging.WARNING)
def _restore_miplearn_logger(self) -> None:
miplearn_logger = logging.getLogger("miplearn")
miplearn_logger.setLevel(self.prev_log_level)
def write_svg(
self,
output: Optional[str] = None,
) -> None:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_style("whitegrid")
sns.set_palette("Blues_r")
groups = self.results.groupby("Instance")
best_lower_bound = groups["mip_lower_bound"].transform("max")
best_upper_bound = groups["mip_upper_bound"].transform("min")
self.results["Relative lower bound"] = self.results["mip_lower_bound"] / best_lower_bound
self.results["Relative upper bound"] = self.results["mip_upper_bound"] / best_upper_bound
if (self.results["mip_sense"] == "min").any():
primal_column = "Relative upper bound"
obj_column = "mip_upper_bound"
predicted_obj_column = "Objective: Predicted upper bound"
else:
primal_column = "Relative lower bound"
obj_column = "mip_lower_bound"
predicted_obj_column = "Objective: Predicted lower bound"
palette = {
"baseline": "#9b59b6",
"ml-exact": "#3498db",
"ml-heuristic": "#95a5a6",
}
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(
nrows=2,
ncols=2,
figsize=(8, 8),
)
# Wallclock time
sns.stripplot(
x="Solver",
y="mip_wallclock_time",
data=self.results,
ax=ax1,
jitter=0.25,
palette=palette,
size=2.0,
)
sns.barplot(
x="Solver",
y="mip_wallclock_time",
data=self.results,
ax=ax1,
errwidth=0.0,
alpha=0.4,
palette=palette,
)
ax1.set(ylabel="Wallclock time (s)")
# Gap
sns.stripplot(
x="Solver",
y="Gap",
jitter=0.25,
data=self.results[self.results["Solver"] != "ml-heuristic"],
ax=ax2,
palette=palette,
size=2.0,
)
ax2.set(ylabel="Relative MIP gap")
# Relative primal bound
sns.stripplot(
x="Solver",
y=primal_column,
jitter=0.25,
data=self.results[self.results["Solver"] == "ml-heuristic"],
ax=ax3,
palette=palette,
size=2.0,
)
sns.scatterplot(
x=obj_column,
y=predicted_obj_column,
hue="Solver",
data=self.results[self.results["Solver"] == "ml-exact"],
ax=ax4,
palette=palette,
size=2.0,
)
# Predicted vs actual primal bound
xlim, ylim = ax4.get_xlim(), ax4.get_ylim()
ax4.plot(
[-1e10, 1e10],
[-1e10, 1e10],
ls="-",
color="#cccccc",
)
ax4.set_xlim(xlim)
ax4.set_ylim(xlim)  # same limits on both axes, so the identity line sits at 45 degrees
ax4.get_legend().remove()
ax4.set(
ylabel="Predicted optimal value",
xlabel="Actual optimal value",
)
fig.tight_layout()
plt.savefig(output)
@ignore_warnings(category=ConvergenceWarning)
def run_benchmarks(
train_instances: List[Instance],
test_instances: List[Instance],
n_jobs: int = 4,
n_trials: int = 1,
progress: bool = False,
solver: Any = None,
) -> None:
if solver is None:
solver = GurobiPyomoSolver()
benchmark = BenchmarkRunner(
solvers={
"baseline": LearningSolver(
solver=solver.clone(),
),
"ml-exact": LearningSolver(
solver=solver.clone(),
),
"ml-heuristic": LearningSolver(
solver=solver.clone(),
mode="heuristic",
),
}
)
benchmark.solvers["baseline"].parallel_solve(
train_instances,
n_jobs=n_jobs,
progress=progress,
)
benchmark.fit(
train_instances,
n_jobs=n_jobs,
progress=progress,
)
benchmark.parallel_solve(
test_instances,
n_jobs=n_jobs,
n_trials=n_trials,
progress=progress,
)
benchmark.write_svg("benchmark.svg")  # output path assumed
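A minimal usage sketch of the `BenchmarkRunner` API shown above, assuming `LearningSolver`'s default arguments; the file names and the `build_model` callable are hypothetical placeholders:

```python
from miplearn.benchmark import BenchmarkRunner
from miplearn.solvers.learning import LearningSolver

def build_model(data_filename: str):
    ...  # user-provided: reconstruct the optimization model from the data file

# Compare a plain solver against an ML-enhanced one; the dictionary keys
# become the "Solver" column in the exported results.
runner = BenchmarkRunner(
    solvers={
        "baseline": LearningSolver(),
        "ml-exact": LearningSolver(),
    }
)
runner.parallel_solve(
    ["data/test-001.pkl.gz", "data/test-002.pkl.gz"],  # hypothetical files
    build_model,
    n_jobs=2,
)
runner.write_csv("results/benchmark.csv")
```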

View File

@@ -1,163 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod
from typing import Optional
import numpy as np
class Classifier(ABC):
"""
A Classifier decides which class each sample belongs to, based on historical
data.
"""
def __init__(self) -> None:
self.n_features: Optional[int] = None
self.n_classes: Optional[int] = None
@abstractmethod
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
"""
Trains the classifier.
Parameters
----------
x_train: np.ndarray
An array of features with shape (`n_samples`, `n_features`). Each entry
must be a float.
y_train: np.ndarray
An array of labels with shape (`n_samples`, `n_classes`). Each entry must be
a bool, and there must be exactly one True element in each row.
"""
assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray)
assert x_train.dtype in [
np.float16,
np.float32,
np.float64,
], f"x_train.dtype should be float. Found {x_train.dtype} instead."
assert y_train.dtype == np.bool_
assert len(x_train.shape) == 2
assert len(y_train.shape) == 2
(n_samples_x, n_features) = x_train.shape
(n_samples_y, n_classes) = y_train.shape
assert n_samples_y == n_samples_x
self.n_features = n_features
self.n_classes = n_classes
@abstractmethod
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
"""
Predicts the probability of each sample belonging to each class. Must be called
after fit.
Parameters
----------
x_test: np.ndarray
An array of features with shape (`n_samples`, `n_features`). The number of
features in `x_test` must match the number of features in `x_train` provided
to `fit`.
Returns
-------
np.ndarray
An array of predicted probabilities with shape (`n_samples`, `n_classes`),
where `n_classes` is the number of columns in `y_train` provided to `fit`.
"""
assert self.n_features is not None
assert isinstance(x_test, np.ndarray)
assert len(x_test.shape) == 2
(n_samples, n_features_x) = x_test.shape
assert n_features_x == self.n_features, (
f"Test and training data have different number of "
f"features: {n_features_x} != {self.n_features}"
)
return np.ndarray([])
@abstractmethod
def clone(self) -> "Classifier":
"""
Returns an unfitted copy of this classifier with the same hyperparameters.
"""
pass
class Regressor(ABC):
"""
A Regressor tries to predict the values of some continuous variables, given the
values of other variables.
"""
def __init__(self) -> None:
self.n_inputs: Optional[int] = None
@abstractmethod
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
"""
Trains the regressor.
Parameters
----------
x_train: np.ndarray
An array of inputs with shape (`n_samples`, `n_inputs`). Each entry must be
a float.
y_train: np.ndarray
An array of outputs with shape (`n_samples`, `n_outputs`). Each entry must
be a float.
"""
assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray)
assert x_train.dtype in [np.float16, np.float32, np.float64]
assert y_train.dtype in [np.float16, np.float32, np.float64]
assert len(x_train.shape) == 2, (
f"Parameter x_train should be a square matrix. "
f"Found {x_train.shape} ndarray instead."
)
assert len(y_train.shape) == 2, (
f"Parameter y_train should be a square matrix. "
f"Found {y_train.shape} ndarray instead."
)
(n_samples_x, n_inputs) = x_train.shape
(n_samples_y, n_outputs) = y_train.shape
assert n_samples_y == n_samples_x
self.n_inputs = n_inputs
@abstractmethod
def predict(self, x_test: np.ndarray) -> np.ndarray:
"""
Predicts the values of the output variables. Must be called after fit.
Parameters
----------
x_test: np.ndarray
An array of inputs with shape (`n_samples`, `n_inputs`), where `n_inputs`
must match the number of columns in `x_train` provided to `fit`.
Returns
-------
np.ndarray
An array of outputs with shape (`n_samples`, `n_outputs`), where
`n_outputs` is the number of columns in `y_train` provided to `fit`.
"""
assert self.n_inputs is not None
assert isinstance(x_test, np.ndarray), (
f"Parameter x_train must be np.ndarray. "
f"Found {x_test.__class__.__name__} instead."
)
assert len(x_test.shape) == 2
(n_samples, n_inputs_x) = x_test.shape
assert n_inputs_x == self.n_inputs, (
f"Test and training data have different number of "
f"inputs: {n_inputs_x} != {self.n_inputs}"
)
return np.ndarray([])
@abstractmethod
def clone(self) -> "Regressor":
"""
Returns an unfitted copy of this regressor with the same hyperparameters.
"""
pass
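The shape and dtype contract enforced by these abstract `fit` methods can be illustrated with a small sketch:

```python
import numpy as np

# Features: float matrix of shape (n_samples, n_features).
x_train = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float64)
# Labels: boolean one-hot matrix (n_samples, n_classes), one True per row.
y_train = np.array([[True, False], [False, True], [False, True]])

assert x_train.dtype in (np.float16, np.float32, np.float64)
assert y_train.dtype == np.bool_
assert x_train.shape[0] == y_train.shape[0]  # same number of samples
```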

View File

@@ -1,135 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Dict, Optional
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier
logger = logging.getLogger(__name__)
class CandidateClassifierSpecs:
"""
Specifications describing how to construct a certain classifier, and under
which circumstances it can be used.
Parameters
----------
classifier: Classifier
The (unfitted) classifier to construct; it is cloned before each use.
min_samples: int
Minimum number of samples for this classifier to be considered.
"""
def __init__(
self,
classifier: Classifier,
min_samples: int = 0,
) -> None:
self.min_samples = min_samples
self.classifier = classifier
class AdaptiveClassifier(Classifier):
"""
A meta-classifier that dynamically selects which candidate classifier to use,
based on each candidate's cross-validation score on the training data set.
Parameters
----------
candidates: Dict[str, CandidateClassifierSpecs]
A dictionary of candidate classifiers to consider, mapping the name of the
candidate to its specs, which describes how to construct it and under what
scenarios. If no candidates are provided, uses a fixed set of defaults,
which includes `CountingClassifier`, `KNeighborsClassifier`,
`LogisticRegression` and `RandomForestClassifier`.
"""
def __init__(
self,
candidates: Optional[Dict[str, CandidateClassifierSpecs]] = None,
) -> None:
super().__init__()
if candidates is None:
candidates = {
"forest(5,10)": CandidateClassifierSpecs(
classifier=ScikitLearnClassifier(
RandomForestClassifier(
n_estimators=5,
min_samples_split=10,
),
),
min_samples=100,
),
"knn(100)": CandidateClassifierSpecs(
classifier=ScikitLearnClassifier(
KNeighborsClassifier(n_neighbors=100)
),
min_samples=100,
),
"logistic": CandidateClassifierSpecs(
classifier=ScikitLearnClassifier(
make_pipeline(
StandardScaler(),
LogisticRegression(),
)
),
min_samples=30,
),
"counting": CandidateClassifierSpecs(
classifier=CountingClassifier(),
),
}
self.candidates = candidates
self.classifier: Optional[Classifier] = None
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
n_samples = x_train.shape[0]
assert y_train.shape == (n_samples, 2)
# If almost all samples belong to the same class, return a fixed prediction and
# skip all the other steps.
if y_train[:, 0].mean() > 0.99 or y_train[:, 1].mean() > 0.99:
self.classifier = CountingClassifier()
self.classifier.fit(x_train, y_train)
return
best_name, best_clf, best_score = None, None, -float("inf")
for (name, specs) in self.candidates.items():
if n_samples < specs.min_samples:
continue
clf = specs.classifier.clone()
if isinstance(clf, ScikitLearnClassifier):
proba = cross_val_predict(
clf.inner_clf, x_train, y_train[:, 1], method="predict_proba"
)[:, 1]
else:
clf.fit(x_train, y_train)
proba = clf.predict_proba(x_train)[:, 1]
score = roc_auc_score(y_train[:, 1], proba)
if score > best_score:
best_name, best_clf, best_score = name, clf, score
logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score))
if isinstance(best_clf, ScikitLearnClassifier):
best_clf.fit(x_train, y_train)
self.classifier = best_clf
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
assert self.classifier is not None
return self.classifier.predict_proba(x_test)
def clone(self) -> "AdaptiveClassifier":
return AdaptiveClassifier(self.candidates)
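A short sketch of constructing `AdaptiveClassifier` with a custom candidate pool; the SVC candidate below is illustrative, not part of the defaults:

```python
from sklearn.svm import SVC
from miplearn.classifiers.adaptive import (
    AdaptiveClassifier,
    CandidateClassifierSpecs,
)
from miplearn.classifiers.sklearn import ScikitLearnClassifier

# Only consider the SVC once at least 50 training samples are available;
# fit() then keeps the candidate with the best ROC AUC.
clf = AdaptiveClassifier(
    candidates={
        "svc": CandidateClassifierSpecs(
            classifier=ScikitLearnClassifier(SVC(probability=True)),
            min_samples=50,
        ),
    }
)
```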

View File

@@ -1,45 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, cast
import numpy as np
from miplearn.classifiers import Classifier
class CountingClassifier(Classifier):
"""
A classifier that generates constant predictions, based only on the frequency of
the training labels. For example, suppose `y_train` is given by:
```python
y_train = np.array([
[True, False],
[False, True],
[False, True],
])
```
Then `predict_proba` always returns approximately `[0.33, 0.67]` for every sample,
regardless of `x_test`. It essentially counts how many times each label appeared,
hence the name.
"""
def __init__(self) -> None:
super().__init__()
self.mean: Optional[np.ndarray] = None
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
self.mean = cast(np.ndarray, np.mean(y_train, axis=0))
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
n_samples = x_test.shape[0]
return np.array([self.mean for _ in range(n_samples)])
def __repr__(self) -> str:
return "CountingClassifier(mean=%s)" % self.mean
def clone(self) -> "CountingClassifier":
return CountingClassifier()
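A runnable sketch of the docstring example above:

```python
import numpy as np
from miplearn.classifiers.counting import CountingClassifier

clf = CountingClassifier()
x_train = np.zeros((3, 1), dtype=np.float64)  # features are ignored
y_train = np.array([[True, False], [False, True], [False, True]])
clf.fit(x_train, y_train)
print(clf.predict_proba(np.zeros((2, 1))))  # approx. [[0.33 0.67], [0.33 0.67]]
```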

View File

@@ -1,132 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Optional, List
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from miplearn.classifiers import Classifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier
logger = logging.getLogger(__name__)
class CrossValidatedClassifier(Classifier):
"""
A meta-classifier that, upon training, evaluates the performance of a candidate
classifier on the training data set using k-fold cross-validation, then either
adopts the candidate, if its cross-validation score is high enough, or falls
back to constant predictions otherwise.
Parameters
----------
classifier: ScikitLearnClassifier
The candidate classifier; it is cloned before fitting.
threshold: float
Number from zero to one indicating how well the candidate classifier must
perform to be adopted. The threshold is specified in comparison to a dummy
classifier trained on the same dataset. For example, a threshold of 0.0
indicates that any classifier as good as the dummy predictor is acceptable. A
threshold of 1.0 indicates that only classifiers with perfect
cross-validation scores are acceptable. Other numbers are a linear
interpolation of these two extremes.
constant: Optional[List[bool]]
If the candidate classifier fails to meet the threshold, use a dummy classifier
which always returns this prediction instead. The list should have exactly as
many elements as the number of columns of `x_train` provided to `fit`.
cv: int
Number of folds.
scoring: str
Scoring function.
"""
def __init__(
self,
classifier: ScikitLearnClassifier = ScikitLearnClassifier(LogisticRegression()),
threshold: float = 0.75,
constant: Optional[List[bool]] = None,
cv: int = 5,
scoring: str = "accuracy",
):
super().__init__()
if constant is None:
constant = [True, False]
self.n_classes = len(constant)
self.classifier: Optional[ScikitLearnClassifier] = None
self.classifier_prototype = classifier
self.constant: List[bool] = constant
self.threshold = threshold
self.cv = cv
self.scoring = scoring
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
(n_samples, n_features) = x_train.shape
assert y_train.shape[1] == len(self.constant)
# Calculate dummy score and absolute score threshold
y_train_avg = np.average(y_train)
dummy_score = max(y_train_avg, 1 - y_train_avg)
absolute_threshold = 1.0 * self.threshold + dummy_score * (1 - self.threshold)
# Calculate cross validation score and decide which classifier to use
clf = self.classifier_prototype.clone()
assert clf is not None
assert isinstance(clf, ScikitLearnClassifier), (
f"The provided classifier callable must return a ScikitLearnClassifier. "
f"Found {clf.__class__.__name__} instead. If this is a scikit-learn "
f"classifier, you must wrap it with ScikitLearnClassifier."
)
cv_score = float(
np.mean(
cross_val_score(
clf.inner_clf,
x_train,
y_train[:, 1],
cv=self.cv,
scoring=self.scoring,
)
)
)
if cv_score >= absolute_threshold:
logger.debug(
"cv_score is above threshold (%.2f >= %.2f); keeping"
% (cv_score, absolute_threshold)
)
self.classifier = clf
else:
logger.debug(
"cv_score is below threshold (%.2f < %.2f); discarding"
% (cv_score, absolute_threshold)
)
self.classifier = ScikitLearnClassifier(
DummyClassifier(
strategy="constant",
constant=self.constant[1],
)
)
# Train chosen classifier
assert self.classifier is not None
assert isinstance(self.classifier, ScikitLearnClassifier)
self.classifier.fit(x_train, y_train)
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
assert self.classifier is not None
return self.classifier.predict_proba(x_test)
def clone(self) -> "CrossValidatedClassifier":
return CrossValidatedClassifier(
classifier=self.classifier_prototype,
threshold=self.threshold,
constant=self.constant,
cv=self.cv,
scoring=self.scoring,
)
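A worked example of the acceptance rule in `fit`: the required cross-validation score interpolates between the dummy score and a perfect score.

```python
# Suppose 70% of the training labels are positive and threshold = 0.75.
y_train_avg = 0.7
dummy_score = max(y_train_avg, 1 - y_train_avg)  # 0.7, by always predicting positive
threshold = 0.75
absolute_threshold = 1.0 * threshold + dummy_score * (1 - threshold)
print(absolute_threshold)  # 0.925: the candidate needs cv accuracy >= 0.925
```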

View File

@@ -0,0 +1,61 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import List, Any, Callable, Optional
import numpy as np
import sklearn
from sklearn.base import BaseEstimator
from sklearn.utils.multiclass import unique_labels
class MinProbabilityClassifier(BaseEstimator):
"""
Meta-classifier that returns NaN for predictions made by a base classifier that
have probability below a given threshold. More specifically, this meta-classifier
calls base_clf.predict_proba and compares the result against the provided
thresholds. If the probability for one of the classes is above its threshold,
the meta-classifier returns that prediction. Otherwise, it returns NaN.
"""
def __init__(
self,
base_clf: Any,
thresholds: List[float],
clone_fn: Callable[[Any], Any] = sklearn.base.clone,
) -> None:
assert len(thresholds) == 2
self.base_clf = base_clf
self.thresholds = thresholds
self.clone_fn = clone_fn
self.clf_: Optional[Any] = None
self.classes_: Optional[List[Any]] = None
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
assert len(y.shape) == 1
assert len(x.shape) == 2
classes = unique_labels(y)
assert len(classes) == len(self.thresholds)
self.clf_ = self.clone_fn(self.base_clf)
self.clf_.fit(x, y)
self.classes_ = self.clf_.classes_
def predict(self, x: np.ndarray) -> np.ndarray:
assert self.clf_ is not None
assert self.classes_ is not None
y_proba = self.clf_.predict_proba(x)
assert len(y_proba.shape) == 2
assert y_proba.shape[0] == x.shape[0]
assert y_proba.shape[1] == 2
n_samples = x.shape[0]
y_pred = []
for sample_idx in range(n_samples):
yi = float("nan")
for (class_idx, class_val) in enumerate(self.classes_):
if y_proba[sample_idx, class_idx] >= self.thresholds[class_idx]:
yi = class_val
y_pred.append(yi)
return np.array(y_pred)
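A short usage sketch; the module path below is assumed, so adjust the import to wherever `MinProbabilityClassifier` lives in your tree:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from miplearn.classifiers.minprob import MinProbabilityClassifier  # path assumed

# Keep only predictions made with at least 90% probability; everything
# else becomes NaN.
clf = MinProbabilityClassifier(
    base_clf=LogisticRegression(),
    thresholds=[0.90, 0.90],
)
x = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
clf.fit(x, y)
print(clf.predict(x))  # e.g. [0.0, nan, nan, 1.0]
```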

View File

@@ -0,0 +1,51 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Callable, Optional
import numpy as np
import sklearn.base
from sklearn.base import BaseEstimator
from sklearn.utils.multiclass import unique_labels
class SingleClassFix(BaseEstimator):
"""
Some sklearn classifiers, such as logistic regression, have issues with datasets
that contain a single class. This meta-classifier fixes the issue. If the
training data contains a single class, this meta-classifier always returns that
class as a prediction. Otherwise, it fits the provided base classifier,
and returns its predictions instead.
"""
def __init__(
self,
base_clf: BaseEstimator,
clone_fn: Callable = sklearn.base.clone,
):
self.base_clf = base_clf
self.clf_: Optional[BaseEstimator] = None
self.constant_ = None
self.classes_ = None
self.clone_fn = clone_fn
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
classes = unique_labels(y)
if len(classes) == 1:
assert classes[0] is not None
self.clf_ = None
self.constant_ = classes[0]
self.classes_ = classes
else:
self.clf_ = self.clone_fn(self.base_clf)
assert self.clf_ is not None
self.clf_.fit(x, y)
self.constant_ = None
self.classes_ = self.clf_.classes_
def predict(self, x: np.ndarray) -> np.ndarray:
if self.constant_ is not None:
return np.full(x.shape[0], self.constant_)
else:
assert self.clf_ is not None
return self.clf_.predict(x)
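A short usage sketch (module path assumed):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from miplearn.classifiers.singleclass import SingleClassFix  # path assumed

# LogisticRegression alone would raise on this single-class dataset;
# the wrapper simply memorizes and returns the constant class.
clf = SingleClassFix(LogisticRegression())
x = np.array([[0.0], [1.0], [2.0]])
y = np.array([1, 1, 1])
clf.fit(x, y)
print(clf.predict(x))  # [1 1 1]
```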

View File

@@ -1,93 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, Any, cast
import numpy as np
import sklearn
from miplearn.classifiers import Classifier, Regressor
class ScikitLearnClassifier(Classifier):
"""
Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs have the
correct dimensions and types.
"""
def __init__(self, clf: Any) -> None:
super().__init__()
self.inner_clf = clf
self.constant: Optional[np.ndarray] = None
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
(n_samples, n_classes) = y_train.shape
assert n_classes == 2, (
f"Scikit-learn classifiers must have exactly two classes. "
f"{n_classes} classes were provided instead."
)
# When all samples belong to the same class, sklearn's predict_proba returns
# an array with a single column. The following check avoids this strange
# behavior.
mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
if mean.max() == 1.0:
self.constant = mean
return
self.inner_clf.fit(x_train, y_train[:, 1])
def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
super().predict_proba(x_test)
n_samples = x_test.shape[0]
if self.constant is not None:
return np.array([self.constant for n in range(n_samples)])
sklearn_proba = self.inner_clf.predict_proba(x_test)
if isinstance(sklearn_proba, list):
assert len(sklearn_proba) == self.n_classes
for pb in sklearn_proba:
assert isinstance(pb, np.ndarray)
assert pb.dtype in [np.float16, np.float32, np.float64]
assert pb.shape == (n_samples, 2)
proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
assert proba.shape == (n_samples, self.n_classes)
return proba
else:
assert isinstance(sklearn_proba, np.ndarray)
assert sklearn_proba.shape == (n_samples, 2)
return sklearn_proba
def clone(self) -> "ScikitLearnClassifier":
return ScikitLearnClassifier(
clf=sklearn.base.clone(self.inner_clf),
)
class ScikitLearnRegressor(Regressor):
"""
Wrapper for ScikitLearn regressors, which makes sure inputs and outputs have the
correct dimensions and types.
"""
def __init__(self, reg: Any) -> None:
super().__init__()
self.inner_reg = reg
def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
super().fit(x_train, y_train)
self.inner_reg.fit(x_train, y_train)
def predict(self, x_test: np.ndarray) -> np.ndarray:
super().predict(x_test)
n_samples = x_test.shape[0]
sklearn_pred = self.inner_reg.predict(x_test)
assert isinstance(sklearn_pred, np.ndarray)
assert sklearn_pred.shape[0] == n_samples
return sklearn_pred
def clone(self) -> "ScikitLearnRegressor":
return ScikitLearnRegressor(
reg=sklearn.base.clone(self.inner_reg),
)
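A short sketch of the wrapper contract: MIPLearn passes two-column boolean labels, and the wrapper hands scikit-learn the single binary column it expects.

```python
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from miplearn.classifiers.sklearn import ScikitLearnClassifier

clf = ScikitLearnClassifier(KNeighborsClassifier(n_neighbors=1))
x_train = np.array([[0.0], [1.0], [2.0], [3.0]])
y_train = np.array([[True, False], [False, True], [True, False], [False, True]])
clf.fit(x_train, y_train)          # inner classifier sees y_train[:, 1]
print(clf.predict_proba(x_train))  # shape (4, 2), one probability per class
```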

View File

@@ -1,143 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import abstractmethod, ABC
from typing import Optional, List
import numpy as np
from sklearn.metrics._ranking import _binary_clf_curve
from sklearn.model_selection import cross_val_predict
from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.adaptive import AdaptiveClassifier
from miplearn.classifiers import Classifier
class Threshold(ABC):
"""
Solver components ask the machine learning models how confident they are in each
prediction they make, then automatically discard all predictions that have low
confidence. A Threshold specifies how confident the ML models should be for a
prediction to be considered trustworthy.
To model dynamic thresholds, which automatically adjust themselves during
training to reach some desired target (such as minimum precision or minimum
recall), thresholds behave much like ML models themselves, with `fit`
and `predict` methods.
"""
@abstractmethod
def fit(
self,
clf: Classifier,
x_train: np.ndarray,
y_train: np.ndarray,
) -> None:
"""
Given a trained binary classifier `clf`, calibrates itself based on the
classifier's performance on the given training data set.
"""
assert isinstance(clf, Classifier)
assert isinstance(x_train, np.ndarray)
assert isinstance(y_train, np.ndarray)
n_samples = x_train.shape[0]
assert y_train.shape[0] == n_samples
@abstractmethod
def predict(self, x_test: np.ndarray) -> List[float]:
"""
Returns the minimum probability for a machine learning prediction to be
considered trustworthy. There is one value for each label.
"""
pass
@abstractmethod
def clone(self) -> "Threshold":
"""
Returns an unfitted copy of this threshold with the same hyperparameters.
"""
pass
class MinProbabilityThreshold(Threshold):
"""
A threshold which considers predictions trustworthy if their probability of being
correct, as computed by the machine learning models, is above a fixed value.
"""
def __init__(self, min_probability: List[float]):
self.min_probability = min_probability
def fit(self, clf: Classifier, x_train: np.ndarray, y_train: np.ndarray) -> None:
pass
def predict(self, x_test: np.ndarray) -> List[float]:
return self.min_probability
def clone(self) -> "MinProbabilityThreshold":
return MinProbabilityThreshold(self.min_probability)
class MinPrecisionThreshold(Threshold):
"""
A dynamic threshold which automatically adjusts itself during training to ensure
that the component achieves at least a given precision `p` on the training data
set. Note that increasing a component's minimum precision may reduce its recall.
"""
def __init__(self, min_precision: List[float]) -> None:
self.min_precision = min_precision
self._computed_threshold: Optional[List[float]] = None
def fit(
self,
clf: Classifier,
x_train: np.ndarray,
y_train: np.ndarray,
) -> None:
super().fit(clf, x_train, y_train)
(n_samples, n_classes) = y_train.shape
if isinstance(clf, AdaptiveClassifier) and isinstance(
clf.classifier, ScikitLearnClassifier
):
proba = cross_val_predict(
clf.classifier.inner_clf,
x_train,
y_train[:, 1],
method="predict_proba",
)
else:
proba = clf.predict_proba(x_train)
self._computed_threshold = [
self._compute(
y_train[:, i],
proba[:, i],
self.min_precision[i],
)
for i in range(n_classes)
]
def predict(self, x_test: np.ndarray) -> List[float]:
assert self._computed_threshold is not None
return self._computed_threshold
@staticmethod
def _compute(
y_actual: np.ndarray,
y_prob: np.ndarray,
min_precision: float,
min_recall: float = 0.1,
) -> float:
fps, tps, thresholds = _binary_clf_curve(y_actual, y_prob)
precision = tps / (tps + fps)
recall = tps / tps[-1]
for k in reversed(range(len(precision))):
if precision[k] >= min_precision and recall[k] >= min_recall:
return thresholds[k]
return float("inf")
def clone(self) -> "MinPrecisionThreshold":
return MinPrecisionThreshold(
min_precision=self.min_precision,
)
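A short sketch showing how a threshold is calibrated and queried; the data and the target precision are illustrative:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from miplearn.classifiers.sklearn import ScikitLearnClassifier
from miplearn.classifiers.threshold import MinPrecisionThreshold

x = np.array([[0.0], [1.0], [2.0], [3.0]], dtype=np.float64)
y = np.array([[True, False], [False, True], [True, False], [False, True]])
clf = ScikitLearnClassifier(LogisticRegression())
clf.fit(x, y)

# Calibrate per-class probability cutoffs targeting 95% precision.
threshold = MinPrecisionThreshold(min_precision=[0.95, 0.95])
threshold.fit(clf, x, y)
print(threshold.predict(x))  # [t0, t1]: minimum trustworthy probability per class
```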

View File

@@ -0,0 +1,86 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import json
import os
from io import StringIO
from os.path import exists
from typing import Callable, List
from ..h5 import H5File
from ..io import _RedirectOutput, gzip, _to_h5_filename
from ..parallel import p_umap
class BasicCollector:
def collect(
self,
filenames: List[str],
build_model: Callable,
n_jobs: int = 1,
progress: bool = False,
) -> None:
def _collect(data_filename):
h5_filename = _to_h5_filename(data_filename)
mps_filename = h5_filename.replace(".h5", ".mps")
if exists(h5_filename):
# Try to read optimal solution
mip_var_values = None
try:
with H5File(h5_filename, "r") as h5:
mip_var_values = h5.get_array("mip_var_values")
except Exception:
pass  # treat unreadable h5 files as corrupted
if mip_var_values is None:
print(f"Removing empty/corrupted h5 file: {h5_filename}")
os.remove(h5_filename)
else:
return
with H5File(h5_filename, "w") as h5:
streams = [StringIO()]
with _RedirectOutput(streams):
# Load and extract static features
model = build_model(data_filename)
model.extract_after_load(h5)
# Solve LP relaxation
relaxed = model.relax()
relaxed.optimize()
relaxed.extract_after_lp(h5)
# Solve MIP
model.optimize()
model.extract_after_mip(h5)
# Add lazy constraints to model
if (
hasattr(model, "fix_violations")
and model.fix_violations is not None
):
model.fix_violations(model, model.violations_, "aot")
h5.put_scalar(
"mip_constr_violations", json.dumps(model.violations_)
)
# Save MPS file
model.write(mps_filename)
gzip(mps_filename)
h5.put_scalar("mip_log", streams[0].getvalue())
if n_jobs > 1:
p_umap(
_collect,
filenames,
num_cpus=n_jobs,
desc="collect",
smoothing=0,
disable=not progress,
)
else:
for filename in filenames:
_collect(filename)
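A short usage sketch; the module path and file names are assumed, and `build_model` is the user-provided callable that rebuilds the model from a data file:

```python
from miplearn.collectors.basic import BasicCollector  # path assumed

def build_model(data_filename: str):
    ...  # user-provided: reconstruct the optimization model from the data file

BasicCollector().collect(
    ["data/train-001.pkl.gz", "data/train-002.pkl.gz"],  # hypothetical files
    build_model,
    n_jobs=4,
    progress=True,
)
```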

miplearn/collectors/lazy.py (new file, 117 lines)
View File

@@ -0,0 +1,117 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from io import StringIO
from typing import Callable
import gurobipy as gp
import numpy as np
from gurobipy import GRB, LinExpr
from ..h5 import H5File
from ..io import _RedirectOutput
class LazyCollector:
def __init__(
self,
min_constrs: int = 100_000,
time_limit: float = 900,
) -> None:
self.min_constrs = min_constrs
self.time_limit = time_limit
def collect(
self, data_filename: str, build_model: Callable, tol: float = 1e-6
) -> None:
h5_filename = f"{data_filename}.h5"
with H5File(h5_filename, "r+") as h5:
streams = [StringIO()]
lazy = None
with _RedirectOutput(streams):
slacks = h5.get_array("mip_constr_slacks")
assert slacks is not None
# Check minimum problem size
if len(slacks) < self.min_constrs:
print("Problem is too small. Skipping.")
h5.put_array("mip_constr_lazy", np.zeros(len(slacks)))
return
# Load model
print("Loading model...")
model = build_model(data_filename)
model.params.LazyConstraints = True
model.params.timeLimit = self.time_limit
gp_constrs = np.array(model.getConstrs())
gp_vars = np.array(model.getVars())
# Load constraints
lhs = h5.get_sparse("static_constr_lhs")
rhs = h5.get_array("static_constr_rhs")
sense = h5.get_array("static_constr_sense")
assert lhs is not None
assert rhs is not None
assert sense is not None
lhs_csr = lhs.tocsr()
lhs_csc = lhs.tocsc()
constr_idx = np.array(range(len(rhs)))
lazy = np.zeros(len(rhs))
# Drop loose constraints
selected = (slacks > 0) & ((sense == b"<") | (sense == b">"))
loose_constrs = gp_constrs[selected]
print(
f"Removing {len(loose_constrs):,d} constraints (out of {len(rhs):,d})..."
)
model.remove(list(loose_constrs))
# Filter to constraints that were dropped
lhs_csr = lhs_csr[selected, :]
lhs_csc = lhs_csc[selected, :]
rhs = rhs[selected]
sense = sense[selected]
constr_idx = constr_idx[selected]
lazy[selected] = 1
# Load warm start
var_names = h5.get_array("static_var_names")
var_values = h5.get_array("mip_var_values")
assert var_values is not None
assert var_names is not None
for (var_idx, var_name) in enumerate(var_names):
var = model.getVarByName(var_name.decode())
var.start = var_values[var_idx]
print("Solving MIP with lazy constraints callback...")
def callback(model: gp.Model, where: int) -> None:
assert rhs is not None
assert lazy is not None
assert sense is not None
if where == GRB.Callback.MIPSOL:
x_val = np.array(model.cbGetSolution(model.getVars()))
slack = lhs_csc * x_val - rhs
slack[sense == b">"] *= -1
is_violated = slack > tol
for (j, rhs_j) in enumerate(rhs):
if is_violated[j]:
lazy[constr_idx[j]] = 0
expr = LinExpr(
lhs_csr[j, :].data, gp_vars[lhs_csr[j, :].indices]
)
if sense[j] == b"<":
model.cbLazy(expr <= rhs_j)
elif sense[j] == b">":
model.cbLazy(expr >= rhs_j)
else:
raise RuntimeError(f"Unknown sense: {sense[j]}")
model.optimize(callback)
print(f"Marking {lazy.sum():,.0f} constraints as lazy...")
h5.put_array("mip_constr_lazy", lazy)
h5.put_scalar("mip_constr_lazy_log", streams[0].getvalue())

View File

@@ -0,0 +1,49 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import os
import subprocess
from typing import Callable
from ..h5 import H5File
class BranchPriorityCollector:
def __init__(
self,
time_limit: float = 900.0,
print_interval: int = 1,
node_limit: int = 500,
) -> None:
self.time_limit = time_limit
self.print_interval = print_interval
self.node_limit = node_limit
def collect(self, data_filename: str, _: Callable) -> None:
basename = data_filename.replace(".pkl.gz", "")
env = os.environ.copy()
env["JULIA_NUM_THREADS"] = "1"
ret = subprocess.run(
[
"julia",
"--project=.",
"-e",
(
f"using CPLEX, JuMP, MIPLearn.BB; "
f"BB.solve!("
f' optimizer_with_attributes(CPLEX.Optimizer, "CPXPARAM_Threads" => 1),'
f' "{basename}",'
f" print_interval={self.print_interval},"
f" time_limit={self.time_limit:.2f},"
f" node_limit={self.node_limit},"
f")"
),
],
check=True,
capture_output=True,
env=env,
)
h5_filename = f"{basename}.h5"
with H5File(h5_filename, "r+") as h5:
h5.put_scalar("bb_log", ret.stdout)

View File

@@ -1,47 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Dict
def classifier_evaluation_dict(
tp: int,
tn: int,
fp: int,
fn: int,
) -> Dict[str, float]:
p = tp + fn
n = fp + tn
d: Dict = {
"Predicted positive": fp + tp,
"Predicted negative": fn + tn,
"Condition positive": p,
"Condition negative": n,
"True positive": tp,
"True negative": tn,
"False positive": fp,
"False negative": fn,
"Accuracy": (tp + tn) / (p + n),
"F1 score": (2 * tp) / (2 * tp + fp + fn),
}
if p > 0:
d["Recall"] = tp / p
else:
d["Recall"] = 1.0
if tp + fp > 0:
d["Precision"] = tp / (tp + fp)
else:
d["Precision"] = 1.0
t = (p + n) / 100.0
d["Predicted positive (%)"] = d["Predicted positive"] / t
d["Predicted negative (%)"] = d["Predicted negative"] / t
d["Condition positive (%)"] = d["Condition positive"] / t
d["Condition negative (%)"] = d["Condition negative"] / t
d["True positive (%)"] = d["True positive"] / t
d["True negative (%)"] = d["True negative"] / t
d["False positive (%)"] = d["False positive"] / t
d["False negative (%)"] = d["False negative"] / t
return d
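A worked example of the metrics above:

```python
from miplearn.components import classifier_evaluation_dict

# 8 true positives, 80 true negatives, 2 false positives, 10 false negatives.
d = classifier_evaluation_dict(tp=8, tn=80, fp=2, fn=10)
print(d["Accuracy"])   # (8 + 80) / 100 = 0.88
print(d["Precision"])  # 8 / (8 + 2) = 0.80
print(d["Recall"])     # 8 / (8 + 10) = 0.444...
print(d["F1 score"])   # 16 / (16 + 2 + 10) = 0.571...
```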

View File

@@ -1,269 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Optional
import numpy as np
from tqdm.auto import tqdm
from p_tqdm import p_umap
from miplearn.features.sample import Sample
from miplearn.instance.base import Instance
from miplearn.types import LearningSolveStats, Category
if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver
# noinspection PyMethodMayBeStatic
class Component:
"""
A Component is an object which adds functionality to a LearningSolver.
For better code maintainability, LearningSolver simply delegates most of its
functionality to Components. Each Component is responsible for exactly one ML
strategy.
"""
def after_solve_lp(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
"""
Method called by LearningSolver after the root LP relaxation is solved.
See before_solve_lp for a description of the parameters.
"""
return
def after_solve_mip(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
"""
Method called by LearningSolver after the MIP is solved.
See before_solve_lp for a description of the parameters.
"""
return
def before_solve_lp(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
"""
Method called by LearningSolver before the root LP relaxation is solved.
Parameters
----------
solver: LearningSolver
The solver calling this method.
instance: Instance
The instance being solved.
model
The concrete optimization model being solved.
stats: LearningSolveStats
A dictionary containing statistics about the solution process, such as
number of nodes explored and running time. Components are free to add
their own statistics here. For example, PrimalSolutionComponent adds
statistics regarding the number of predicted variables. All statistics in
this dictionary are exported to the benchmark CSV file.
sample: miplearn.features.Sample
An object containing data that may be useful for training machine
learning models and accelerating the solution process. Components are
free to add their own training data here.
"""
return
def before_solve_mip(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
"""
Method called by LearningSolver before the MIP is solved.
See before_solve_lp for a description of the parameters.
"""
return
def fit_xy(
self,
x: Dict[Category, np.ndarray],
y: Dict[Category, np.ndarray],
) -> None:
"""
Given two dictionaries x and y, mapping the name of the category to matrices
of features and targets, this function does two things. First, for each
category, it creates a clone of the prototype regressor/classifier. Second,
it passes (x[category], y[category]) to the clone's fit method.
"""
return
def iteration_cb(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
) -> bool:
"""
Method called by LearningSolver at the end of each iteration.
After solving the MIP, LearningSolver calls `iteration_cb` of each component,
giving them a chance to modify the problem and resolve it before the solution
process ends. For example, the lazy constraint component uses `iteration_cb`
to check that all lazy constraints are satisfied.
If `iteration_cb` returns False for all components, the solution process
ends. If it returns True for any component, the MIP is solved again.
Parameters
----------
solver: LearningSolver
The solver calling this method.
instance: Instance
The instance being solved.
model: Any
The concrete optimization model being solved.
"""
return False
def lazy_cb(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
) -> None:
return
def sample_evaluate(
self,
instance: Optional[Instance],
sample: Sample,
) -> Dict[str, Dict[str, float]]:
return {}
def sample_xy(
self,
instance: Optional[Instance],
sample: Sample,
) -> Tuple[Dict, Dict]:
"""
Returns a pair of x and y dictionaries containing, respectively, the matrices
of ML features and the labels for the sample. If the training sample does not
include label information, returns (x, {}).
"""
return {}, {}
def pre_fit(self, pre: List[Any]) -> None:
pass
def user_cut_cb(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
) -> None:
return
def pre_sample_xy(self, instance: Instance, sample: Sample) -> Any:
pass
@staticmethod
def fit_multiple(
components: List["Component"],
instances: List[Instance],
n_jobs: int = 1,
progress: bool = False,
) -> None:
# Part I: Pre-fit
def _pre_sample_xy(instance: Instance) -> Dict:
pre_instance: Dict = {}
for (cidx, comp) in enumerate(components):
pre_instance[cidx] = []
instance.load()
for sample in instance.get_samples():
for (cidx, comp) in enumerate(components):
pre_instance[cidx].append(comp.pre_sample_xy(instance, sample))
instance.free()
return pre_instance
if n_jobs == 1:
pre = [_pre_sample_xy(instance) for instance in instances]
else:
pre = p_umap(
_pre_sample_xy,
instances,
num_cpus=n_jobs,
desc="pre-sample-xy",
disable=not progress,
)
pre_combined: Dict = {}
for (cidx, comp) in enumerate(components):
pre_combined[cidx] = []
for p in pre:
pre_combined[cidx].extend(p[cidx])
for (cidx, comp) in enumerate(components):
comp.pre_fit(pre_combined[cidx])
# Part II: Fit
def _sample_xy(instance: Instance) -> Tuple[Dict, Dict]:
x_instance: Dict = {}
y_instance: Dict = {}
for (cidx, comp) in enumerate(components):
x_instance[cidx] = {}
y_instance[cidx] = {}
instance.load()
for sample in instance.get_samples():
for (cidx, comp) in enumerate(components):
x = x_instance[cidx]
y = y_instance[cidx]
x_sample, y_sample = comp.sample_xy(instance, sample)
for cat in x_sample.keys():
if cat not in x:
x[cat] = []
y[cat] = []
x[cat] += x_sample[cat]
y[cat] += y_sample[cat]
instance.free()
return x_instance, y_instance
if n_jobs == 1:
xy = [_sample_xy(instance) for instance in instances]
else:
xy = p_umap(_sample_xy, instances, desc="sample-xy", disable=not progress)
for (cidx, comp) in enumerate(
tqdm(
components,
desc="fit",
disable=not progress,
)
):
x_comp: Dict = {}
y_comp: Dict = {}
for (x, y) in xy:
for cat in x[cidx].keys():
if cat not in x_comp:
x_comp[cat] = []
y_comp[cat] = []
x_comp[cat].extend(x[cidx][cat])
y_comp[cat].extend(y[cidx][cat])
for cat in x_comp.keys():
x_comp[cat] = np.array(x_comp[cat], dtype=np.float32)
y_comp[cat] = np.array(y_comp[cat])
comp.fit_xy(x_comp, y_comp)
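A minimal sketch of a custom component, showing the `sample_xy`/`fit_xy` contract that `fit_multiple` relies on; the feature and label values are illustrative:

```python
import numpy as np
from miplearn.components.component import Component

class MyComponent(Component):
    def sample_xy(self, instance, sample):
        # One feature row and one label row per training sample,
        # grouped by category.
        x = {"default": [[1.0, 2.0]]}
        y = {"default": [[True, False]]}
        return x, y

    def fit_xy(self, x, y):
        for category in x.keys():
            # x[category] and y[category] arrive as stacked np.ndarrays;
            # fit one clone of the prototype model per category here.
            assert isinstance(x[category], np.ndarray)
```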

View File

@@ -1,184 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import json
import logging
from typing import Dict, List, Tuple, Optional, Any, Set
import numpy as np
from overrides import overrides
from miplearn.features.extractor import FeaturesExtractor
from miplearn.classifiers import Classifier
from miplearn.classifiers.threshold import Threshold
from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component
from miplearn.features.sample import Sample
from miplearn.instance.base import Instance
from miplearn.types import ConstraintCategory, ConstraintName
logger = logging.getLogger(__name__)
class DynamicConstraintsComponent(Component):
"""
Base component used by both DynamicLazyConstraintsComponent and UserCutsComponent.
"""
def __init__(
self,
attr: str,
classifier: Classifier,
threshold: Threshold,
):
assert isinstance(classifier, Classifier)
self.threshold_prototype: Threshold = threshold
self.classifier_prototype: Classifier = classifier
self.classifiers: Dict[ConstraintCategory, Classifier] = {}
self.thresholds: Dict[ConstraintCategory, Threshold] = {}
self.known_violations: Dict[ConstraintName, Any] = {}
self.attr = attr
def sample_xy_with_cids(
self,
instance: Optional[Instance],
sample: Sample,
) -> Tuple[
Dict[ConstraintCategory, List[List[float]]],
Dict[ConstraintCategory, List[List[bool]]],
Dict[ConstraintCategory, List[ConstraintName]],
]:
if len(self.known_violations) == 0:
return {}, {}, {}
assert instance is not None
x: Dict[ConstraintCategory, List[List[float]]] = {}
y: Dict[ConstraintCategory, List[List[bool]]] = {}
cids: Dict[ConstraintCategory, List[ConstraintName]] = {}
known_cids = np.array(sorted(list(self.known_violations.keys())), dtype="S")
enforced_cids = None
enforced_encoded = sample.get_scalar(self.attr)
if enforced_encoded is not None:
enforced = self.decode(enforced_encoded)
enforced_cids = list(enforced.keys())
# Get user-provided constraint features
(
constr_features,
constr_categories,
constr_lazy,
) = FeaturesExtractor._extract_user_features_constrs(instance, known_cids)
# Augment with instance features
instance_features = sample.get_array("static_instance_features")
assert instance_features is not None
constr_features = np.hstack(
[
instance_features.reshape(1, -1).repeat(len(known_cids), axis=0),
constr_features,
]
)
categories = np.unique(constr_categories)
for c in categories:
x[c] = constr_features[constr_categories == c].tolist()
cids[c] = known_cids[constr_categories == c].tolist()
if enforced_cids is not None:
tmp = np.isin(cids[c], enforced_cids).reshape(-1, 1)
y[c] = np.hstack([~tmp, tmp]).tolist() # type: ignore
return x, y, cids
@overrides
def sample_xy(
self,
instance: Optional[Instance],
sample: Sample,
) -> Tuple[Dict, Dict]:
x, y, _ = self.sample_xy_with_cids(instance, sample)
return x, y
@overrides
def pre_fit(self, pre: List[Any]) -> None:
assert pre is not None
self.known_violations.clear()
for violations in pre:
for (vname, vdata) in violations.items():
self.known_violations[vname] = vdata
def sample_predict(
self,
instance: Instance,
sample: Sample,
) -> List[ConstraintName]:
pred: List[ConstraintName] = []
if len(self.known_violations) == 0:
logger.info("Classifiers not fitted. Skipping.")
return pred
x, _, cids = self.sample_xy_with_cids(instance, sample)
for category in x.keys():
assert category in self.classifiers
assert category in self.thresholds
clf = self.classifiers[category]
thr = self.thresholds[category]
nx = np.array(x[category])
proba = clf.predict_proba(nx)
t = thr.predict(nx)
for i in range(proba.shape[0]):
if proba[i][1] > t[1]:
pred += [cids[category][i]]
return pred
@overrides
def pre_sample_xy(self, instance: Instance, sample: Sample) -> Any:
attr_encoded = sample.get_scalar(self.attr)
assert attr_encoded is not None
return self.decode(attr_encoded)
@overrides
def fit_xy(
self,
x: Dict[ConstraintCategory, np.ndarray],
y: Dict[ConstraintCategory, np.ndarray],
) -> None:
for category in x.keys():
self.classifiers[category] = self.classifier_prototype.clone()
self.thresholds[category] = self.threshold_prototype.clone()
npx = np.array(x[category])
npy = np.array(y[category])
self.classifiers[category].fit(npx, npy)
self.thresholds[category].fit(self.classifiers[category], npx, npy)
@overrides
def sample_evaluate(
self,
instance: Instance,
sample: Sample,
) -> Dict[str, float]:
attr_encoded = sample.get_scalar(self.attr)
assert attr_encoded is not None
actual_violations = DynamicConstraintsComponent.decode(attr_encoded)
actual = set(actual_violations.keys())
pred = set(self.sample_predict(instance, sample))
tp, tn, fp, fn = 0, 0, 0, 0
for cid in self.known_violations.keys():
if cid in pred:
if cid in actual:
tp += 1
else:
fp += 1
else:
if cid in actual:
fn += 1
else:
tn += 1
return classifier_evaluation_dict(tp=tp, tn=tn, fp=fp, fn=fn)
@staticmethod
def encode(violations: Dict[ConstraintName, Any]) -> str:
return json.dumps({k.decode(): v for (k, v) in violations.items()})
@staticmethod
def decode(violations_encoded: str) -> Dict[ConstraintName, Any]:
violations = json.loads(violations_encoded)
return {k.encode(): v for (k, v) in violations.items()}
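A round-trip sketch of the `encode`/`decode` helpers above: constraint names are bytes in memory and JSON string keys on disk.

```python
from miplearn.components.dynamic_common import DynamicConstraintsComponent

violations = {b"cut[0]": {"clique": [0, 1, 2]}}
encoded = DynamicConstraintsComponent.encode(violations)
print(encoded)  # {"cut[0]": {"clique": [0, 1, 2]}}
assert DynamicConstraintsComponent.decode(encoded) == violations
```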

View File

@@ -1,223 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import json
import logging
from typing import Dict, List, TYPE_CHECKING, Tuple, Any, Optional
import numpy as np
from overrides import overrides
from tqdm.auto import tqdm
from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.threshold import MinProbabilityThreshold, Threshold
from miplearn.components.component import Component
from miplearn.components.dynamic_common import DynamicConstraintsComponent
from miplearn.features.sample import Sample, Hdf5Sample
from miplearn.instance.base import Instance
from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
from p_tqdm import p_map
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver
class DynamicLazyConstraintsComponent(Component):
"""
A component that predicts which lazy constraints to enforce.
"""
def __init__(
self,
classifier: Classifier = CountingClassifier(),
threshold: Threshold = MinProbabilityThreshold([0, 0.05]),
):
self.dynamic: DynamicConstraintsComponent = DynamicConstraintsComponent(
classifier=classifier,
threshold=threshold,
attr="mip_constr_lazy",
)
self.classifiers = self.dynamic.classifiers
self.thresholds = self.dynamic.thresholds
self.known_violations = self.dynamic.known_violations
self.lazy_enforced: Dict[ConstraintName, Any] = {}
self.n_iterations: int = 0
@staticmethod
def enforce(
violations: Dict[ConstraintName, Any],
instance: Instance,
model: Any,
solver: "LearningSolver",
) -> None:
assert solver.internal_solver is not None
for (vname, vdata) in violations.items():
instance.enforce_lazy_constraint(solver.internal_solver, model, vdata)
@overrides
def before_solve_mip(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
self.lazy_enforced.clear()
logger.info("Predicting violated (dynamic) lazy constraints...")
vnames = self.dynamic.sample_predict(instance, sample)
violations = {c: self.dynamic.known_violations[c] for c in vnames}
logger.info("Enforcing %d lazy constraints..." % len(vnames))
self.enforce(violations, instance, model, solver)
self.n_iterations = 0
@overrides
def after_solve_mip(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
sample.put_scalar("mip_constr_lazy", self.dynamic.encode(self.lazy_enforced))
stats["LazyDynamic: Added in callback"] = len(self.lazy_enforced)
stats["LazyDynamic: Iterations"] = self.n_iterations
@overrides
def iteration_cb(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
) -> bool:
assert solver.internal_solver is not None
logger.debug("Finding violated lazy constraints...")
violations = instance.find_violated_lazy_constraints(
solver.internal_solver,
model,
)
if len(violations) == 0:
logger.debug("No violations found")
return False
else:
self.n_iterations += 1
for v in violations:
self.lazy_enforced[v] = violations[v]
logger.debug(" %d violations found" % len(violations))
self.enforce(violations, instance, model, solver)
return True
# Delegate ML methods to self.dynamic
# -------------------------------------------------------------------
@overrides
def sample_xy(
self,
instance: Optional[Instance],
sample: Sample,
) -> Tuple[Dict, Dict]:
return self.dynamic.sample_xy(instance, sample)
@overrides
def pre_fit(self, pre: List[Any]) -> None:
self.dynamic.pre_fit(pre)
def sample_predict(
self,
instance: Instance,
sample: Sample,
) -> List[ConstraintName]:
return self.dynamic.sample_predict(instance, sample)
@overrides
def pre_sample_xy(self, instance: Instance, sample: Sample) -> Any:
return self.dynamic.pre_sample_xy(instance, sample)
@overrides
def fit_xy(
self,
x: Dict[ConstraintCategory, np.ndarray],
y: Dict[ConstraintCategory, np.ndarray],
) -> None:
self.dynamic.fit_xy(x, y)
@overrides
def sample_evaluate(
self,
instance: Instance,
sample: Sample,
) -> Dict[ConstraintCategory, Dict[str, float]]:
return self.dynamic.sample_evaluate(instance, sample)
# ------------------------------------------------------------------------------------------------------------------
# NEW API
# ------------------------------------------------------------------------------------------------------------------
@staticmethod
def extract(filenames, progress=True, known_cids=None):
enforced_cids, features = [], []
freeze_known_cids = True
if known_cids is None:
known_cids = set()
freeze_known_cids = False
for filename in tqdm(
filenames,
desc="extract (1/2)",
disable=not progress,
):
with Hdf5Sample(filename, mode="r") as sample:
features.append(sample.get_array("lp_var_values"))
cids = frozenset(
DynamicConstraintsComponent.decode(
sample.get_scalar("mip_constr_lazy")
).keys()
)
enforced_cids.append(cids)
if not freeze_known_cids:
known_cids.update(cids)
x, y, cat, cdata = [], [], [], {}
for (j, cid) in enumerate(known_cids):
cdata[cid] = json.loads(cid.decode())
for i in range(len(features)):
cat.append(cid)
x.append(features[i])
if cid in enforced_cids[i]:
y.append([0, 1])
else:
y.append([1, 0])
x = np.vstack(x)
y = np.vstack(y)
cat = np.array(cat)
x_dict, y_dict = DynamicLazyConstraintsComponent._split(
x,
y,
cat,
progress=progress,
)
return x_dict, y_dict, cdata
@staticmethod
def _split(x, y, cat, progress=False):
# Sort data by categories
pi = np.argsort(cat, kind="stable")
x = x[pi]
y = y[pi]
cat = cat[pi]
# Split categories
x_dict = {}
y_dict = {}
start = 0
for end in tqdm(
range(len(cat) + 1),
desc="extract (2/2)",
disable=not progress,
):
if (end >= len(cat)) or (cat[start] != cat[end]):
x_dict[cat[start]] = x[start:end, :]
y_dict[cat[start]] = y[start:end, :]
start = end
return x_dict, y_dict
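For reference, a minimal sketch of the layout produced by extract/_split above; the file names are placeholders, not part of the API:
# x_dict maps each known constraint id (bytes) to an (n_samples, n_lp_vars)
# matrix of LP variable values; y_dict maps it to (n_samples, 2) labels,
# where [1, 0] means "not enforced" and [0, 1] means "enforced".
x_dict, y_dict, cdata = DynamicLazyConstraintsComponent.extract(
    ["train_00001.h5", "train_00002.h5"],  # illustrative file names
    progress=False,
)
for cid in x_dict:
    assert x_dict[cid].shape[0] == y_dict[cid].shape[0]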

View File

@@ -1,133 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Any, TYPE_CHECKING, Tuple, Dict, List
import numpy as np
from overrides import overrides
from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.threshold import Threshold, MinProbabilityThreshold
from miplearn.components.component import Component
from miplearn.components.dynamic_common import DynamicConstraintsComponent
from miplearn.features.sample import Sample
from miplearn.instance.base import Instance
from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver
class UserCutsComponent(Component):
def __init__(
self,
classifier: Classifier = CountingClassifier(),
threshold: Threshold = MinProbabilityThreshold([0.50, 0.50]),
) -> None:
self.dynamic = DynamicConstraintsComponent(
classifier=classifier,
threshold=threshold,
attr="mip_user_cuts",
)
self.enforced: Dict[ConstraintName, Any] = {}
self.n_added_in_callback = 0
@overrides
def before_solve_mip(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
assert solver.internal_solver is not None
self.enforced.clear()
self.n_added_in_callback = 0
logger.info("Predicting violated user cuts...")
vnames = self.dynamic.sample_predict(instance, sample)
logger.info("Enforcing %d user cuts ahead-of-time..." % len(vnames))
for vname in vnames:
vdata = self.dynamic.known_violations[vname]
instance.enforce_user_cut(solver.internal_solver, model, vdata)
stats["UserCuts: Added ahead-of-time"] = len(vnames)
@overrides
def user_cut_cb(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
) -> None:
assert solver.internal_solver is not None
logger.debug("Finding violated user cuts...")
violations = instance.find_violated_user_cuts(model)
logger.debug(f"Found {len(violations)} violated user cuts")
logger.debug("Building violated user cuts...")
for (vname, vdata) in violations.items():
if vname in self.enforced:
continue
instance.enforce_user_cut(solver.internal_solver, model, vdata)
self.enforced[vname] = vdata
self.n_added_in_callback += 1
if len(violations) > 0:
logger.debug(f"Added {len(violations)} violated user cuts")
@overrides
def after_solve_mip(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
sample.put_scalar("mip_user_cuts", self.dynamic.encode(self.enforced))
stats["UserCuts: Added in callback"] = self.n_added_in_callback
if self.n_added_in_callback > 0:
logger.info(f"{self.n_added_in_callback} user cuts added in callback")
# Delegate ML methods to self.dynamic
# -------------------------------------------------------------------
@overrides
def sample_xy(
self,
instance: "Instance",
sample: Sample,
) -> Tuple[Dict, Dict]:
return self.dynamic.sample_xy(instance, sample)
@overrides
def pre_fit(self, pre: List[Any]) -> None:
self.dynamic.pre_fit(pre)
def sample_predict(
self,
instance: "Instance",
sample: Sample,
) -> List[ConstraintName]:
return self.dynamic.sample_predict(instance, sample)
@overrides
def pre_sample_xy(self, instance: Instance, sample: Sample) -> Any:
return self.dynamic.pre_sample_xy(instance, sample)
@overrides
def fit_xy(
self,
x: Dict[ConstraintCategory, np.ndarray],
y: Dict[ConstraintCategory, np.ndarray],
) -> None:
self.dynamic.fit_xy(x, y)
@overrides
def sample_evaluate(
self,
instance: "Instance",
sample: Sample,
) -> Dict[ConstraintCategory, Dict[ConstraintName, float]]:
return self.dynamic.sample_evaluate(instance, sample)

View File

@@ -0,0 +1,43 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import json
from typing import Any, Dict, List
import gurobipy as gp
from ..h5 import H5File
class ExpertLazyComponent:
def __init__(self) -> None:
pass
def fit(self, train_h5: List[str]) -> None:
pass
def before_mip(self, test_h5: str, model: gp.Model, stats: Dict[str, Any]) -> None:
with H5File(test_h5, "r") as h5:
constr_names = h5.get_array("static_constr_names")
constr_lazy = h5.get_array("mip_constr_lazy")
constr_violations = h5.get_scalar("mip_constr_violations")
assert constr_names is not None
assert constr_violations is not None
# Static lazy constraints
n_static_lazy = 0
if constr_lazy is not None:
for (constr_idx, constr_name) in enumerate(constr_names):
if constr_lazy[constr_idx]:
constr = model.getConstrByName(constr_name.decode())
constr.lazy = 3
n_static_lazy += 1
stats.update({"Static lazy constraints": n_static_lazy})
# Dynamic lazy constraints
if hasattr(model, "_fix_violations"):
violations = json.loads(constr_violations)
model._fix_violations(model, violations, "aot")
stats.update({"Dynamic lazy constraints": len(violations)})

View File

@@ -1,126 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Optional, cast
import numpy as np
from overrides import overrides
from sklearn.linear_model import LinearRegression
from miplearn.classifiers import Regressor
from miplearn.classifiers.sklearn import ScikitLearnRegressor
from miplearn.components.component import Component
from miplearn.features.sample import Sample
from miplearn.instance.base import Instance
from miplearn.types import LearningSolveStats
if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver
logger = logging.getLogger(__name__)
class ObjectiveValueComponent(Component):
"""
A Component which predicts the optimal objective value of the problem.
"""
def __init__(
self,
regressor: Regressor = ScikitLearnRegressor(LinearRegression()),
) -> None:
assert isinstance(regressor, Regressor)
self.regressors: Dict[str, Regressor] = {}
self.regressor_prototype = regressor
@overrides
def before_solve_mip(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
logger.info("Predicting optimal value...")
pred = self.sample_predict(sample)
for (c, v) in pred.items():
logger.info(f"Predicted {c.lower()}: %.6e" % v)
stats[f"Objective: Predicted {c.lower()}"] = v # type: ignore
@overrides
def fit_xy(
self,
x: Dict[str, np.ndarray],
y: Dict[str, np.ndarray],
) -> None:
for c in ["Upper bound", "Lower bound"]:
if c in y:
self.regressors[c] = self.regressor_prototype.clone()
self.regressors[c].fit(x[c], y[c])
def sample_predict(self, sample: Sample) -> Dict[str, float]:
pred: Dict[str, float] = {}
x, _ = self.sample_xy(None, sample)
for c in ["Upper bound", "Lower bound"]:
if c in self.regressors:
pred[c] = self.regressors[c].predict(np.array(x[c]))[0, 0]
else:
logger.info(f"{c} regressor not fitted. Skipping.")
return pred
@overrides
def sample_xy(
self,
_: Optional[Instance],
sample: Sample,
) -> Tuple[Dict[str, List[List[float]]], Dict[str, List[List[float]]]]:
lp_instance_features_np = sample.get_array("lp_instance_features")
if lp_instance_features_np is None:
lp_instance_features_np = sample.get_array("static_instance_features")
assert lp_instance_features_np is not None
lp_instance_features = cast(List[float], lp_instance_features_np.tolist())
# Features
x: Dict[str, List[List[float]]] = {
"Upper bound": [lp_instance_features],
"Lower bound": [lp_instance_features],
}
# Labels
y: Dict[str, List[List[float]]] = {}
mip_lower_bound = sample.get_scalar("mip_lower_bound")
mip_upper_bound = sample.get_scalar("mip_upper_bound")
if mip_lower_bound is not None:
y["Lower bound"] = [[mip_lower_bound]]
if mip_upper_bound is not None:
y["Upper bound"] = [[mip_upper_bound]]
return x, y
@overrides
def sample_evaluate(
self,
instance: Instance,
sample: Sample,
) -> Dict[str, Dict[str, float]]:
def compare(y_pred: float, y_actual: float) -> Dict[str, float]:
err = np.round(abs(y_pred - y_actual), 8)
return {
"Actual value": y_actual,
"Predicted value": y_pred,
"Absolute error": err,
"Relative error": err / y_actual,
}
result: Dict[str, Dict[str, float]] = {}
pred = self.sample_predict(sample)
actual_ub = sample.get_scalar("mip_upper_bound")
actual_lb = sample.get_scalar("mip_lower_bound")
if actual_ub is not None:
result["Upper bound"] = compare(pred["Upper bound"], actual_ub)
if actual_lb is not None:
result["Lower bound"] = compare(pred["Lower bound"], actual_lb)
return result
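A minimal sketch of training this component directly from feature matrices; the numbers are made up:
# Two training samples, each with two instance features and known bounds.
x = {
    "Upper bound": np.array([[1.0, 2.0], [1.5, 2.5]]),
    "Lower bound": np.array([[1.0, 2.0], [1.5, 2.5]]),
}
y = {
    "Upper bound": np.array([[100.0], [110.0]]),
    "Lower bound": np.array([[95.0], [105.0]]),
}
comp = ObjectiveValueComponent()
comp.fit_xy(x, y)  # fits one regressor per bound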

View File

@@ -1,341 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Dict, List, Any, TYPE_CHECKING, Tuple, Optional
import numpy as np
from overrides import overrides
from miplearn.classifiers import Classifier
from miplearn.classifiers.adaptive import AdaptiveClassifier
from miplearn.classifiers.threshold import MinPrecisionThreshold, Threshold
from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component
from miplearn.features.sample import Sample
from miplearn.instance.base import Instance
from miplearn.types import (
LearningSolveStats,
Category,
Solution,
)
from miplearn.features.sample import Hdf5Sample
from p_tqdm import p_map
from tqdm.auto import tqdm
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver
class PrimalSolutionComponent(Component):
"""
A component that predicts the optimal primal values for the binary decision
variables.
In exact mode, predicted primal solutions are provided to the solver as MIP
starts. In heuristic mode, this component fixes the decision variables to their
predicted values.
"""
def __init__(
self,
classifier: Classifier = AdaptiveClassifier(),
mode: str = "exact",
threshold: Threshold = MinPrecisionThreshold([0.99, 0.99]),
) -> None:
assert isinstance(classifier, Classifier)
assert isinstance(threshold, Threshold)
assert mode in ["exact", "heuristic"]
self.mode = mode
self.classifiers: Dict[Category, Classifier] = {}
self.thresholds: Dict[Category, Threshold] = {}
self.threshold_prototype = threshold
self.classifier_prototype = classifier
@overrides
def before_solve_mip(
self,
solver: "LearningSolver",
instance: Instance,
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
logger.info("Predicting primal solution...")
# Do nothing if models are not trained
if len(self.classifiers) == 0:
logger.info("Classifiers not fitted. Skipping.")
return
# Predict solution and provide it to the solver
solution = self.sample_predict(sample)
assert solver.internal_solver is not None
if self.mode == "heuristic":
solver.internal_solver.fix(solution)
else:
solver.internal_solver.set_warm_start(solution)
# Update statistics
stats["Primal: Free"] = 0
stats["Primal: Zero"] = 0
stats["Primal: One"] = 0
for (var_name, value) in solution.items():
if value is None:
stats["Primal: Free"] += 1
else:
if value < 0.5:
stats["Primal: Zero"] += 1
else:
stats["Primal: One"] += 1
logger.info(
f"Predicted: free: {stats['Primal: Free']}, "
f"zero: {stats['Primal: Zero']}, "
f"one: {stats['Primal: One']}"
)
def sample_predict(self, sample: Sample) -> Solution:
var_names = sample.get_array("static_var_names")
var_categories = sample.get_array("static_var_categories")
var_types = sample.get_array("static_var_types")
assert var_names is not None
assert var_categories is not None
assert var_types is not None
# Compute y_pred
x, _ = self.sample_xy(None, sample)
y_pred = {}
for category in x.keys():
assert category in self.classifiers, (
f"Classifier for category {category} has not been trained. "
f"Please call component.fit before component.predict."
)
xc = np.array(x[category])
proba = self.classifiers[category].predict_proba(xc)
thr = self.thresholds[category].predict(xc)
y_pred[category] = np.vstack(
[
proba[:, 0] >= thr[0],
proba[:, 1] >= thr[1],
]
).T
# Convert y_pred into solution
solution: Solution = {v: None for v in var_names}
category_offset: Dict[Category, int] = {cat: 0 for cat in x.keys()}
for (i, var_name) in enumerate(var_names):
if var_types[i] != b"B":
continue
category = var_categories[i]
if category not in category_offset:
continue
offset = category_offset[category]
category_offset[category] += 1
if y_pred[category][offset, 0]:
solution[var_name] = 0.0
if y_pred[category][offset, 1]:
solution[var_name] = 1.0
return solution
@overrides
def sample_xy(
self,
_: Optional[Instance],
sample: Sample,
) -> Tuple[Dict[Category, List[List[float]]], Dict[Category, List[List[float]]]]:
x: Dict = {}
y: Dict = {}
instance_features = sample.get_array("static_instance_features")
mip_var_values = sample.get_array("mip_var_values")
lp_var_values = sample.get_array("lp_var_values")
var_features = sample.get_array("lp_var_features")
var_names = sample.get_array("static_var_names")
var_types = sample.get_array("static_var_types")
var_categories = sample.get_array("static_var_categories")
if var_features is None:
var_features = sample.get_array("static_var_features")
assert instance_features is not None
assert var_features is not None
assert var_names is not None
assert var_types is not None
assert var_categories is not None
for (i, var_name) in enumerate(var_names):
# Skip non-binary variables
if var_types[i] != b"B":
continue
# Initialize categories
category = var_categories[i]
if len(category) == 0:
continue
if category not in x.keys():
x[category] = []
y[category] = []
# Features
features = list(instance_features)
features.extend(var_features[i])
if lp_var_values is not None:
features.append(lp_var_values[i])
x[category].append(features)
# Labels
if mip_var_values is not None:
opt_value = mip_var_values[i]
assert opt_value is not None
y[category].append([opt_value < 0.5, opt_value >= 0.5])
return x, y
@overrides
def sample_evaluate(
self,
_: Optional[Instance],
sample: Sample,
) -> Dict[str, Dict[str, float]]:
mip_var_values = sample.get_array("mip_var_values")
var_names = sample.get_array("static_var_names")
assert mip_var_values is not None
assert var_names is not None
solution_actual = {
var_name: mip_var_values[i] for (i, var_name) in enumerate(var_names)
}
solution_pred = self.sample_predict(sample)
vars_all, vars_one, vars_zero = set(), set(), set()
pred_one_positive, pred_zero_positive = set(), set()
for (var_name, value_actual) in solution_actual.items():
vars_all.add(var_name)
if value_actual > 0.5:
vars_one.add(var_name)
else:
vars_zero.add(var_name)
value_pred = solution_pred[var_name]
if value_pred is not None:
if value_pred > 0.5:
pred_one_positive.add(var_name)
else:
pred_zero_positive.add(var_name)
pred_one_negative = vars_all - pred_one_positive
pred_zero_negative = vars_all - pred_zero_positive
return {
"0": classifier_evaluation_dict(
tp=len(pred_zero_positive & vars_zero),
tn=len(pred_zero_negative & vars_one),
fp=len(pred_zero_positive & vars_one),
fn=len(pred_zero_negative & vars_zero),
),
"1": classifier_evaluation_dict(
tp=len(pred_one_positive & vars_one),
tn=len(pred_one_negative & vars_zero),
fp=len(pred_one_positive & vars_zero),
fn=len(pred_one_negative & vars_one),
),
}
@overrides
def fit_xy(
self,
x: Dict[Category, np.ndarray],
y: Dict[Category, np.ndarray],
progress: bool = False,
) -> None:
for category in tqdm(x.keys(), desc="fit", disable=not progress):
clf = self.classifier_prototype.clone()
thr = self.threshold_prototype.clone()
clf.fit(x[category], y[category])
thr.fit(clf, x[category], y[category])
self.classifiers[category] = clf
self.thresholds[category] = thr
# ------------------------------------------------------------------------------------------------------------------
# NEW API
# ------------------------------------------------------------------------------------------------------------------
def fit(
self,
x: Dict[Category, np.ndarray],
y: Dict[Category, np.ndarray],
progress: bool = False,
) -> None:
for category in tqdm(x.keys(), desc="fit", disable=not progress):
clf = self.classifier_prototype.clone()
thr = self.threshold_prototype.clone()
clf.fit(x[category], y[category])
thr.fit(clf, x[category], y[category])
self.classifiers[category] = clf
self.thresholds[category] = thr
def predict(self, x):
y_pred = {}
for category in x.keys():
assert category in self.classifiers, (
f"Classifier for category {category} has not been trained. "
f"Please call component.fit before component.predict."
)
xc = np.array(x[category])
proba = self.classifiers[category].predict_proba(xc)
thr = self.thresholds[category].predict(xc)
y_pred[category] = np.vstack(
[
proba[:, 0] >= thr[0],
proba[:, 1] >= thr[1],
]
).T
return y_pred
@staticmethod
def extract(
filenames: List[str],
progress: bool = False,
):
x, y, cat = [], [], []
# Read data
for filename in tqdm(
filenames,
desc="extract (1/2)",
disable=not progress,
):
with Hdf5Sample(filename, mode="r") as sample:
mip_var_values = sample.get_array("mip_var_values")
var_features = sample.get_array("lp_var_features")
var_types = sample.get_array("static_var_types")
var_categories = sample.get_array("static_var_categories")
assert mip_var_values is not None
assert var_features is not None
assert var_types is not None
assert var_categories is not None
x.append(var_features)
y.append([mip_var_values < 0.5, mip_var_values > 0.5])
cat.extend(var_categories)
# Convert to numpy arrays
x = np.vstack(x)
y = np.hstack(y).T
cat = np.array(cat)
# Sort data by categories
pi = np.argsort(cat, kind="stable")
x = x[pi]
y = y[pi]
cat = cat[pi]
# Split categories
x_dict = {}
y_dict = {}
start = 0
for end in tqdm(
range(len(cat) + 1),
desc="extract (2/2)",
disable=not progress,
):
if (end >= len(cat)) or (cat[start] != cat[end]):
x_dict[cat[start]] = x[start:end, :]
y_dict[cat[start]] = y[start:end, :]
start = end
return x_dict, y_dict
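As the class docstring above notes, behavior differs by mode; a short sketch of the two configurations (the threshold shown is the default):
# Exact mode: predictions are passed to the solver as MIP starts,
# so optimality guarantees are preserved.
comp = PrimalSolutionComponent(mode="exact")
# Heuristic mode: predicted variables are fixed, trading optimality
# for speed; a high-precision threshold limits incorrect fixings.
comp = PrimalSolutionComponent(
    mode="heuristic",
    threshold=MinPrecisionThreshold([0.99, 0.99]),
)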

View File

@@ -0,0 +1,29 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Tuple
import numpy as np
from miplearn.h5 import H5File
def _extract_bin_var_names_values(
h5: H5File,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
bin_var_names, bin_var_indices = _extract_bin_var_names(h5)
var_values = h5.get_array("mip_var_values")
assert var_values is not None
bin_var_values = var_values[bin_var_indices].astype(int)
return bin_var_names, bin_var_values, bin_var_indices
def _extract_bin_var_names(h5: H5File) -> Tuple[np.ndarray, np.ndarray]:
var_types = h5.get_array("static_var_types")
var_names = h5.get_array("static_var_names")
assert var_types is not None
assert var_names is not None
bin_var_indices = np.where(var_types == b"B")[0]
bin_var_names = var_names[bin_var_indices]
assert len(bin_var_names.shape) == 1
return bin_var_names, bin_var_indices

View File

@@ -0,0 +1,93 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from abc import ABC, abstractmethod
from typing import Optional, Dict
import numpy as np
from miplearn.solvers.abstract import AbstractModel
logger = logging.getLogger()
class PrimalComponentAction(ABC):
@abstractmethod
def perform(
self,
model: AbstractModel,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict],
) -> None:
pass
class SetWarmStart(PrimalComponentAction):
def perform(
self,
model: AbstractModel,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict],
) -> None:
logger.info("Setting warm starts...")
model.set_warm_starts(var_names, var_values, stats)
class FixVariables(PrimalComponentAction):
def perform(
self,
model: AbstractModel,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict],
) -> None:
logger.info("Fixing variables...")
assert len(var_values.shape) == 2
assert var_values.shape[0] == 1
var_values = var_values.reshape(-1)
model.fix_variables(var_names, var_values, stats)
if stats is not None:
stats["Heuristic"] = True
class EnforceProximity(PrimalComponentAction):
def __init__(self, tol: float) -> None:
self.tol = tol
def perform(
self,
model: AbstractModel,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict],
) -> None:
assert len(var_values.shape) == 2
assert var_values.shape[0] == 1
var_values = var_values.reshape(-1)
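# Build a proximity constraint of the form
#   sum_{i: v_i=0} x_i + sum_{i: v_i=1} (1 - x_i) <= tol * n,
# where v is the predicted solution and n counts its non-NaN entries;
# constant terms are moved to the right-hand side as we go.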
constr_lhs = []
constr_vars = []
constr_rhs = 0.0
for (i, var_name) in enumerate(var_names):
if np.isnan(var_values[i]):
continue
constr_lhs.append(1.0 if var_values[i] < 0.5 else -1.0)
constr_rhs -= var_values[i]
constr_vars.append(var_name)
constr_rhs += len(constr_vars) * self.tol
logger.info(
f"Adding proximity constraint (tol={self.tol}, nz={len(constr_vars)})..."
)
model.add_constrs(
np.array(constr_vars),
np.array([constr_lhs]),
np.array(["<"], dtype="S"),
np.array([constr_rhs]),
)
if stats is not None:
stats["Heuristic"] = True

View File

@@ -0,0 +1,32 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Any, Dict, List
from . import _extract_bin_var_names_values
from .actions import PrimalComponentAction
from ...solvers.abstract import AbstractModel
from ...h5 import H5File
logger = logging.getLogger(__name__)
class ExpertPrimalComponent:
"""
Component that predicts warm starts by peeking at the optimal solution.
"""
def __init__(self, action: PrimalComponentAction):
self.action = action
def fit(self, train_h5: List[str]) -> None:
pass
def before_mip(
self, test_h5: str, model: AbstractModel, stats: Dict[str, Any]
) -> None:
with H5File(test_h5, "r") as h5:
names, values, _ = _extract_bin_var_names_values(h5)
self.action.perform(model, names, values.reshape(1, -1), stats)
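A minimal usage sketch, assuming SetWarmStart is imported from .actions, model is an AbstractModel built elsewhere, and the HDF5 file (name illustrative) already stores an optimal solution:
comp = ExpertPrimalComponent(action=SetWarmStart())
stats: Dict[str, Any] = {}
comp.before_mip("instance_00001.h5", model, stats)  # warm-starts the model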

View File

@@ -0,0 +1,129 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Any, Dict, List, Callable, Optional
import numpy as np
import sklearn
from miplearn.components.primal import (
_extract_bin_var_names_values,
_extract_bin_var_names,
)
from miplearn.components.primal.actions import PrimalComponentAction
from miplearn.extractors.abstract import FeaturesExtractor
from miplearn.solvers.abstract import AbstractModel
from miplearn.h5 import H5File
logger = logging.getLogger(__name__)
class IndependentVarsPrimalComponent:
def __init__(
self,
base_clf: Any,
extractor: FeaturesExtractor,
action: PrimalComponentAction,
clone_fn: Callable[[Any], Any] = sklearn.clone,
):
self.base_clf = base_clf
self.extractor = extractor
self.clf_: Dict[bytes, Any] = {}
self.bin_var_names_: Optional[np.ndarray] = None
self.n_features_: Optional[int] = None
self.clone_fn = clone_fn
self.action = action
def fit(self, train_h5: List[str]) -> None:
logger.info("Reading training data...")
self.bin_var_names_ = None
n_bin_vars: Optional[int] = None
n_vars: Optional[int] = None
x, y = [], []
for h5_filename in train_h5:
with H5File(h5_filename, "r") as h5:
# Get number of variables
var_types = h5.get_array("static_var_types")
assert var_types is not None
n_vars = len(var_types)
# Extract features
(
bin_var_names,
bin_var_values,
bin_var_indices,
) = _extract_bin_var_names_values(h5)
# Store/check variable names
if self.bin_var_names_ is None:
self.bin_var_names_ = bin_var_names
n_bin_vars = len(self.bin_var_names_)
else:
assert np.all(bin_var_names == self.bin_var_names_)
# Build x and y vectors
x_sample = self.extractor.get_var_features(h5)
assert len(x_sample.shape) == 2
assert x_sample.shape[0] == n_vars
x_sample = x_sample[bin_var_indices]
if self.n_features_ is None:
self.n_features_ = x_sample.shape[1]
else:
assert x_sample.shape[1] == self.n_features_
x.append(x_sample)
y.append(bin_var_values)
assert n_bin_vars is not None
assert self.bin_var_names_ is not None
logger.info("Constructing matrices...")
x_np = np.vstack(x)
y_np = np.hstack(y)
n_samples = len(train_h5) * n_bin_vars
assert x_np.shape == (n_samples, self.n_features_)
assert y_np.shape == (n_samples,)
logger.info(
f"Dataset has {n_bin_vars} binary variables, "
f"{len(train_h5):,d} samples per variable, "
f"{self.n_features_:,d} features, 1 target and 2 classes"
)
logger.info(f"Training {n_bin_vars} classifiers...")
self.clf_ = {}
for (var_idx, var_name) in enumerate(self.bin_var_names_):
self.clf_[var_name] = self.clone_fn(self.base_clf)
self.clf_[var_name].fit(
x_np[var_idx::n_bin_vars, :], y_np[var_idx::n_bin_vars]
)
logger.info("Done fitting.")
def before_mip(
self, test_h5: str, model: AbstractModel, stats: Dict[str, Any]
) -> None:
assert self.bin_var_names_ is not None
assert self.n_features_ is not None
# Read features
with H5File(test_h5, "r") as h5:
x_sample = self.extractor.get_var_features(h5)
bin_var_names, bin_var_indices = _extract_bin_var_names(h5)
assert np.all(bin_var_names == self.bin_var_names_)
x_sample = x_sample[bin_var_indices]
assert x_sample.shape == (len(self.bin_var_names_), self.n_features_)
# Predict optimal solution
logger.info("Predicting warm starts...")
y_pred = []
for (var_idx, var_name) in enumerate(self.bin_var_names_):
x_var = x_sample[var_idx, :].reshape(1, -1)
y_var = self.clf_[var_name].predict(x_var)
assert y_var.shape == (1,)
y_pred.append(y_var[0])
# Construct warm starts, based on prediction
y_pred_np = np.array(y_pred).reshape(1, -1)
assert y_pred_np.shape == (1, len(self.bin_var_names_))
self.action.perform(model, self.bin_var_names_, y_pred_np, stats)

View File

@@ -0,0 +1,88 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import List, Dict, Any, Optional
import numpy as np
from miplearn.components.primal import _extract_bin_var_names_values
from miplearn.components.primal.actions import PrimalComponentAction
from miplearn.extractors.abstract import FeaturesExtractor
from miplearn.solvers.abstract import AbstractModel
from miplearn.h5 import H5File
logger = logging.getLogger(__name__)
class JointVarsPrimalComponent:
def __init__(
self, clf: Any, extractor: FeaturesExtractor, action: PrimalComponentAction
):
self.clf = clf
self.extractor = extractor
self.bin_var_names_: Optional[np.ndarray] = None
self.action = action
def fit(self, train_h5: List[str]) -> None:
logger.info("Reading training data...")
self.bin_var_names_ = None
x, y, n_samples, n_features = [], [], len(train_h5), None
for h5_filename in train_h5:
with H5File(h5_filename, "r") as h5:
bin_var_names, bin_var_values, _ = _extract_bin_var_names_values(h5)
# Store/check variable names
if self.bin_var_names_ is None:
self.bin_var_names_ = bin_var_names
else:
assert np.all(bin_var_names == self.bin_var_names_)
# Build x and y vectors
x_sample = self.extractor.get_instance_features(h5)
assert len(x_sample.shape) == 1
if n_features is None:
n_features = len(x_sample)
else:
assert len(x_sample) == n_features
x.append(x_sample)
y.append(bin_var_values)
assert self.bin_var_names_ is not None
logger.info("Constructing matrices...")
x_np = np.vstack(x)
y_np = np.array(y)
assert len(x_np.shape) == 2
assert x_np.shape[0] == n_samples
assert x_np.shape[1] == n_features
assert y_np.shape == (n_samples, len(self.bin_var_names_))
logger.info(
f"Dataset has {n_samples:,d} samples, "
f"{n_features:,d} features and {y_np.shape[1]:,d} targets"
)
logger.info("Training classifier...")
self.clf.fit(x_np, y_np)
logger.info("Done fitting.")
def before_mip(
self, test_h5: str, model: AbstractModel, stats: Dict[str, Any]
) -> None:
assert self.bin_var_names_ is not None
# Read features
with H5File(test_h5, "r") as h5:
x_sample = self.extractor.get_instance_features(h5)
assert len(x_sample.shape) == 1
x_sample = x_sample.reshape(1, -1)
# Predict optimal solution
logger.info("Predicting warm starts...")
y_pred = self.clf.predict(x_sample)
assert len(y_pred.shape) == 2
assert y_pred.shape[0] == 1
assert y_pred.shape[1] == len(self.bin_var_names_)
# Construct warm starts, based on prediction
self.action.perform(model, self.bin_var_names_, y_pred, stats)

View File

@@ -0,0 +1,167 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Tuple
import numpy as np
from . import _extract_bin_var_names_values
from .actions import PrimalComponentAction
from ...extractors.abstract import FeaturesExtractor
from ...solvers.abstract import AbstractModel
from ...h5 import H5File
logger = logging.getLogger()
class SolutionConstructor(ABC):
@abstractmethod
def construct(self, y_proba: np.ndarray, solutions: np.ndarray) -> np.ndarray:
pass
class MemorizingPrimalComponent:
"""
Component that memorizes all solutions seen during training, then fits a
single classifier to predict which of the memorized solutions should be
provided to the solver. Optionally combines multiple memorized solutions
into a single, partial one.
"""
def __init__(
self,
clf: Any,
extractor: FeaturesExtractor,
constructor: SolutionConstructor,
action: PrimalComponentAction,
) -> None:
assert clf is not None
self.clf = clf
self.extractor = extractor
self.constructor = constructor
self.solutions_: Optional[np.ndarray] = None
self.bin_var_names_: Optional[np.ndarray] = None
self.action = action
def fit(self, train_h5: List[str]) -> None:
logger.info("Reading training data...")
n_samples = len(train_h5)
solutions_ = []
self.bin_var_names_ = None
x, y, n_features = [], [], None
solution_to_idx: Dict[Tuple, int] = {}
for h5_filename in train_h5:
with H5File(h5_filename, "r") as h5:
bin_var_names, bin_var_values, _ = _extract_bin_var_names_values(h5)
# Store/check variable names
if self.bin_var_names_ is None:
self.bin_var_names_ = bin_var_names
else:
assert np.all(bin_var_names == self.bin_var_names_)
# Store solution
sol = tuple(np.where(bin_var_values)[0])
if sol not in solution_to_idx:
solutions_.append(bin_var_values)
solution_to_idx[sol] = len(solution_to_idx)
y.append(solution_to_idx[sol])
# Extract features
x_sample = self.extractor.get_instance_features(h5)
assert len(x_sample.shape) == 1
if n_features is None:
n_features = len(x_sample)
else:
assert len(x_sample) == n_features
x.append(x_sample)
logger.info("Constructing matrices...")
x_np = np.vstack(x)
y_np = np.array(y)
assert len(x_np.shape) == 2
assert x_np.shape[0] == n_samples
assert x_np.shape[1] == n_features
assert y_np.shape == (n_samples,)
self.solutions_ = np.array(solutions_)
n_classes = len(solution_to_idx)
logger.info(
f"Dataset has {n_samples:,d} samples, "
f"{n_features:,d} features and {n_classes:,d} classes"
)
logger.info("Training classifier...")
self.clf.fit(x_np, y_np)
logger.info("Done fitting.")
def before_mip(
self, test_h5: str, model: AbstractModel, stats: Dict[str, Any]
) -> None:
assert self.solutions_ is not None
assert self.bin_var_names_ is not None
# Read features
with H5File(test_h5, "r") as h5:
x_sample = self.extractor.get_instance_features(h5)
assert len(x_sample.shape) == 1
x_sample = x_sample.reshape(1, -1)
# Predict optimal solution
logger.info("Predicting primal solution...")
y_proba = self.clf.predict_proba(x_sample)
assert len(y_proba.shape) == 2
assert y_proba.shape[0] == 1
assert y_proba.shape[1] == len(self.solutions_)
# Construct warm starts, based on prediction
starts = self.constructor.construct(y_proba[0, :], self.solutions_)
self.action.perform(model, self.bin_var_names_, starts, stats)
class SelectTopSolutions(SolutionConstructor):
"""
Warm start construction strategy that selects and returns the top k solutions.
"""
def __init__(self, k: int) -> None:
self.k = k
def construct(self, y_proba: np.ndarray, solutions: np.ndarray) -> np.ndarray:
# Check arguments
assert len(y_proba.shape) == 1
assert len(solutions.shape) == 2
assert len(y_proba) == solutions.shape[0]
# Select top k solutions
ind = np.argsort(-y_proba, kind="stable")
selected = ind[: min(self.k, len(ind))]
return solutions[selected, :]
class MergeTopSolutions(SolutionConstructor):
"""
Warm start construction strategy that first selects the top k solutions,
then merges them into a single solution.
To merge the solutions, the strategy first computes the mean optimal value of each
decision variable, then: (i) sets the variable to zero if the mean is at or below
thresholds[0]; (ii) sets the variable to one if the mean is at or above
thresholds[1]; (iii) leaves the variable free otherwise.
"""
def __init__(self, k: int, thresholds: List[float]):
assert len(thresholds) == 2
self.k = k
self.thresholds = thresholds
def construct(self, y_proba: np.ndarray, solutions: np.ndarray) -> np.ndarray:
filtered = SelectTopSolutions(self.k).construct(y_proba, solutions)
mean = filtered.mean(axis=0)
start = np.full((1, solutions.shape[1]), float("nan"))
start[0, mean <= self.thresholds[0]] = 0
start[0, mean >= self.thresholds[1]] = 1
return start
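A small worked example of MergeTopSolutions, with made-up probabilities over three memorized solutions:
solutions = np.array(
    [
        [1, 1, 0, 0],
        [1, 0, 0, 1],
        [1, 1, 0, 1],
    ]
)
y_proba = np.array([0.5, 0.3, 0.2])
# The top-2 solutions are rows 0 and 1; their means are [1.0, 0.5, 0.0, 0.5].
# With thresholds [0.25, 0.75], the start fixes x0=1 and x2=0, and leaves
# x1 and x3 free (NaN).
start = MergeTopSolutions(k=2, thresholds=[0.25, 0.75]).construct(y_proba, solutions)
assert start.shape == (1, 4)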

View File

@@ -0,0 +1,31 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from math import log
from typing import List, Dict, Any
import numpy as np
import gurobipy as gp
from ..h5 import H5File
class ExpertBranchPriorityComponent:
def __init__(self) -> None:
pass
def fit(self, train_h5: List[str]) -> None:
pass
def before_mip(self, test_h5: str, model: gp.Model, _: Dict[str, Any]) -> None:
with H5File(test_h5, "r") as h5:
var_names = h5.get_array("static_var_names")
var_priority = h5.get_array("bb_var_priority")
assert var_priority is not None
assert var_names is not None
for (var_idx, var_name) in enumerate(var_names):
if np.isfinite(var_priority[var_idx]):
var = model.getVarByName(var_name.decode())
var.branchPriority = int(log(1 + var_priority[var_idx]))

View File

@@ -1,252 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Dict, Tuple, List, Any, TYPE_CHECKING, Set, Optional
import numpy as np
from overrides import overrides
from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.classifiers.threshold import MinProbabilityThreshold, Threshold
from miplearn.components.component import Component
from miplearn.features.sample import Sample
from miplearn.solvers.internal import Constraints
from miplearn.instance.base import Instance
from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver
class LazyConstraint:
def __init__(self, cid: ConstraintName, obj: Any) -> None:
self.cid = cid
self.obj = obj
class StaticLazyConstraintsComponent(Component):
"""
Component that decides which of the constraints tagged as lazy should
be kept in the formulation, and which should be removed.
"""
def __init__(
self,
classifier: Classifier = CountingClassifier(),
threshold: Threshold = MinProbabilityThreshold([0.50, 0.50]),
violation_tolerance: float = -0.5,
) -> None:
assert isinstance(classifier, Classifier)
self.classifier_prototype: Classifier = classifier
self.threshold_prototype: Threshold = threshold
self.classifiers: Dict[ConstraintCategory, Classifier] = {}
self.thresholds: Dict[ConstraintCategory, Threshold] = {}
self.pool: Constraints = Constraints()
self.violation_tolerance: float = violation_tolerance
self.enforced_cids: Set[ConstraintName] = set()
self.n_restored: int = 0
self.n_iterations: int = 0
@overrides
def after_solve_mip(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
sample.put_array(
"mip_constr_lazy_enforced",
np.array(list(self.enforced_cids), dtype="S"),
)
stats["LazyStatic: Restored"] = self.n_restored
stats["LazyStatic: Iterations"] = self.n_iterations
@overrides
def before_solve_mip(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
stats: LearningSolveStats,
sample: Sample,
) -> None:
assert solver.internal_solver is not None
static_lazy_count = sample.get_scalar("static_constr_lazy_count")
assert static_lazy_count is not None
logger.info("Predicting violated (static) lazy constraints...")
if static_lazy_count == 0:
logger.info("Instance does not have static lazy constraints. Skipping.")
return
self.enforced_cids = set(self.sample_predict(sample))
logger.info("Moving lazy constraints to the pool...")
constraints = Constraints.from_sample(sample)
assert constraints.lazy is not None
assert constraints.names is not None
selected = [
(constraints.lazy[i] and constraints.names[i] not in self.enforced_cids)
for i in range(len(constraints.lazy))
]
n_removed = sum(selected)
n_kept = sum(constraints.lazy) - n_removed
self.pool = constraints[selected]
assert self.pool.names is not None
solver.internal_solver.remove_constraints(self.pool.names)
logger.info(f"{n_kept} lazy constraints kept; {n_removed} moved to the pool")
stats["LazyStatic: Removed"] = n_removed
stats["LazyStatic: Kept"] = n_kept
stats["LazyStatic: Restored"] = 0
self.n_restored = 0
self.n_iterations = 0
@overrides
def fit_xy(
self,
x: Dict[ConstraintCategory, np.ndarray],
y: Dict[ConstraintCategory, np.ndarray],
) -> None:
for c in y.keys():
assert c in x
self.classifiers[c] = self.classifier_prototype.clone()
self.thresholds[c] = self.threshold_prototype.clone()
self.classifiers[c].fit(x[c], y[c])
self.thresholds[c].fit(self.classifiers[c], x[c], y[c])
@overrides
def iteration_cb(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
) -> bool:
if solver.use_lazy_cb:
return False
else:
return self._check_and_add(solver)
@overrides
def lazy_cb(
self,
solver: "LearningSolver",
instance: "Instance",
model: Any,
) -> None:
self._check_and_add(solver)
def sample_predict(self, sample: Sample) -> List[ConstraintName]:
x, y, cids = self._sample_xy_with_cids(sample)
enforced_cids: List[ConstraintName] = []
for category in x.keys():
if category not in self.classifiers:
continue
npx = np.array(x[category])
proba = self.classifiers[category].predict_proba(npx)
thr = self.thresholds[category].predict(npx)
pred = list(proba[:, 1] > thr[1])
for (i, is_selected) in enumerate(pred):
if is_selected:
enforced_cids += [cids[category][i]]
return enforced_cids
@overrides
def sample_xy(
self,
_: Optional[Instance],
sample: Sample,
) -> Tuple[
Dict[ConstraintCategory, List[List[float]]],
Dict[ConstraintCategory, List[List[float]]],
]:
x, y, __ = self._sample_xy_with_cids(sample)
return x, y
def _check_and_add(self, solver: "LearningSolver") -> bool:
assert solver.internal_solver is not None
assert self.pool.names is not None
if len(self.pool.names) == 0:
logger.info("Lazy constraint pool is empty. Skipping violation check.")
return False
self.n_iterations += 1
logger.info("Finding violated lazy constraints...")
is_satisfied = solver.internal_solver.are_constraints_satisfied(
self.pool,
tol=self.violation_tolerance,
)
is_violated = [not i for i in is_satisfied]
violated_constraints = self.pool[is_violated]
satisfied_constraints = self.pool[is_satisfied]
self.pool = satisfied_constraints
assert violated_constraints.names is not None
assert satisfied_constraints.names is not None
n_violated = len(violated_constraints.names)
n_satisfied = len(satisfied_constraints.names)
logger.info(f"Found {n_violated} violated lazy constraints found")
if n_violated > 0:
logger.info(
f"Enforcing {n_violated} lazy constraints; "
f"{n_satisfied} left in the pool..."
)
solver.internal_solver.add_constraints(violated_constraints)
for (i, name) in enumerate(violated_constraints.names):
self.enforced_cids.add(name)
self.n_restored += 1
return True
else:
return False
def _sample_xy_with_cids(
self, sample: Sample
) -> Tuple[
Dict[ConstraintCategory, List[List[float]]],
Dict[ConstraintCategory, List[List[float]]],
Dict[ConstraintCategory, List[ConstraintName]],
]:
x: Dict[ConstraintCategory, List[List[float]]] = {}
y: Dict[ConstraintCategory, List[List[float]]] = {}
cids: Dict[ConstraintCategory, List[ConstraintName]] = {}
instance_features = sample.get_array("static_instance_features")
constr_features = sample.get_array("lp_constr_features")
constr_names = sample.get_array("static_constr_names")
constr_categories = sample.get_array("static_constr_categories")
constr_lazy = sample.get_array("static_constr_lazy")
lazy_enforced = sample.get_array("mip_constr_lazy_enforced")
if constr_features is None:
constr_features = sample.get_array("static_constr_features")
assert instance_features is not None
assert constr_features is not None
assert constr_names is not None
assert constr_categories is not None
assert constr_lazy is not None
for (cidx, cname) in enumerate(constr_names):
# Initialize categories
if not constr_lazy[cidx]:
continue
category = constr_categories[cidx]
if len(category) == 0:
continue
if category not in x:
x[category] = []
y[category] = []
cids[category] = []
# Features
features = list(instance_features)
features.extend(constr_features[cidx])
x[category].append(features)
cids[category].append(cname)
# Labels
if lazy_enforced is not None:
if cname in lazy_enforced:
y[category] += [[False, True]]
else:
y[category] += [[True, False]]
return x, y, cids

View File

@@ -0,0 +1,210 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Tuple, Optional
import numpy as np
from miplearn.extractors.abstract import FeaturesExtractor
from miplearn.h5 import H5File
class AlvLouWeh2017Extractor(FeaturesExtractor):
def __init__(
self,
with_m1: bool = True,
with_m2: bool = True,
with_m3: bool = True,
):
self.with_m1 = with_m1
self.with_m2 = with_m2
self.with_m3 = with_m3
def get_instance_features(self, h5: H5File) -> np.ndarray:
raise NotImplementedError()
def get_var_features(self, h5: H5File) -> np.ndarray:
"""
Computes static variable features described in:
Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based
approximation of strong branching. INFORMS Journal on Computing, 29(1),
185-195.
"""
A = h5.get_sparse("static_constr_lhs")
b = h5.get_array("static_constr_rhs")
c = h5.get_array("static_var_obj_coeffs")
c_sa_up = h5.get_array("lp_var_sa_obj_up")
c_sa_down = h5.get_array("lp_var_sa_obj_down")
values = h5.get_array("lp_var_values")
assert A is not None
assert b is not None
assert c is not None
nvars = len(c)
curr = 0
max_n_features = 40
features = np.zeros((nvars, max_n_features))
def push(v: np.ndarray) -> None:
nonlocal curr
assert v.shape == (nvars,), f"{v.shape} != ({nvars},)"
features[:, curr] = v
curr += 1
def push_sign_abs(v: np.ndarray) -> None:
assert v.shape == (nvars,), f"{v.shape} != ({nvars},)"
push(np.sign(v))
push(np.abs(v))
def maxmin(M: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
M_max = np.ravel(M.max(axis=0).todense())
M_min = np.ravel(M.min(axis=0).todense())
return M_max, M_min
with np.errstate(divide="ignore", invalid="ignore"):
# Feature 1
push(np.sign(c))
# Feature 2
c_pos_sum = c[c > 0].sum()
push(np.abs(c) / c_pos_sum)
# Feature 3
c_neg_sum = -c[c < 0].sum()
push(np.abs(c) / c_neg_sum)
if A is not None and self.with_m1:
# Compute A_ji / |b_j|
M1 = A.T.multiply(1.0 / np.abs(b)).T.tocsr()
# Select rows with positive b_j and compute max/min
M1_pos = M1[b > 0, :]
if M1_pos.shape[0] > 0:
M1_pos_max = np.asarray(M1_pos.max(axis=0).todense()).flatten()
M1_pos_min = np.asarray(M1_pos.min(axis=0).todense()).flatten()
else:
M1_pos_max = np.zeros(nvars)
M1_pos_min = np.zeros(nvars)
# Select rows with negative b_j and compute max/min
M1_neg = M1[b < 0, :]
if M1_neg.shape[0] > 0:
M1_neg_max = np.asarray(M1_neg.max(axis=0).todense()).flatten()
M1_neg_min = np.asarray(M1_neg.min(axis=0).todense()).flatten()
else:
M1_neg_max = np.zeros(nvars)
M1_neg_min = np.zeros(nvars)
# Features 4-11
push_sign_abs(M1_pos_min)
push_sign_abs(M1_pos_max)
push_sign_abs(M1_neg_min)
push_sign_abs(M1_neg_max)
if A is not None and self.with_m2:
# Compute |c_i| / A_ij
M2 = A.power(-1).multiply(np.abs(c)).tocsc()
# Compute max/min
M2_max, M2_min = maxmin(M2)
# Make copies of M2 and erase elements based on sign(c)
M2_pos_max = M2_max.copy()
M2_neg_max = M2_max.copy()
M2_pos_min = M2_min.copy()
M2_neg_min = M2_min.copy()
M2_pos_max[c <= 0] = 0
M2_pos_min[c <= 0] = 0
M2_neg_max[c >= 0] = 0
M2_neg_min[c >= 0] = 0
# Features 12-19
push_sign_abs(M2_pos_min)
push_sign_abs(M2_pos_max)
push_sign_abs(M2_neg_min)
push_sign_abs(M2_neg_max)
if A is not None and self.with_m3:
# Compute row sums
S_pos = A.maximum(0).sum(axis=1)
S_neg = np.abs(A.minimum(0).sum(axis=1))
# Divide A by positive and negative row sums
M3_pos = A.multiply(1 / S_pos).tocsr()
M3_neg = A.multiply(1 / S_neg).tocsr()
# Remove +inf and -inf generated by division by zero
M3_pos.data[~np.isfinite(M3_pos.data)] = 0.0
M3_neg.data[~np.isfinite(M3_neg.data)] = 0.0
M3_pos.eliminate_zeros()
M3_neg.eliminate_zeros()
# Split each matrix into positive and negative parts
M3_pos_pos = M3_pos.maximum(0)
M3_pos_neg = -(M3_pos.minimum(0))
M3_neg_pos = M3_neg.maximum(0)
M3_neg_neg = -(M3_neg.minimum(0))
# Calculate max/min
M3_pos_pos_max, M3_pos_pos_min = maxmin(M3_pos_pos)
M3_pos_neg_max, M3_pos_neg_min = maxmin(M3_pos_neg)
M3_neg_pos_max, M3_neg_pos_min = maxmin(M3_neg_pos)
M3_neg_neg_max, M3_neg_neg_min = maxmin(M3_neg_neg)
# Features 20-35
push_sign_abs(M3_pos_pos_max)
push_sign_abs(M3_pos_pos_min)
push_sign_abs(M3_pos_neg_max)
push_sign_abs(M3_pos_neg_min)
push_sign_abs(M3_neg_pos_max)
push_sign_abs(M3_neg_pos_min)
push_sign_abs(M3_neg_neg_max)
push_sign_abs(M3_neg_neg_min)
# Feature 36: only available during B&B
# Feature 37
if values is not None:
push(
np.minimum(
values - np.floor(values),
np.ceil(values) - values,
)
)
# Features 38-43: only available during B&B
# Feature 44
if c_sa_up is not None:
assert c_sa_down is not None
# Features 44 and 46
push(np.sign(c_sa_up))
push(np.sign(c_sa_down))
# Feature 45 is duplicated
# Feature 47-48
push(np.log(c - c_sa_down / np.sign(c)))
push(np.log(c - c_sa_up / np.sign(c)))
# Features 49-64: only available during B&B
features = features[:, 0:curr]
_fix_infinity(features)
return features
def get_constr_features(self, h5: H5File) -> np.ndarray:
raise NotImplementedError()
def _fix_infinity(m: Optional[np.ndarray]) -> None:
if m is None:
return
masked = np.ma.masked_invalid(m) # type: ignore
max_values = np.max(masked, axis=0)
min_values = np.min(masked, axis=0)
m[:] = np.maximum(np.minimum(m, max_values), min_values)
m[~np.isfinite(m)] = 0.0
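A short sketch of applying the extractor to one training sample; the file name is a placeholder:
# One row per variable; up to 40 columns, depending on which LP data
# (fractional values, objective sensitivity ranges) the file contains.
with H5File("instance_00001.h5", "r") as h5:
    x_vars = AlvLouWeh2017Extractor().get_var_features(h5)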

View File

@@ -0,0 +1,19 @@
from abc import ABC, abstractmethod
import numpy as np
from miplearn.h5 import H5File
class FeaturesExtractor(ABC):
@abstractmethod
def get_instance_features(self, h5: H5File) -> np.ndarray:
pass
@abstractmethod
def get_var_features(self, h5: H5File) -> np.ndarray:
pass
@abstractmethod
def get_constr_features(self, h5: H5File) -> np.ndarray:
pass

View File

@@ -0,0 +1,24 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import numpy as np
from miplearn.extractors.abstract import FeaturesExtractor
from miplearn.h5 import H5File
class DummyExtractor(FeaturesExtractor):
def get_instance_features(self, h5: H5File) -> np.ndarray:
return np.zeros(1)
def get_var_features(self, h5: H5File) -> np.ndarray:
var_types = h5.get_array("static_var_types")
assert var_types is not None
n_vars = len(var_types)
return np.zeros((n_vars, 1))
def get_constr_features(self, h5: H5File) -> np.ndarray:
constr_sense = h5.get_array("static_constr_sense")
assert constr_sense is not None
n_constr = len(constr_sense)
return np.zeros((n_constr, 1))

View File

@@ -0,0 +1,69 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, List
import numpy as np
from miplearn.extractors.abstract import FeaturesExtractor
from miplearn.h5 import H5File
class H5FieldsExtractor(FeaturesExtractor):
def __init__(
self,
instance_fields: Optional[List[str]] = None,
var_fields: Optional[List[str]] = None,
constr_fields: Optional[List[str]] = None,
):
self.instance_fields = instance_fields
self.var_fields = var_fields
self.constr_fields = constr_fields
def get_instance_features(self, h5: H5File) -> np.ndarray:
if self.instance_fields is None:
raise Exception("No instance fields provided")
x = []
for field in self.instance_fields:
try:
data = h5.get_array(field)
except ValueError:
data = h5.get_scalar(field)
assert data is not None
x.append(data)
x = np.hstack(x)
assert len(x.shape) == 1
return x
def get_var_features(self, h5: H5File) -> np.ndarray:
var_types = h5.get_array("static_var_types")
assert var_types is not None
n_vars = len(var_types)
if self.var_fields is None:
raise Exception("No var fields provided")
return self._extract(h5, self.var_fields, n_vars)
def get_constr_features(self, h5: H5File) -> np.ndarray:
constr_sense = h5.get_array("static_constr_sense")
assert constr_sense is not None
n_constr = len(constr_sense)
if self.constr_fields is None:
raise Exception("No constr fields provided")
return self._extract(h5, self.constr_fields, n_constr)
def _extract(self, h5, fields, n_expected):
x = []
for field in fields:
try:
data = h5.get_array(field)
except ValueError:
v = h5.get_scalar(field)
data = np.repeat(v, n_expected)
assert data is not None
assert len(data.shape) == 1
assert data.shape[0] == n_expected
x.append(data)
features = np.vstack(x).T
assert len(features.shape) == 2
assert features.shape[0] == n_expected
return features
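A minimal sketch of configuring the extractor; the field choices below are examples, not defaults:
ext = H5FieldsExtractor(
    instance_fields=["static_var_obj_coeffs"],
    var_fields=["lp_var_values", "lp_var_reduced_costs"],
)
with H5File("instance_00001.h5", "r") as h5:
    x_inst = ext.get_instance_features(h5)  # 1-d concatenation of fields
    x_vars = ext.get_var_features(h5)  # one column per field, one row per variable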

View File

@@ -1,3 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

View File

@@ -1,504 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from math import log, isfinite
from typing import TYPE_CHECKING, List, Tuple, Optional
import numpy as np
from scipy.sparse import coo_matrix
from miplearn.features.sample import Sample
from miplearn.solvers.internal import LPSolveStats
if TYPE_CHECKING:
from miplearn.solvers.internal import InternalSolver
from miplearn.instance.base import Instance
# noinspection PyPep8Naming
class FeaturesExtractor:
def __init__(
self,
with_sa: bool = True,
with_lhs: bool = True,
) -> None:
self.with_sa = with_sa
self.with_lhs = with_lhs
self.var_features_user: Optional[np.ndarray] = None
def extract_after_load_features(
self,
instance: "Instance",
solver: "InternalSolver",
sample: Sample,
) -> None:
variables = solver.get_variables(with_static=True)
constraints = solver.get_constraints(with_static=True, with_lhs=self.with_lhs)
assert constraints.names is not None
sample.put_array("static_var_lower_bounds", variables.lower_bounds)
sample.put_array("static_var_names", variables.names)
sample.put_array("static_var_obj_coeffs", variables.obj_coeffs)
sample.put_array("static_var_types", variables.types)
sample.put_array("static_var_upper_bounds", variables.upper_bounds)
sample.put_array("static_constr_names", constraints.names)
sample.put_sparse("static_constr_lhs", constraints.lhs)
sample.put_array("static_constr_rhs", constraints.rhs)
sample.put_array("static_constr_senses", constraints.senses)
# Instance features
self._extract_user_features_instance(instance, sample)
# Constraint features
(
constr_features,
constr_categories,
constr_lazy,
) = FeaturesExtractor._extract_user_features_constrs(
instance,
constraints.names,
)
sample.put_array("static_constr_features", constr_features)
sample.put_array("static_constr_categories", constr_categories)
sample.put_array("static_constr_lazy", constr_lazy)
sample.put_scalar("static_constr_lazy_count", int(constr_lazy.sum()))
# Variable features
(
vars_features_user,
var_categories,
) = self._extract_user_features_vars(instance, sample)
self.var_features_user = vars_features_user
sample.put_array("static_var_categories", var_categories)
assert variables.lower_bounds is not None
assert variables.obj_coeffs is not None
assert variables.upper_bounds is not None
sample.put_array(
"static_var_features",
np.hstack(
[
vars_features_user,
self._compute_AlvLouWeh2017(
A=constraints.lhs,
b=constraints.rhs,
c=variables.obj_coeffs,
),
]
),
)
def extract_after_lp_features(
self,
solver: "InternalSolver",
sample: Sample,
lp_stats: LPSolveStats,
) -> None:
for (k, v) in lp_stats.__dict__.items():
sample.put_scalar(k, v)
variables = solver.get_variables(with_static=False, with_sa=self.with_sa)
constraints = solver.get_constraints(with_static=False, with_sa=self.with_sa)
sample.put_array("lp_var_basis_status", variables.basis_status)
sample.put_array("lp_var_reduced_costs", variables.reduced_costs)
sample.put_array("lp_var_sa_lb_down", variables.sa_lb_down)
sample.put_array("lp_var_sa_lb_up", variables.sa_lb_up)
sample.put_array("lp_var_sa_obj_down", variables.sa_obj_down)
sample.put_array("lp_var_sa_obj_up", variables.sa_obj_up)
sample.put_array("lp_var_sa_ub_down", variables.sa_ub_down)
sample.put_array("lp_var_sa_ub_up", variables.sa_ub_up)
sample.put_array("lp_var_values", variables.values)
sample.put_array("lp_constr_basis_status", constraints.basis_status)
sample.put_array("lp_constr_dual_values", constraints.dual_values)
sample.put_array("lp_constr_sa_rhs_down", constraints.sa_rhs_down)
sample.put_array("lp_constr_sa_rhs_up", constraints.sa_rhs_up)
sample.put_array("lp_constr_slacks", constraints.slacks)
# Variable features
lp_var_features_list = []
for f in [
self.var_features_user,
self._compute_AlvLouWeh2017(
A=sample.get_sparse("static_constr_lhs"),
b=sample.get_array("static_constr_rhs"),
c=sample.get_array("static_var_obj_coeffs"),
c_sa_up=variables.sa_obj_up,
c_sa_down=variables.sa_obj_down,
values=variables.values,
),
]:
if f is not None:
lp_var_features_list.append(f)
for f in [
variables.reduced_costs,
variables.sa_lb_down,
variables.sa_lb_up,
variables.sa_obj_down,
variables.sa_obj_up,
variables.sa_ub_down,
variables.sa_ub_up,
variables.values,
]:
if f is not None:
lp_var_features_list.append(f.reshape(-1, 1))
lp_var_features = np.hstack(lp_var_features_list)
_fix_infinity(lp_var_features)
sample.put_array("lp_var_features", lp_var_features)
# Constraint features
lp_constr_features_list = []
for f in [sample.get_array("static_constr_features")]:
if f is not None:
lp_constr_features_list.append(f)
for f in [
sample.get_array("lp_constr_dual_values"),
sample.get_array("lp_constr_sa_rhs_down"),
sample.get_array("lp_constr_sa_rhs_up"),
sample.get_array("lp_constr_slacks"),
]:
if f is not None:
lp_constr_features_list.append(f.reshape(-1, 1))
lp_constr_features = np.hstack(lp_constr_features_list)
_fix_infinity(lp_constr_features)
sample.put_array("lp_constr_features", lp_constr_features)
# Build lp_instance_features
static_instance_features = sample.get_array("static_instance_features")
assert static_instance_features is not None
assert lp_stats.lp_value is not None
assert lp_stats.lp_wallclock_time is not None
sample.put_array(
"lp_instance_features",
np.hstack(
[
static_instance_features,
lp_stats.lp_value,
lp_stats.lp_wallclock_time,
]
),
)
def extract_after_mip_features(
self,
solver: "InternalSolver",
sample: Sample,
) -> None:
variables = solver.get_variables(with_static=False, with_sa=False)
constraints = solver.get_constraints(with_static=False, with_sa=False)
sample.put_array("mip_var_values", variables.values)
sample.put_array("mip_constr_slacks", constraints.slacks)
# noinspection DuplicatedCode
def _extract_user_features_vars(
self,
instance: "Instance",
sample: Sample,
) -> Tuple[np.ndarray, np.ndarray]:
# Query variable names
var_names = sample.get_array("static_var_names")
assert var_names is not None
# Query variable features
var_features = instance.get_variable_features(var_names)
assert isinstance(var_features, np.ndarray), (
f"Variable features must be a numpy array. "
f"Found {var_features.__class__} instead."
)
assert len(var_features.shape) == 2, (
f"Variable features must be 2-dimensional array. "
f"Found array with shape {var_features.shape} instead."
)
assert var_features.shape[0] == len(var_names), (
f"Variable features must have exactly {len(var_names)} rows. "
f"Found {var_features.shape[0]} rows instead."
)
assert var_features.dtype.kind in ["f"], (
f"Variable features must be floating point numbers. "
f"Found {var_features.dtype} instead."
)
# Query variable categories
var_categories = instance.get_variable_categories(var_names)
assert isinstance(var_categories, np.ndarray), (
f"Variable categories must be a numpy array. "
f"Found {var_categories.__class__} instead."
)
assert len(var_categories.shape) == 1, (
f"Variable categories must be a vector. "
f"Found array with shape {var_categories.shape} instead."
)
assert len(var_categories) == len(var_names), (
f"Variable categories must have exactly {len(var_names)} elements. "
f"Found {var_categories.shape[0]} elements instead."
)
assert var_categories.dtype.kind == "S", (
f"Variable categories must be a numpy array with dtype='S'. "
f"Found {var_categories.dtype} instead."
)
return var_features, var_categories
# noinspection DuplicatedCode
@classmethod
def _extract_user_features_constrs(
cls,
instance: "Instance",
constr_names: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
# Query constraint features
constr_features = instance.get_constraint_features(constr_names)
assert isinstance(constr_features, np.ndarray), (
f"get_constraint_features must return a numpy array. "
f"Found {constr_features.__class__} instead."
)
assert len(constr_features.shape) == 2, (
f"get_constraint_features must return a 2-dimensional array. "
f"Found array with shape {constr_features.shape} instead."
)
assert constr_features.shape[0] == len(constr_names), (
f"get_constraint_features must return an array with {len(constr_names)} "
f"rows. Found {constr_features.shape[0]} rows instead."
)
assert constr_features.dtype.kind in ["f"], (
f"get_constraint_features must return floating point numbers. "
f"Found {constr_features.dtype} instead."
)
# Query constraint categories
constr_categories = instance.get_constraint_categories(constr_names)
assert isinstance(constr_categories, np.ndarray), (
f"get_constraint_categories must return a numpy array. "
f"Found {constr_categories.__class__} instead."
)
assert len(constr_categories.shape) == 1, (
f"get_constraint_categories must return a vector. "
f"Found array with shape {constr_categories.shape} instead."
)
assert len(constr_categories) == len(constr_names), (
f"get_constraint_categories must return a vector with {len(constr_names)} "
f"elements. Found {constr_categories.shape[0]} elements instead."
)
assert constr_categories.dtype.kind == "S", (
f"get_constraint_categories must return a numpy array with dtype='S'. "
f"Found {constr_categories.dtype} instead."
)
# Query constraint lazy attribute
constr_lazy = instance.are_constraints_lazy(constr_names)
assert isinstance(constr_lazy, np.ndarray), (
f"are_constraints_lazy must return a numpy array. "
f"Found {constr_lazy.__class__} instead."
)
assert len(constr_lazy.shape) == 1, (
f"are_constraints_lazy must return a vector. "
f"Found array with shape {constr_lazy.shape} instead."
)
assert constr_lazy.shape[0] == len(constr_names), (
f"are_constraints_lazy must return a vector with {len(constr_names)} "
f"elements. Found {constr_lazy.shape[0]} elements instead."
)
assert constr_lazy.dtype.kind == "b", (
f"are_constraints_lazy must return a boolean array. "
f"Found {constr_lazy.dtype} instead."
)
return constr_features, constr_categories, constr_lazy
def _extract_user_features_instance(
self,
instance: "Instance",
sample: Sample,
) -> None:
features = instance.get_instance_features()
assert isinstance(features, np.ndarray), (
f"Instance features must be a numpy array. "
f"Found {features.__class__} instead."
)
assert len(features.shape) == 1, (
f"Instance features must be a vector. "
f"Found array with shape {features.shape} instead."
)
assert features.dtype.kind in [
"f"
], f"Instance features have unsupported {features.dtype}"
sample.put_array("static_instance_features", features)
@classmethod
def _compute_AlvLouWeh2017(
cls,
A: Optional[coo_matrix] = None,
b: Optional[np.ndarray] = None,
c: Optional[np.ndarray] = None,
c_sa_down: Optional[np.ndarray] = None,
c_sa_up: Optional[np.ndarray] = None,
values: Optional[np.ndarray] = None,
with_m1: bool = True,
with_m2: bool = True,
with_m3: bool = True,
) -> np.ndarray:
"""
Computes static variable features described in:
Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based
approximation of strong branching. INFORMS Journal on Computing, 29(1),
185-195.
"""
assert b is not None
assert c is not None
nvars = len(c)
curr = 0
max_n_features = 40
features = np.zeros((nvars, max_n_features))
def push(v: np.ndarray) -> None:
nonlocal curr
features[:, curr] = v
curr += 1
def push_sign_abs(v: np.ndarray) -> None:
push(np.sign(v))
push(np.abs(v))
def maxmin(M: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
M_max = np.ravel(M.max(axis=0).todense())
M_min = np.ravel(M.min(axis=0).todense())
return M_max, M_min
with np.errstate(divide="ignore", invalid="ignore"):
# Feature 1
push(np.sign(c))
# Feature 2
c_pos_sum = c[c > 0].sum()
push(np.abs(c) / c_pos_sum)
# Feature 3
c_neg_sum = -c[c < 0].sum()
push(np.abs(c) / c_neg_sum)
if A is not None and with_m1:
# Compute A_ji / |b_j|
M1 = A.T.multiply(1.0 / np.abs(b)).T.tocsr()
# Select rows with positive b_j and compute max/min
M1_pos = M1[b > 0, :]
if M1_pos.shape[0] > 0:
M1_pos_max = np.ravel(M1_pos.max(axis=0).todense())
M1_pos_min = np.ravel(M1_pos.min(axis=0).todense())
else:
M1_pos_max = np.zeros(nvars)
M1_pos_min = np.zeros(nvars)
# Select rows with negative b_j and compute max/min
M1_neg = M1[b < 0, :]
if M1_neg.shape[0] > 0:
M1_neg_max = np.ravel(M1_neg.max(axis=0).todense())
M1_neg_min = np.ravel(M1_neg.min(axis=0).todense())
else:
M1_neg_max = np.zeros(nvars)
M1_neg_min = np.zeros(nvars)
# Features 4-11
push_sign_abs(M1_pos_min)
push_sign_abs(M1_pos_max)
push_sign_abs(M1_neg_min)
push_sign_abs(M1_neg_max)
if A is not None and with_m2:
# Compute |c_i| / A_ij
M2 = A.power(-1).multiply(np.abs(c)).tocsc()
# Compute max/min
M2_max, M2_min = maxmin(M2)
# Make copies of M2 and erase elements based on sign(c)
M2_pos_max = M2_max.copy()
M2_neg_max = M2_max.copy()
M2_pos_min = M2_min.copy()
M2_neg_min = M2_min.copy()
M2_pos_max[c <= 0] = 0
M2_pos_min[c <= 0] = 0
M2_neg_max[c >= 0] = 0
M2_neg_min[c >= 0] = 0
# Features 12-19
push_sign_abs(M2_pos_min)
push_sign_abs(M2_pos_max)
push_sign_abs(M2_neg_min)
push_sign_abs(M2_neg_max)
if A is not None and with_m3:
# Compute row sums
S_pos = A.maximum(0).sum(axis=1)
S_neg = np.abs(A.minimum(0).sum(axis=1))
# Divide A by positive and negative row sums
M3_pos = A.multiply(1 / S_pos).tocsr()
M3_neg = A.multiply(1 / S_neg).tocsr()
# Remove +inf and -inf generated by division by zero
M3_pos.data[~np.isfinite(M3_pos.data)] = 0.0
M3_neg.data[~np.isfinite(M3_neg.data)] = 0.0
M3_pos.eliminate_zeros()
M3_neg.eliminate_zeros()
# Split each matrix into positive and negative parts
M3_pos_pos = M3_pos.maximum(0)
M3_pos_neg = -(M3_pos.minimum(0))
M3_neg_pos = M3_neg.maximum(0)
M3_neg_neg = -(M3_neg.minimum(0))
# Calculate max/min
M3_pos_pos_max, M3_pos_pos_min = maxmin(M3_pos_pos)
M3_pos_neg_max, M3_pos_neg_min = maxmin(M3_pos_neg)
M3_neg_pos_max, M3_neg_pos_min = maxmin(M3_neg_pos)
M3_neg_neg_max, M3_neg_neg_min = maxmin(M3_neg_neg)
# Features 20-35
push_sign_abs(M3_pos_pos_max)
push_sign_abs(M3_pos_pos_min)
push_sign_abs(M3_pos_neg_max)
push_sign_abs(M3_pos_neg_min)
push_sign_abs(M3_neg_pos_max)
push_sign_abs(M3_neg_pos_min)
push_sign_abs(M3_neg_neg_max)
push_sign_abs(M3_neg_neg_min)
# Feature 36: only available during B&B
# Feature 37
if values is not None:
push(
np.minimum(
values - np.floor(values),
np.ceil(values) - values,
)
)
# Features 38-43: only available during B&B
# Feature 44
if c_sa_up is not None:
assert c_sa_down is not None
# Features 44 and 46
push(np.sign(c_sa_up))
push(np.sign(c_sa_down))
# Feature 45 is duplicated
# Features 47-48
push(np.log(c - c_sa_down / np.sign(c)))
push(np.log(c - c_sa_up / np.sign(c)))
# Features 49-64: only available during B&B
features = features[:, 0:curr]
_fix_infinity(features)
return features
def _fix_infinity(m: Optional[np.ndarray]) -> None:
if m is None:
return
masked = np.ma.masked_invalid(m)
max_values = np.max(masked, axis=0)
min_values = np.min(masked, axis=0)
m[:] = np.maximum(np.minimum(m, max_values), min_values)
m[~np.isfinite(m)] = 0.0
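# Usage sketch (illustrative addition, not part of the original module):
# computing the static Alvarez, Louveaux & Wehenkel (2017) features for a
# tiny problem min c'x subject to Ax <= b. Shapes follow the conventions
# above: A is (m, n), b is (m,), c is (n,); one feature row per variable.
if __name__ == "__main__":
    A = coo_matrix(np.array([[1.0, 2.0], [3.0, -1.0]]))
    b = np.array([4.0, 5.0])
    c = np.array([-1.0, 2.0])
    F = FeaturesExtractor._compute_AlvLouWeh2017(A=A, b=b, c=c)
    assert F.shape[0] == 2  # one row of features per decision variable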

View File

@@ -1,19 +1,18 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import warnings
from abc import ABC, abstractmethod
from copy import deepcopy
from typing import Dict, Optional, Any, Union, List, Tuple, cast, Set
from scipy.sparse import coo_matrix
from types import TracebackType
from typing import Optional, Any, Union, List, Type, Literal
import h5py
import numpy as np
from h5py import Dataset
from overrides import overrides
from scipy.sparse import coo_matrix
Bytes = Union[bytes, bytearray]
Scalar = Union[None, bool, str, int, float]
Vector = Union[
None,
List[bool],
@@ -23,6 +22,7 @@ Vector = Union[
List[Optional[str]],
np.ndarray,
]
VectorList = Union[
List[List[bool]],
List[List[str]],
@@ -35,115 +35,7 @@ VectorList = Union[
]
class Sample(ABC):
"""Abstract dictionary-like class that stores training data."""
@abstractmethod
def get_scalar(self, key: str) -> Optional[Any]:
pass
@abstractmethod
def put_scalar(self, key: str, value: Scalar) -> None:
pass
@abstractmethod
def put_array(self, key: str, value: Optional[np.ndarray]) -> None:
pass
@abstractmethod
def get_array(self, key: str) -> Optional[np.ndarray]:
pass
@abstractmethod
def put_sparse(self, key: str, value: coo_matrix) -> None:
pass
@abstractmethod
def get_sparse(self, key: str) -> Optional[coo_matrix]:
pass
def _assert_is_scalar(self, value: Any) -> None:
if value is None:
return
if isinstance(value, (str, bool, int, float, bytes, np.bytes_)):
return
assert False, f"scalar expected; found instead: {value} ({value.__class__})"
def _assert_is_array(self, value: np.ndarray) -> None:
assert isinstance(
value, np.ndarray
), f"np.ndarray expected; found instead: {value.__class__}"
assert value.dtype.kind in "biufS", f"Unsupported dtype: {value.dtype}"
def _assert_is_sparse(self, value: Any) -> None:
assert isinstance(
value, coo_matrix
), f"coo_matrix expected; found: {value.__class__}"
self._assert_is_array(value.data)
class MemorySample(Sample):
"""Dictionary-like class that stores training data in-memory."""
def __init__(
self,
data: Optional[Dict[str, Any]] = None,
) -> None:
if data is None:
data = {}
self._data: Dict[str, Any] = data
@overrides
def get_scalar(self, key: str) -> Optional[Any]:
return self._get(key)
@overrides
def put_scalar(self, key: str, value: Scalar) -> None:
if value is None:
return
self._assert_is_scalar(value)
self._put(key, value)
def _get(self, key: str) -> Optional[Any]:
if key in self._data:
return self._data[key]
else:
return None
def _put(self, key: str, value: Any) -> None:
self._data[key] = value
@overrides
def put_array(self, key: str, value: Optional[np.ndarray]) -> None:
if value is None:
return
self._assert_is_array(value)
self._put(key, value)
@overrides
def get_array(self, key: str) -> Optional[np.ndarray]:
return cast(Optional[np.ndarray], self._get(key))
@overrides
def put_sparse(self, key: str, value: coo_matrix) -> None:
if value is None:
return
self._assert_is_sparse(value)
self._put(key, value)
@overrides
def get_sparse(self, key: str) -> Optional[coo_matrix]:
return cast(Optional[coo_matrix], self._get(key))
class Hdf5Sample(Sample):
"""
Dictionary-like class that stores training data in an HDF5 file.
Unlike MemorySample, this class only loads to memory the parts of the data set that
are actually accessed, and therefore it is more scalable.
"""
class H5File:
def __init__(
self,
filename: str,
@@ -151,7 +43,6 @@ class Hdf5Sample(Sample):
) -> None:
self.file = h5py.File(filename, mode, libver="latest")
@overrides
def get_scalar(self, key: str) -> Optional[Any]:
if key not in self.file:
return None
@@ -164,7 +55,6 @@ class Hdf5Sample(Sample):
else:
return ds[()].tolist()
@overrides
def put_scalar(self, key: str, value: Any) -> None:
if value is None:
return
@@ -173,7 +63,6 @@ class Hdf5Sample(Sample):
del self.file[key]
self.file.create_dataset(key, data=value)
@overrides
def put_array(self, key: str, value: Optional[np.ndarray]) -> None:
if value is None:
return
@@ -184,13 +73,11 @@ class Hdf5Sample(Sample):
del self.file[key]
return self.file.create_dataset(key, data=value, compression="gzip")
@overrides
def get_array(self, key: str) -> Optional[np.ndarray]:
if key not in self.file:
return None
return self.file[key][:]
@overrides
def put_sparse(self, key: str, value: coo_matrix) -> None:
if value is None:
return
@@ -199,7 +86,6 @@ class Hdf5Sample(Sample):
self.put_array(f"{key}_col", value.col)
self.put_array(f"{key}_data", value.data)
@overrides
def get_sparse(self, key: str) -> Optional[coo_matrix]:
row = self.get_array(f"{key}_row")
if row is None:
@@ -225,8 +111,36 @@ class Hdf5Sample(Sample):
), f"bytes expected; found: {value.__class__}" # type: ignore
self.put_array(key, np.frombuffer(value, dtype="uint8"))
def __enter__(self):
def close(self):
self.file.close()
def __enter__(self) -> "H5File":
return self
def __exit__(self, type, value, traceback):
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> Literal[False]:
self.file.close()
return False
def _assert_is_scalar(self, value: Any) -> None:
if value is None:
return
if isinstance(value, (str, bool, int, float, bytes, np.bytes_)):
return
assert False, f"scalar expected; found instead: {value} ({value.__class__})"
def _assert_is_array(self, value: np.ndarray) -> None:
assert isinstance(
value, np.ndarray
), f"np.ndarray expected; found instead: {value.__class__}"
assert value.dtype.kind in "biufS", f"Unsupported dtype: {value.dtype}"
def _assert_is_sparse(self, value: Any) -> None:
assert isinstance(
value, coo_matrix
), f"coo_matrix expected; found: {value.__class__}"
self._assert_is_array(value.data)
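# Usage sketch (illustrative addition, not part of the original module):
# storing scalars, dense arrays and sparse matrices in an HDF5 file, then
# reading them back. The second argument is assumed to follow h5py's file
# modes ("w"/"r").
if __name__ == "__main__":
    with H5File("example.h5", "w") as h5:
        h5.put_scalar("obj_value", 42.0)
        h5.put_array("x", np.array([1.0, 2.0, 3.0]))
        h5.put_sparse("A", coo_matrix(np.eye(2)))
    with H5File("example.h5", "r") as h5:
        assert h5.get_scalar("obj_value") == 42.0
        assert h5.get_array("x").shape == (3,)
        assert h5.get_sparse("A") is not None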

View File

@@ -1,3 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

View File

@@ -1,204 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from abc import ABC, abstractmethod
from typing import Any, List, TYPE_CHECKING, Dict
import numpy as np
from miplearn.features.sample import Sample, MemorySample
from miplearn.types import ConstraintName
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from miplearn.solvers.learning import InternalSolver
# noinspection PyMethodMayBeStatic
class Instance(ABC):
"""
Abstract class holding all the data necessary to generate a concrete model of the
problem.
In the knapsack problem, for example, this class could hold the number of items,
their weights and costs, as well as the size of the knapsack. Objects
implementing this class are able to convert themselves into a concrete
optimization model, which can be optimized by a solver, or into arrays of
features, which can be provided as inputs to machine learning models.
"""
def __init__(self) -> None:
self._samples: List[Sample] = []
@abstractmethod
def to_model(self) -> Any:
"""
Returns the optimization model corresponding to this instance.
"""
pass
def get_instance_features(self) -> np.ndarray:
"""
Returns a 1-dimensional array of (numerical) features describing the
entire instance.
The array is used by LearningSolver to determine how similar two instances
are. It may also be used to predict, in combination with variable-specific
features, the values of binary decision variables in the problem.
There is not necessarily a one-to-one correspondence between models and
instance features: the features may encode only part of the data necessary to
generate the complete model. Features may also be statistics computed from
the original data. For example, in the knapsack problem, an implementation
may decide to provide as instance features only the average weights, average
prices, number of items and the size of the knapsack.
The returned array MUST have the same length for all relevant instances of
the problem. If two instances map into arrays of different lengths,
they cannot be solved by the same LearningSolver object.
By default, returns [0.0].
"""
return np.zeros(1)
def get_variable_features(self, names: np.ndarray) -> np.ndarray:
"""
Returns a 2-dimensional array of numerical features describing each of the
given decision variables.
In combination with instance features, variable features are used by
LearningSolver to predict, among other things, the optimal value of each
decision variable before the optimization takes place. In the knapsack
problem, for example, an implementation could provide as variable features
the weight and the price of a specific item.
Like instance features, the arrays returned by this method MUST have the same
length for all variables within the same category, for all relevant instances
of the problem.
If features are not provided for a given variable, MIPLearn will use a
default set of features.
By default, returns [[0.0], ..., [0.0]].
"""
return np.zeros((len(names), 1))
def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
"""
Returns a 1-dimensional array with the category of each variable.
If two variables have the same category, LearningSolver will use the same
internal ML model to predict the values of both variables. If a variable is
not assigned a category, ML models will ignore the variable.
By default, returns `names`.
"""
return names
def get_constraint_features(self, names: np.ndarray) -> np.ndarray:
return np.zeros((len(names), 1))
def get_constraint_categories(self, names: np.ndarray) -> np.ndarray:
return names
def has_dynamic_lazy_constraints(self) -> bool:
return False
def are_constraints_lazy(self, names: np.ndarray) -> np.ndarray:
return np.zeros(len(names), dtype=bool)
def find_violated_lazy_constraints(
self,
solver: "InternalSolver",
model: Any,
) -> Dict[ConstraintName, Any]:
"""
Returns lazy constraint violations found for the current solution.
After solving a model, LearningSolver will ask the instance to identify which
lazy constraints are violated by the current solution. For each identified
violation, LearningSolver will then call `enforce_lazy_constraint` and
re-solve the problem. The process repeats until no further lazy constraint
violations are found.
Violations should be returned in a dictionary mapping the name of the violation
to some user-specified data that allows the instance to unambiguously generate
the lazy constraints at a later time. In the Traveling Salesman Problem, for
example, this function could return a dictionary identifying violated subtour
inequalities. More concretely, it could return:
{
"s1": [1, 2, 3],
"s2": [4, 5, 6, 7],
}
where "s1" and "s2" are the names of the subtours, and [1,2,3] and [4,5,6,7]
are the cities in each subtour. The names of the violations should be kept
stable across instances. In our example, "s1" should always correspond to
[1,2,3] across all instances. The user-provided data should be picklable.
The current solution can be queried with `solver.get_solution()`. If the solver
is configured to use lazy callbacks, this solution may be non-integer.
For a concrete example, see TravelingSalesmanInstance.
"""
return {}
def enforce_lazy_constraint(
self,
solver: "InternalSolver",
model: Any,
violation_data: Any,
) -> None:
"""
Adds constraints to the model to ensure that the given violation is fixed.
This method is typically called immediately after
`find_violated_lazy_constraints`. The argument `violation_data` is the
user-provided data, previously returned by `find_violated_lazy_constraints`.
In the Traveling Salesman Problem, for example, it could be a list of cities
in the subtour.
After some training, LearningSolver may decide to proactively build some lazy
constraints at the beginning of the optimization process, before a solution
is even available. In this case, `enforce_lazy_constraint` will be called
without a corresponding call to `find_violated_lazy_constraints`.
For a concrete example, see TravelingSalesmanInstance.
"""
pass
def has_user_cuts(self) -> bool:
return False
def find_violated_user_cuts(self, model: Any) -> Dict[ConstraintName, Any]:
return {}
def enforce_user_cut(
self,
solver: "InternalSolver",
model: Any,
violation_data: Any,
) -> Any:
return None
def load(self) -> None:
pass
def free(self) -> None:
pass
def flush(self) -> None:
"""
Save any pending changes made to the instance to the underlying data store.
"""
pass
def get_samples(self) -> List[Sample]:
return self._samples
def create_sample(self) -> Sample:
sample = MemorySample()
self._samples.append(sample)
return sample
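# Illustrative sketch (assumption: not part of the original module): a
# minimal concrete Instance for a two-item knapsack, built with Pyomo.
if __name__ == "__main__":
    import pyomo.environ as pe

    class TinyKnapsackInstance(Instance):
        def to_model(self) -> Any:
            model = pe.ConcreteModel()
            model.x = pe.Var([0, 1], domain=pe.Binary)
            model.obj = pe.Objective(
                expr=-5.0 * model.x[0] - 4.0 * model.x[1],
                sense=pe.minimize,
            )
            model.eq_capacity = pe.Constraint(
                expr=3.0 * model.x[0] + 2.0 * model.x[1] <= 4.0
            )
            return model

    model = TinyKnapsackInstance().to_model()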

View File

@@ -1,131 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import gc
import os
import pickle
from typing import Any, Optional, List, Dict, TYPE_CHECKING
import numpy as np
from overrides import overrides
from miplearn.features.sample import Hdf5Sample, Sample
from miplearn.instance.base import Instance
from miplearn.types import ConstraintName
if TYPE_CHECKING:
from miplearn.solvers.learning import InternalSolver
class FileInstance(Instance):
def __init__(self, filename: str) -> None:
super().__init__()
assert os.path.exists(filename), f"File not found: {filename}"
self.h5 = Hdf5Sample(filename)
self.instance: Optional[Instance] = None
# Delegation
# -------------------------------------------------------------------------
@overrides
def to_model(self) -> Any:
assert self.instance is not None
return self.instance.to_model()
@overrides
def get_instance_features(self) -> np.ndarray:
assert self.instance is not None
return self.instance.get_instance_features()
@overrides
def get_variable_features(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_variable_features(names)
@overrides
def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_variable_categories(names)
@overrides
def get_constraint_features(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_constraint_features(names)
@overrides
def get_constraint_categories(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_constraint_categories(names)
@overrides
def has_dynamic_lazy_constraints(self) -> bool:
assert self.instance is not None
return self.instance.has_dynamic_lazy_constraints()
@overrides
def are_constraints_lazy(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.are_constraints_lazy(names)
@overrides
def find_violated_lazy_constraints(
self,
solver: "InternalSolver",
model: Any,
) -> Dict[ConstraintName, Any]:
assert self.instance is not None
return self.instance.find_violated_lazy_constraints(solver, model)
@overrides
def enforce_lazy_constraint(
self,
solver: "InternalSolver",
model: Any,
violation_data: Any,
) -> None:
assert self.instance is not None
self.instance.enforce_lazy_constraint(solver, model, violation_data)
@overrides
def find_violated_user_cuts(self, model: Any) -> Dict[ConstraintName, Any]:
assert self.instance is not None
return self.instance.find_violated_user_cuts(model)
@overrides
def enforce_user_cut(
self,
solver: "InternalSolver",
model: Any,
violation_data: Any,
) -> None:
assert self.instance is not None
self.instance.enforce_user_cut(solver, model, violation_data)
# Input & Output
# -------------------------------------------------------------------------
@overrides
def free(self) -> None:
self.instance = None
gc.collect()
@overrides
def load(self) -> None:
if self.instance is not None:
return
pkl = self.h5.get_bytes("pickled")
assert pkl is not None
self.instance = pickle.loads(pkl)
assert isinstance(self.instance, Instance)
@classmethod
def save(cls, instance: Instance, filename: str) -> None:
h5 = Hdf5Sample(filename, mode="w")
instance_pkl = pickle.dumps(instance)
h5.put_bytes("pickled", instance_pkl)
@overrides
def create_sample(self) -> Sample:
return self.h5
@overrides
def get_samples(self) -> List[Sample]:
return [self.h5]
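# Usage sketch (illustrative addition, not part of the original module):
# persisting a (trivial) instance to HDF5, then lazily loading it back.
if __name__ == "__main__":
    class _EmptyInstance(Instance):
        def to_model(self) -> Any:
            return None

    FileInstance.save(_EmptyInstance(), "instance.h5")
    wrapper = FileInstance("instance.h5")
    wrapper.load()  # unpickles the wrapped instance from the HDF5 file
    wrapper.free()  # drops it from memory; it can be loaded again later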

View File

@@ -1,195 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import gc
import gzip
import os
import pickle
from typing import Optional, Any, List, cast, IO, TYPE_CHECKING, Dict, Callable
import numpy as np
from overrides import overrides
from miplearn.features.sample import Sample
from miplearn.instance.base import Instance
from miplearn.types import ConstraintName
from tqdm.auto import tqdm
from p_tqdm import p_umap
if TYPE_CHECKING:
from miplearn.solvers.learning import InternalSolver
class PickleGzInstance(Instance):
"""
An instance backed by a gzipped pickle file.
The instance is only loaded to memory after an operation is called (for example,
`to_model`).
Parameters
----------
filename: str
Path of the gzipped pickle file that should be loaded.
"""
# noinspection PyMissingConstructor
def __init__(self, filename: str) -> None:
assert os.path.exists(filename), f"File not found: {filename}"
self.instance: Optional[Instance] = None
self.filename: str = filename
@overrides
def to_model(self) -> Any:
assert self.instance is not None
return self.instance.to_model()
@overrides
def get_instance_features(self) -> np.ndarray:
assert self.instance is not None
return self.instance.get_instance_features()
@overrides
def get_variable_features(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_variable_features(names)
@overrides
def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_variable_categories(names)
@overrides
def get_constraint_features(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_constraint_features(names)
@overrides
def get_constraint_categories(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.get_constraint_categories(names)
@overrides
def has_dynamic_lazy_constraints(self) -> bool:
assert self.instance is not None
return self.instance.has_dynamic_lazy_constraints()
@overrides
def are_constraints_lazy(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None
return self.instance.are_constraints_lazy(names)
@overrides
def find_violated_lazy_constraints(
self,
solver: "InternalSolver",
model: Any,
) -> Dict[ConstraintName, Any]:
assert self.instance is not None
return self.instance.find_violated_lazy_constraints(solver, model)
@overrides
def enforce_lazy_constraint(
self,
solver: "InternalSolver",
model: Any,
violation_data: Any,
) -> None:
assert self.instance is not None
self.instance.enforce_lazy_constraint(solver, model, violation_data)
@overrides
def find_violated_user_cuts(self, model: Any) -> Dict[ConstraintName, Any]:
assert self.instance is not None
return self.instance.find_violated_user_cuts(model)
@overrides
def enforce_user_cut(
self,
solver: "InternalSolver",
model: Any,
violation_name: Any,
) -> None:
assert self.instance is not None
self.instance.enforce_user_cut(solver, model, violation_name)
@overrides
def load(self) -> None:
if self.instance is None:
obj = read_pickle_gz(self.filename)
assert isinstance(obj, Instance)
self.instance = obj
@overrides
def free(self) -> None:
self.instance = None # type: ignore
gc.collect()
@overrides
def flush(self) -> None:
write_pickle_gz(self.instance, self.filename)
@overrides
def get_samples(self) -> List[Sample]:
assert self.instance is not None
return self.instance.get_samples()
@overrides
def create_sample(self) -> Sample:
assert self.instance is not None
return self.instance.create_sample()
def write_pickle_gz(obj: Any, filename: str) -> None:
os.makedirs(os.path.dirname(filename), exist_ok=True)
with gzip.GzipFile(filename, "wb") as file:
pickle.dump(obj, cast(IO[bytes], file))
def read_pickle_gz(filename: str) -> Any:
with gzip.GzipFile(filename, "rb") as file:
return pickle.load(cast(IO[bytes], file))
def write_pickle_gz_multiple(objs: List[Any], dirname: str) -> None:
for (i, obj) in enumerate(objs):
write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")
def save(
objs: List[Any],
dirname: str,
progress: bool = False,
n_jobs: int = 1,
) -> List[str]:
"""
Saves the provided objects to gzipped pickled files. Files are named sequentially
as `dirname/00000.pkl.gz`, `dirname/00001.pkl.gz`, etc.
Parameters
----------
progress: bool
If True, show progress bar
objs: List[Any]
List of objects to save
dirname: str
Output directory
Returns
-------
List containing the relative paths of the saved files.
"""
def _process(obj, filename):
write_pickle_gz(obj, filename)
filenames = [f"{dirname}/{i:05d}.pkl.gz" for i in range(len(objs))]
p_umap(_process, objs, filenames, num_cpus=n_jobs)
return filenames
def load(filename: str, build_model: Callable) -> Any:
with gzip.GzipFile(filename, "rb") as file:
data = pickle.load(cast(IO[bytes], file))
return build_model(data)
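# Usage sketch (illustrative addition, not part of the original module):
# round-tripping an object through a gzipped pickle file.
if __name__ == "__main__":
    obj = {"weights": [1.0, 2.0, 3.0], "capacity": 4.0}
    write_pickle_gz(obj, "out/00000.pkl.gz")
    assert read_pickle_gz("out/00000.pkl.gz") == obj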

92
miplearn/io.py Normal file
View File

@@ -0,0 +1,92 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from gzip import GzipFile
import os
import pickle
import sys
from typing import IO, Any, Callable, List, cast, TextIO
from .parallel import p_umap
import shutil
class _RedirectOutput:
def __init__(self, streams: List[Any]) -> None:
self.streams = streams
def write(self, data: Any) -> None:
for stream in self.streams:
stream.write(data)
def flush(self) -> None:
for stream in self.streams:
stream.flush()
def __enter__(self) -> Any:
self._original_stdout = sys.stdout
self._original_stderr = sys.stderr
sys.stdout = cast(TextIO, self)
sys.stderr = cast(TextIO, self)
return self
def __exit__(
self,
_type: Any,
_value: Any,
_traceback: Any,
) -> None:
sys.stdout = self._original_stdout
sys.stderr = self._original_stderr
def write_pkl_gz(
objs: List[Any],
dirname: str,
prefix: str = "",
n_jobs: int = 1,
progress: bool = False,
) -> List[str]:
filenames = [f"{dirname}/{prefix}{i:05d}.pkl.gz" for i in range(len(objs))]
def _process(i: int) -> None:
filename = filenames[i]
obj = objs[i]
os.makedirs(os.path.dirname(filename), exist_ok=True)
with GzipFile(filename, "wb") as file:
pickle.dump(obj, cast(IO[bytes], file))
if n_jobs > 1:
p_umap(
_process,
range(len(objs)),
smoothing=0,
num_cpus=n_jobs,
maxtasksperchild=None,
disable=not progress,
)
else:
for i in range(len(objs)):
_process(i)
return filenames
def gzip(filename: str) -> None:
with open(filename, "rb") as input_file:
with GzipFile(f"{filename}.gz", "wb") as output_file:
shutil.copyfileobj(input_file, output_file)
os.remove(filename)
def read_pkl_gz(filename: str) -> Any:
with GzipFile(filename, "rb") as file:
return pickle.load(cast(IO[bytes], file))
def _to_h5_filename(data_filename: str) -> str:
output = f"{data_filename}.h5"
output = output.replace(".pkl.gz.h5", ".h5")
output = output.replace(".pkl.h5", ".h5")
output = output.replace(".jld2.h5", ".h5")
return output
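# Usage sketch (illustrative addition, not part of the original module):
# writing a list of objects to sequentially numbered .pkl.gz files, then
# reading one back and deriving its .h5 companion filename.
if __name__ == "__main__":
    filenames = write_pkl_gz([{"id": i} for i in range(3)], "data", prefix="inst-")
    assert filenames[0] == "data/inst-00000.pkl.gz"
    assert read_pkl_gz(filenames[0]) == {"id": 0}
    assert _to_h5_filename(filenames[0]) == "data/inst-00000.h5"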

View File

@@ -1,74 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
import sys
import time
import traceback
import warnings
from typing import Dict, Any, Optional
_formatwarning = warnings.formatwarning
class TimeFormatter(logging.Formatter):
def __init__(
self,
start_time: float,
log_colors: Dict[str, str],
) -> None:
super().__init__()
self.start_time = start_time
self.log_colors = log_colors
def format(self, record: logging.LogRecord) -> str:
if record.levelno >= logging.ERROR:
color = self.log_colors["red"]
elif record.levelno >= logging.WARNING:
color = self.log_colors["yellow"]
else:
color = self.log_colors["green"]
return "%s[%12.3f]%s %s" % (
color,
record.created - self.start_time,
self.log_colors["reset"],
record.getMessage(),
)
def formatwarning_tb(*args: Any, **kwargs: Any) -> str:
s = _formatwarning(*args, **kwargs)
tb = traceback.format_stack()
s += "".join(tb[:-1])
return s
def setup_logger(
start_time: Optional[float] = None,
force_color: bool = False,
) -> None:
if start_time is None:
start_time = time.time()
if sys.stdout.isatty() or force_color:
log_colors = {
"green": "\033[92m",
"yellow": "\033[93m",
"red": "\033[91m",
"reset": "\033[0m",
}
else:
log_colors = {
"green": "",
"yellow": "",
"red": "",
"reset": "",
}
handler = logging.StreamHandler()
handler.setFormatter(TimeFormatter(start_time, log_colors))
logging.getLogger().addHandler(handler)
logging.getLogger("miplearn").setLevel(logging.INFO)
logging.getLogger("gurobipy").setLevel(logging.ERROR)
logging.getLogger("pyomo.core").setLevel(logging.ERROR)
warnings.formatwarning = formatwarning_tb
logging.captureWarnings(True)
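# Usage sketch (illustrative addition, not part of the original module):
# enabling elapsed-time, color-coded logging for miplearn messages.
if __name__ == "__main__":
    setup_logger()
    logging.getLogger("miplearn").info("solver started")
    # prints something like: [       0.001] solver started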

32
miplearn/parallel.py Normal file
View File

@@ -0,0 +1,32 @@
# Modified version of: https://github.com/swansonk14/p_tqdm
# Copyright (c) 2022 Kyle Swanson
# MIT License
from collections.abc import Sized
from typing import Any, Callable, Generator, Iterable, List
from pathos.multiprocessing import _ProcessPool as Pool
from tqdm.auto import tqdm
def _parallel(function: Callable, *iterables: Iterable, **kwargs: Any) -> Generator:
# Determine length of tqdm (equal to length of the shortest iterable or total kwarg)
total = kwargs.pop("total", None)
lengths = [len(iterable) for iterable in iterables if isinstance(iterable, Sized)]
length = total or (min(lengths) if lengths else None)
# Create parallel generator
num_cpus = kwargs.pop("num_cpus", 1)
maxtasksperchild = kwargs.pop("maxtasksperchild", 1)
chunksize = kwargs.pop("chunksize", 1)
with Pool(num_cpus, maxtasksperchild=maxtasksperchild) as pool:
for item in tqdm(
pool.imap_unordered(function, *iterables, chunksize=chunksize),
total=length,
**kwargs
):
yield item
def p_umap(function: Callable, *iterables: Iterable, **kwargs: Any) -> List[Any]:
return list(_parallel(function, *iterables, **kwargs))
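# Usage sketch (illustrative addition, not part of the original module):
# mapping a function over a list with two worker processes and a progress
# bar; results arrive in completion order, not input order.
if __name__ == "__main__":
    def square(x: int) -> int:
        return x * x

    results = p_umap(square, [1, 2, 3, 4], num_cpus=2)
    print(sorted(results))  # [1, 4, 9, 16]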

View File

@@ -1,3 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

View File

@@ -0,0 +1,146 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Optional, Union
import gurobipy as gp
import numpy as np
from gurobipy import GRB, quicksum
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
@dataclass
class BinPackData:
"""Data for the bin packing problem.
Parameters
----------
sizes
Sizes of the items
capacity
Capacity of the bin
"""
sizes: np.ndarray
capacity: int
class BinPackGenerator:
"""Random instance generator for the bin packing problem.
If `fix_items=False`, the class samples the user-provided probability distributions
n, sizes and capacity to decide, respectively, the number of items, the sizes of
the items and capacity of the bin. All values are sampled independently.
If `fix_items=True`, the class creates a reference instance, using the method
previously described, then generates additional instances by perturbing its item
sizes and bin capacity. More specifically, the sizes of the items are set to `s_i
* gamma_i` where `s_i` is the size of the i-th item in the reference instance and
`gamma_i` is sampled from `sizes_jitter`. Similarly, the bin capacity is set to `B *
beta`, where `B` is the reference bin capacity and `beta` is sampled from
`capacity_jitter`. The number of items remains the same across all generated
instances.
Args
----
n
Probability distribution for the number of items.
sizes
Probability distribution for the item sizes.
capacity
Probability distribution for the bin capacity.
sizes_jitter
Probability distribution for the item size randomization.
capacity_jitter
Probability distribution for the bin capacity randomization.
fix_items
If `True`, generates a reference instance, then applies some perturbation to it.
If `False`, generates completely different instances.
"""
def __init__(
self,
n: rv_frozen,
sizes: rv_frozen,
capacity: rv_frozen,
sizes_jitter: rv_frozen,
capacity_jitter: rv_frozen,
fix_items: bool,
) -> None:
self.n = n
self.sizes = sizes
self.capacity = capacity
self.sizes_jitter = sizes_jitter
self.capacity_jitter = capacity_jitter
self.fix_items = fix_items
self.ref_data: Optional[BinPackData] = None
def generate(self, n_samples: int) -> List[BinPackData]:
"""Generates random instances.
Parameters
----------
n_samples
Number of samples to generate.
"""
def _sample() -> BinPackData:
if self.ref_data is None:
n = self.n.rvs()
sizes = self.sizes.rvs(n)
capacity = self.capacity.rvs()
if self.fix_items:
self.ref_data = BinPackData(sizes, capacity)
else:
n = self.ref_data.sizes.shape[0]
sizes = self.ref_data.sizes
capacity = self.ref_data.capacity
sizes = sizes * self.sizes_jitter.rvs(n)
capacity = capacity * self.capacity_jitter.rvs()
return BinPackData(sizes.round(2), capacity.round(2))
return [_sample() for _ in range(n_samples)]
def build_binpack_model(data: Union[str, BinPackData]) -> GurobiModel:
"""Converts bin packing problem data into a concrete Gurobipy model."""
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, BinPackData)
model = gp.Model()
n = data.sizes.shape[0]
# Var: Use bin
y = model.addVars(n, name="y", vtype=GRB.BINARY)
# Var: Assign item to bin
x = model.addVars(n, n, name="x", vtype=GRB.BINARY)
# Obj: Minimize number of bins
model.setObjective(quicksum(y[i] for i in range(n)))
# Eq: Enforce bin capacity
model.addConstrs(
(
quicksum(data.sizes[i] * x[i, j] for i in range(n)) <= data.capacity * y[j]
for j in range(n)
),
name="eq_capacity",
)
# Eq: Must assign all items to bins
model.addConstrs(
(quicksum(x[i, j] for j in range(n)) == 1 for i in range(n)),
name="eq_assign",
)
model.update()
return GurobiModel(model)
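# Usage sketch (illustrative addition, not part of the original module):
# generating five perturbed instances of a reference bin packing problem
# and building a Gurobi model for the first one (requires a Gurobi license).
if __name__ == "__main__":
    gen = BinPackGenerator(
        n=randint(low=10, high=11),
        sizes=uniform(loc=0, scale=25),
        capacity=uniform(loc=100, scale=0),
        sizes_jitter=uniform(loc=0.9, scale=0.2),
        capacity_jitter=uniform(loc=0.9, scale=0.2),
        fix_items=True,
    )
    data = gen.generate(5)
    model = build_binpack_model(data[0])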

View File

@@ -1,230 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Dict, Optional
import numpy as np
import pyomo.environ as pe
from overrides import overrides
from scipy.stats import uniform, randint, rv_discrete
from scipy.stats.distributions import rv_frozen
from miplearn.instance.base import Instance
@dataclass
class MultiKnapsackData:
prices: np.ndarray
capacities: np.ndarray
weights: np.ndarray
class MultiKnapsackInstance(Instance):
"""Representation of the Multidimensional 0-1 Knapsack Problem.
Given a set of n items and m knapsacks, the problem is to find a subset of items
S maximizing sum(prices[i] for i in S). If selected, each item i occupies
weights[i,j] units of space in each knapsack j. Furthermore, each knapsack j has
limited storage space, given by capacities[j].
This implementation assigns a different category for each decision variable,
and therefore trains one ML model per variable. It is only suitable when training
and test instances have the same size and the items do not shuffle around.
"""
def __init__(
self,
prices: np.ndarray,
capacities: np.ndarray,
weights: np.ndarray,
) -> None:
super().__init__()
assert isinstance(prices, np.ndarray)
assert isinstance(capacities, np.ndarray)
assert isinstance(weights, np.ndarray)
assert len(weights.shape) == 2
self.m, self.n = weights.shape
assert prices.shape == (self.n,)
assert capacities.shape == (self.m,)
self.prices = prices
self.capacities = capacities
self.weights = weights
@overrides
def to_model(self) -> pe.ConcreteModel:
model = pe.ConcreteModel()
model.x = pe.Var(range(self.n), domain=pe.Binary)
model.OBJ = pe.Objective(
expr=sum(-model.x[j] * self.prices[j] for j in range(self.n)),
sense=pe.minimize,
)
model.eq_capacity = pe.ConstraintList()
for i in range(self.m):
model.eq_capacity.add(
sum(model.x[j] * self.weights[i, j] for j in range(self.n))
<= self.capacities[i]
)
return model
# noinspection PyPep8Naming
class MultiKnapsackGenerator:
def __init__(
self,
n: rv_frozen = randint(low=100, high=101),
m: rv_frozen = randint(low=30, high=31),
w: rv_frozen = randint(low=0, high=1000),
K: rv_frozen = randint(low=500, high=501),
u: rv_frozen = uniform(loc=0.0, scale=1.0),
alpha: rv_frozen = uniform(loc=0.25, scale=0.0),
fix_w: bool = False,
w_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
p_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
round: bool = True,
):
"""Initialize the problem generator.
Instances have a random number of items (or variables) and a random number of
knapsacks (or constraints), as specified by the provided probability
distributions `n` and `m`, respectively. The weight of each item `i` on
knapsack `j` is sampled independently from the provided distribution `w`. The
capacity of knapsack `j` is set to:
alpha_j * sum(w[i,j] for i in range(n)),
where `alpha_j`, the tightness ratio, is sampled from the provided
probability distribution `alpha`. To make the instances more challenging,
the costs of the items are linearly correlated to their average weights. More
specifically, the price of each item `i` is set to:
sum(w[i,j]/m for j in range(m)) + K * u_i,
where `K`, the correlation coefficient, and `u_i`, the correlation
multiplier, are sampled from the provided probability distributions. Note
that `K` is only sampled once for the entire instance.
If fix_w=True is provided, then w[i,j] are kept the same in all generated
instances. This also implies that n and m are kept fixed. Although the prices
and capacities are derived from w[i,j], as long as u and K are not constants,
the generated instances will still not be completely identical.
If a probability distribution w_jitter is provided, then item weights will be
set to w[i,j] * gamma[i,j] where gamma[i,j] is sampled from w_jitter. When
combined with fix_w=True, this argument may be used to generate instances
where the weight of each item is roughly the same, but not exactly identical,
across all instances. The prices of the items and the capacities of the
knapsacks will be calculated as above, but using these perturbed weights
instead.
By default, all generated prices, weights and capacities are rounded to the
nearest integer number. If `round=False` is provided, this rounding will be
disabled.
Parameters
----------
n: rv_discrete
Probability distribution for the number of items (or variables)
m: rv_discrete
Probability distribution for the number of knapsacks (or constraints)
w: rv_continuous
Probability distribution for the item weights
K: rv_continuous
Probability distribution for the profit correlation coefficient
u: rv_continuous
Probability distribution for the profit multiplier
alpha: rv_continuous
Probability distribution for the tightness ratio
fix_w: boolean
If true, weights are kept the same (minus the noise from w_jitter) in all
instances
w_jitter: rv_continuous
Probability distribution for random noise added to the weights
round: boolean
If true, all prices, weights and capacities are rounded to the nearest
integer
"""
assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
assert isinstance(m, rv_frozen), "m should be a SciPy probability distribution"
assert isinstance(w, rv_frozen), "w should be a SciPy probability distribution"
assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution"
assert isinstance(u, rv_frozen), "u should be a SciPy probability distribution"
assert isinstance(
alpha, rv_frozen
), "alpha should be a SciPy probability distribution"
assert isinstance(fix_w, bool), "fix_w should be boolean"
assert isinstance(
w_jitter, rv_frozen
), "w_jitter should be a SciPy probability distribution"
self.n = n
self.m = m
self.w = w
self.u = u
self.K = K
self.alpha = alpha
self.w_jitter = w_jitter
self.p_jitter = p_jitter
self.round = round
self.fix_n: Optional[int] = None
self.fix_m: Optional[int] = None
self.fix_w: Optional[np.ndarray] = None
self.fix_u: Optional[np.ndarray] = None
self.fix_K: Optional[float] = None
if fix_w:
self.fix_n = self.n.rvs()
self.fix_m = self.m.rvs()
self.fix_w = np.array([self.w.rvs(self.fix_n) for _ in range(self.fix_m)])
self.fix_u = self.u.rvs(self.fix_n)
self.fix_K = self.K.rvs()
def generate(self, n_samples: int) -> List[MultiKnapsackData]:
def _sample() -> MultiKnapsackData:
if self.fix_w is not None:
assert self.fix_m is not None
assert self.fix_n is not None
assert self.fix_u is not None
assert self.fix_K is not None
n = self.fix_n
m = self.fix_m
w = self.fix_w
u = self.fix_u
K = self.fix_K
else:
n = self.n.rvs()
m = self.m.rvs()
w = np.array([self.w.rvs(n) for _ in range(m)])
u = self.u.rvs(n)
K = self.K.rvs()
w = w * np.array([self.w_jitter.rvs(n) for _ in range(m)])
alpha = self.alpha.rvs(m)
p = np.array(
[w[:, j].sum() / m + K * u[j] for j in range(n)]
) * self.p_jitter.rvs(n)
b = np.array([w[i, :].sum() * alpha[i] for i in range(m)])
if self.round:
p = p.round()
b = b.round()
w = w.round()
return MultiKnapsackData(p, b, w)
return [_sample() for _ in range(n_samples)]
def build_multiknapsack_model(data: MultiKnapsackData) -> pe.ConcreteModel:
model = pe.ConcreteModel()
m, n = data.weights.shape
model.x = pe.Var(range(n), domain=pe.Binary)
model.OBJ = pe.Objective(
expr=sum(-model.x[j] * data.prices[j] for j in range(n)),
sense=pe.minimize,
)
model.eq_capacity = pe.ConstraintList()
for i in range(m):
model.eq_capacity.add(
sum(model.x[j] * data.weights[i, j] for j in range(n)) <= data.capacities[i]
)
return model

View File

@@ -0,0 +1,189 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Optional, Union
import gurobipy as gp
import numpy as np
from gurobipy import GRB
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
@dataclass
class MultiKnapsackData:
"""Data for the multi-dimensional knapsack problem
Args
----
prices
Item prices.
capacities
Knapsack capacities.
weights
Matrix of item weights.
"""
prices: np.ndarray
capacities: np.ndarray
weights: np.ndarray
# noinspection PyPep8Naming
class MultiKnapsackGenerator:
"""Random instance generator for the multi-dimensional knapsack problem.
Instances have a random number of items (or variables) and a random number of
knapsacks (or constraints), as specified by the provided probability
distributions `n` and `m`, respectively. The weight of each item `i` on knapsack
`j` is sampled independently from the provided distribution `w`. The capacity of
knapsack `j` is set to ``alpha_j * sum(w[i,j] for i in range(n))``,
where `alpha_j`, the tightness ratio, is sampled from the provided probability
distribution `alpha`.
To make the instances more challenging, the costs of the items are linearly
correlated to their average weights. More specifically, the price of each item
`i` is set to ``sum(w[i,j]/m for j in range(m)) + K * u_i``, where `K`,
the correlation coefficient, and `u_i`, the correlation multiplier, are sampled
from the provided probability distributions. Note that `K` is only sampled once
for the entire instance.
If `fix_w=True`, then `weights[i,j]` are kept the same in all generated
instances. This also implies that n and m are kept fixed. Although the prices and
capacities are derived from `weights[i,j]`, as long as `u` and `K` are not
constants, the generated instances will still not be completely identical.
If a probability distribution `w_jitter` is provided, then item weights will be
set to ``w[i,j] * gamma[i,j]`` where `gamma[i,j]` is sampled from `w_jitter`.
When combined with `fix_w=True`, this argument may be used to generate instances
where the weight of each item is roughly the same, but not exactly identical,
across all instances. The prices of the items and the capacities of the knapsacks
will be calculated as above, but using these perturbed weights instead.
By default, all generated prices, weights and capacities are rounded to the
nearest integer number. If `round=False` is provided, this rounding will be
disabled.
Parameters
----------
n: rv_discrete
Probability distribution for the number of items (or variables).
m: rv_discrete
Probability distribution for the number of knapsacks (or constraints).
w: rv_continuous
Probability distribution for the item weights.
K: rv_continuous
Probability distribution for the profit correlation coefficient.
u: rv_continuous
Probability distribution for the profit multiplier.
alpha: rv_continuous
Probability distribution for the tightness ratio.
fix_w: boolean
If true, weights are kept the same (minus the noise from w_jitter) in all
instances.
w_jitter: rv_continuous
Probability distribution for random noise added to the weights.
round: boolean
If true, all prices, weights and capacities are rounded to the nearest
integer.
"""
def __init__(
self,
n: rv_frozen = randint(low=100, high=101),
m: rv_frozen = randint(low=30, high=31),
w: rv_frozen = randint(low=0, high=1000),
K: rv_frozen = randint(low=500, high=501),
u: rv_frozen = uniform(loc=0.0, scale=1.0),
alpha: rv_frozen = uniform(loc=0.25, scale=0.0),
fix_w: bool = False,
w_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
p_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
round: bool = True,
):
assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
assert isinstance(m, rv_frozen), "m should be a SciPy probability distribution"
assert isinstance(w, rv_frozen), "w should be a SciPy probability distribution"
assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution"
assert isinstance(u, rv_frozen), "u should be a SciPy probability distribution"
assert isinstance(
alpha, rv_frozen
), "alpha should be a SciPy probability distribution"
assert isinstance(fix_w, bool), "fix_w should be boolean"
assert isinstance(
w_jitter, rv_frozen
), "w_jitter should be a SciPy probability distribution"
self.n = n
self.m = m
self.w = w
self.u = u
self.K = K
self.alpha = alpha
self.w_jitter = w_jitter
self.p_jitter = p_jitter
self.round = round
self.fix_n: Optional[int] = None
self.fix_m: Optional[int] = None
self.fix_w: Optional[np.ndarray] = None
self.fix_u: Optional[np.ndarray] = None
self.fix_K: Optional[float] = None
if fix_w:
self.fix_n = self.n.rvs()
self.fix_m = self.m.rvs()
self.fix_w = np.array([self.w.rvs(self.fix_n) for _ in range(self.fix_m)])
self.fix_u = self.u.rvs(self.fix_n)
self.fix_K = self.K.rvs()
def generate(self, n_samples: int) -> List[MultiKnapsackData]:
def _sample() -> MultiKnapsackData:
if self.fix_w is not None:
assert self.fix_m is not None
assert self.fix_n is not None
assert self.fix_u is not None
assert self.fix_K is not None
n = self.fix_n
m = self.fix_m
w = self.fix_w
u = self.fix_u
K = self.fix_K
else:
n = self.n.rvs()
m = self.m.rvs()
w = np.array([self.w.rvs(n) for _ in range(m)])
u = self.u.rvs(n)
K = self.K.rvs()
w = w * np.array([self.w_jitter.rvs(n) for _ in range(m)])
alpha = self.alpha.rvs(m)
p = np.array(
[w[:, j].sum() / m + K * u[j] for j in range(n)]
) * self.p_jitter.rvs(n)
b = np.array([w[i, :].sum() * alpha[i] for i in range(m)])
if self.round:
p = p.round()
b = b.round()
w = w.round()
return MultiKnapsackData(p, b, w)
return [_sample() for _ in range(n_samples)]
def build_multiknapsack_model(data: Union[str, MultiKnapsackData]) -> GurobiModel:
"""Converts multi-knapsack problem data into a concrete Gurobipy model."""
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, MultiKnapsackData)
model = gp.Model()
m, n = data.weights.shape
x = model.addMVar(n, vtype=GRB.BINARY, name="x")
model.addConstr(data.weights @ x <= data.capacities)
model.setObjective(-data.prices @ x)
model.update()
return GurobiModel(model)
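# Usage sketch (illustrative addition, not part of the original module):
# generating ten correlated multi-knapsack instances that share the same
# weight matrix (up to jitter) and building a model for the first one.
if __name__ == "__main__":
    gen = MultiKnapsackGenerator(
        n=randint(low=50, high=51),
        m=randint(low=5, high=6),
        fix_w=True,
        w_jitter=uniform(loc=0.95, scale=0.1),
    )
    data = gen.generate(10)
    model = build_multiknapsack_model(data[0])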

View File

@@ -0,0 +1,185 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Optional, Union
import gurobipy as gp
import numpy as np
from gurobipy import quicksum, GRB
from scipy.spatial.distance import pdist, squareform
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
@dataclass
class PMedianData:
"""Data for the capacitated p-median problem
Args
----
distances
Matrix of distances between customer i and facility j.
demands
Customer demands.
p
Number of medians that need to be chosen.
capacities
Facility capacities.
"""
distances: np.ndarray
demands: np.ndarray
p: int
capacities: np.ndarray
class PMedianGenerator:
"""Random generator for the capacitated p-median problem.
This class first decides the number of customers and the parameter `p` by
sampling the provided `n` and `p` distributions, respectively. Then, for each
customer `i`, the class builds its geographical location `(xi, yi)` by sampling
the provided `x` and `y` distributions. For each `i`, the demand for customer `i`
and the capacity of facility `i` are decided by sampling the distributions
`demands` and `capacities`, respectively. Finally, the costs `w[i,j]` are set to
the Euclidean distance between the locations of customers `i` and `j`.
If `fixed=True`, then the number of customers, their locations, the parameter
`p`, the demands and the capacities are only sampled from their respective
distributions exactly once, to build a reference instance which is then
perturbed. Specifically, for each perturbation, the distances, demands and
capacities are multiplied by factors sampled from the distributions
`distances_jitter`, `demands_jitter` and `capacities_jitter`, respectively. The
result is a list of instances that have the same set of customers, but slightly
different demands, capacities and distances.
Parameters
----------
x
Probability distribution for the x-coordinate of the points.
y
Probability distribution for the y-coordinate of the points.
n
Probability distribution for the number of customers.
p
Probability distribution for the number of medians.
demands
Probability distribution for the customer demands.
capacities
Probability distribution for the facility capacities.
distances_jitter
Probability distribution for the random scaling factor applied to distances.
demands_jitter
Probability distribution for the random scaling factor applied to demands.
capacities_jitter
Probability distribution for the random scaling factor applied to capacities.
fixed
If `True`, then the customers are kept the same across all generated instances.
"""
def __init__(
self,
x: rv_frozen = uniform(loc=0.0, scale=100.0),
y: rv_frozen = uniform(loc=0.0, scale=100.0),
n: rv_frozen = randint(low=100, high=101),
p: rv_frozen = randint(low=10, high=11),
demands: rv_frozen = uniform(loc=0, scale=20),
capacities: rv_frozen = uniform(loc=0, scale=100),
distances_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
demands_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
capacities_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
fixed: bool = True,
):
self.x = x
self.y = y
self.n = n
self.p = p
self.demands = demands
self.capacities = capacities
self.distances_jitter = distances_jitter
self.demands_jitter = demands_jitter
self.capacities_jitter = capacities_jitter
self.fixed = fixed
self.ref_data: Optional[PMedianData] = None
def generate(self, n_samples: int) -> List[PMedianData]:
def _sample() -> PMedianData:
if self.ref_data is None:
n = self.n.rvs()
p = self.p.rvs()
loc = np.array([(self.x.rvs(), self.y.rvs()) for _ in range(n)])
distances = squareform(pdist(loc))
demands = self.demands.rvs(n)
capacities = self.capacities.rvs(n)
else:
n = self.ref_data.demands.shape[0]
distances = self.ref_data.distances * self.distances_jitter.rvs(
size=(n, n)
)
distances = np.tril(distances) + np.triu(distances.T, 1)
demands = self.ref_data.demands * self.demands_jitter.rvs(n)
capacities = self.ref_data.capacities * self.capacities_jitter.rvs(n)
p = self.ref_data.p
data = PMedianData(
distances=distances.round(2),
demands=demands.round(2),
p=p,
capacities=capacities.round(2),
)
if self.fixed and self.ref_data is None:
self.ref_data = data
return data
return [_sample() for _ in range(n_samples)]
def build_pmedian_model(data: Union[str, PMedianData]) -> GurobiModel:
"""Converts capacitated p-median data into a concrete Gurobipy model."""
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, PMedianData)
model = gp.Model()
n = len(data.demands)
# Decision variables
x = model.addVars(n, n, vtype=GRB.BINARY, name="x")
y = model.addVars(n, vtype=GRB.BINARY, name="y")
# Objective function
model.setObjective(
quicksum(data.distances[i, j] * x[i, j] for i in range(n) for j in range(n))
)
# Eq: Must serve each customer
model.addConstrs(
(quicksum(x[i, j] for j in range(n)) == 1 for i in range(n)),
name="eq_demand",
)
# Eq: Must choose p medians
model.addConstr(
quicksum(y[j] for j in range(n)) == data.p,
name="eq_choose",
)
# Eq: Must not exceed capacity
model.addConstrs(
(
quicksum(data.demands[i] * x[i, j] for i in range(n))
<= data.capacities[j] * y[j]
for j in range(n)
),
name="eq_capacity",
)
model.update()
return GurobiModel(model)
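A brief usage sketch (illustrative): with the default `fixed=True`, the first sample becomes the reference instance and every later sample is a perturbation of it.

gen = PMedianGenerator()
instances = gen.generate(5)  # one reference instance + four perturbed copies
model = build_pmedian_model(instances[0])
model.optimize()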

View File

@@ -0,0 +1,120 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Union
import gurobipy as gp
import numpy as np
import pyomo.environ as pe
from gurobipy.gurobipy import GRB
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
from miplearn.solvers.pyomo import PyomoModel
@dataclass
class SetCoverData:
costs: np.ndarray
incidence_matrix: np.ndarray
class SetCoverGenerator:
def __init__(
self,
n_elements: rv_frozen = randint(low=50, high=51),
n_sets: rv_frozen = randint(low=100, high=101),
costs: rv_frozen = uniform(loc=0.0, scale=100.0),
costs_jitter: rv_frozen = uniform(loc=-5.0, scale=10.0),
K: rv_frozen = uniform(loc=25.0, scale=0.0),
density: rv_frozen = uniform(loc=0.02, scale=0.00),
fix_sets: bool = True,
):
self.n_elements = n_elements
self.n_sets = n_sets
self.costs = costs
self.costs_jitter = costs_jitter
self.density = density
self.K = K
self.fix_sets = fix_sets
self.fixed_costs = None
self.fixed_matrix = None
def generate(self, n_samples: int) -> List[SetCoverData]:
def _sample() -> SetCoverData:
if self.fixed_matrix is None:
n_sets = self.n_sets.rvs()
n_elements = self.n_elements.rvs()
density = self.density.rvs()
incidence_matrix = np.random.rand(n_elements, n_sets) < density
incidence_matrix = incidence_matrix.astype(int)
# Ensure each element belongs to at least one set
for j in range(n_elements):
if incidence_matrix[j, :].sum() == 0:
incidence_matrix[j, randint(low=0, high=n_sets).rvs()] = 1
# Ensure each set contains at least one element
for i in range(n_sets):
if incidence_matrix[:, i].sum() == 0:
incidence_matrix[randint(low=0, high=n_elements).rvs(), i] = 1
costs = self.costs.rvs(n_sets) + self.K.rvs() * incidence_matrix.sum(
axis=0
)
if self.fix_sets:
self.fixed_matrix = incidence_matrix
self.fixed_costs = costs
else:
incidence_matrix = self.fixed_matrix
(_, n_sets) = incidence_matrix.shape
costs = self.fixed_costs * self.costs_jitter.rvs(n_sets)
return SetCoverData(
costs=costs.round(2),
incidence_matrix=incidence_matrix,
)
return [_sample() for _ in range(n_samples)]
def build_setcover_model_gurobipy(data: Union[str, SetCoverData]) -> GurobiModel:
data = _read_setcover_data(data)
(n_elements, n_sets) = data.incidence_matrix.shape
model = gp.Model()
x = model.addMVar(n_sets, vtype=GRB.BINARY, name="x")
model.addConstr(data.incidence_matrix @ x >= np.ones(n_elements), name="eqs")
model.setObjective(data.costs @ x)
model.update()
return GurobiModel(model)
def build_setcover_model_pyomo(
data: Union[str, SetCoverData],
solver="gurobi_persistent",
) -> PyomoModel:
data = _read_setcover_data(data)
(n_elements, n_sets) = data.incidence_matrix.shape
model = pe.ConcreteModel()
model.sets = pe.Set(initialize=range(n_sets))
model.x = pe.Var(model.sets, domain=pe.Boolean, name="x")
model.eqs = pe.Constraint(pe.Any)
for i in range(n_elements):
model.eqs[i] = (
sum(data.incidence_matrix[i, j] * model.x[j] for j in range(n_sets)) >= 1
)
model.obj = pe.Objective(
expr=sum(data.costs[j] * model.x[j] for j in range(n_sets))
)
return PyomoModel(model, solver)
def _read_setcover_data(data: Union[str, SetCoverData]) -> SetCoverData:
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, SetCoverData)
return data
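Both builders accept either a `SetCoverData` object or a path to a pickled, gzipped data file readable by `read_pkl_gz`. A brief sketch (illustrative):

gen = SetCoverGenerator()
data = gen.generate(1)[0]
model = build_setcover_model_gurobipy(data)
model.optimize()
# Same formulation through Pyomo, backed by a persistent solver interface:
model_pyomo = build_setcover_model_pyomo(data, solver="gurobi_persistent")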

View File

@@ -0,0 +1,66 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Union
import gurobipy as gp
import numpy as np
from gurobipy.gurobipy import GRB
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from .setcover import SetCoverGenerator
from miplearn.solvers.gurobi import GurobiModel
from ..io import read_pkl_gz
@dataclass
class SetPackData:
costs: np.ndarray
incidence_matrix: np.ndarray
class SetPackGenerator:
def __init__(
self,
n_elements: rv_frozen = randint(low=50, high=51),
n_sets: rv_frozen = randint(low=100, high=101),
costs: rv_frozen = uniform(loc=0.0, scale=100.0),
costs_jitter: rv_frozen = uniform(loc=-5.0, scale=10.0),
K: rv_frozen = uniform(loc=25.0, scale=0.0),
density: rv_frozen = uniform(loc=0.02, scale=0.00),
fix_sets: bool = True,
) -> None:
self.gen = SetCoverGenerator(
n_elements=n_elements,
n_sets=n_sets,
costs=costs,
costs_jitter=costs_jitter,
K=K,
density=density,
fix_sets=fix_sets,
)
def generate(self, n_samples: int) -> List[SetPackData]:
return [
SetPackData(
s.costs,
s.incidence_matrix,
)
for s in self.gen.generate(n_samples)
]
def build_setpack_model(data: Union[str, SetPackData]) -> GurobiModel:
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, SetPackData)
(n_elements, n_sets) = data.incidence_matrix.shape
model = gp.Model()
x = model.addMVar(n_sets, vtype=GRB.BINARY, name="x")
model.addConstr(data.incidence_matrix @ x <= np.ones(n_elements))
model.setObjective(-data.costs @ x)
model.update()
return GurobiModel(model)

View File

@@ -1,19 +1,22 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List
from typing import List, Union
import gurobipy as gp
import networkx as nx
import numpy as np
import pyomo.environ as pe
from gurobipy import GRB, quicksum
from networkx import Graph
from overrides import overrides
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from miplearn.instance.base import Instance
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
from miplearn.solvers.pyomo import PyomoModel
@dataclass
@@ -22,36 +25,6 @@ class MaxWeightStableSetData:
weights: np.ndarray
class MaxWeightStableSetInstance(Instance):
"""An instance of the Maximum-Weight Stable Set Problem.
Given a graph G=(V,E) and a weight w_v for each vertex v, the problem asks for a stable
set S of G maximizing sum(w_v for v in S). A stable set (also called independent set) is
a subset of vertices, no two of which are adjacent.
This is one of Karp's 21 NP-complete problems.
"""
def __init__(self, graph: Graph, weights: np.ndarray) -> None:
super().__init__()
self.graph = graph
self.weights = weights
self.nodes = list(self.graph.nodes)
@overrides
def to_model(self) -> pe.ConcreteModel:
model = pe.ConcreteModel()
model.x = pe.Var(self.nodes, domain=pe.Binary)
model.OBJ = pe.Objective(
expr=sum(model.x[v] * self.weights[v] for v in self.nodes),
sense=pe.maximize,
)
model.clique_eqs = pe.ConstraintList()
for clique in nx.find_cliques(self.graph):
model.clique_eqs.add(sum(model.x[v] for v in clique) <= 1)
return model
class MaxWeightStableSetGenerator:
"""Random instance generator for the Maximum-Weight Stable Set Problem.
@@ -100,7 +73,7 @@ class MaxWeightStableSetGenerator:
graph = self.graph
else:
graph = self._generate_graph()
weights = self.w.rvs(graph.number_of_nodes())
weights = np.round(self.w.rvs(graph.number_of_nodes()), 2)
return MaxWeightStableSetData(graph, weights)
return [_sample() for _ in range(n_samples)]
@@ -109,15 +82,35 @@ class MaxWeightStableSetGenerator:
return nx.generators.random_graphs.binomial_graph(self.n.rvs(), self.p.rvs())
def build_stab_model(data: MaxWeightStableSetData) -> pe.ConcreteModel:
model = pe.ConcreteModel()
def build_stab_model_gurobipy(data: MaxWeightStableSetData) -> GurobiModel:
data = _read_stab_data(data)
model = gp.Model()
nodes = list(data.graph.nodes)
model.x = pe.Var(nodes, domain=pe.Binary)
model.OBJ = pe.Objective(
expr=sum(-model.x[v] * data.weights[v] for v in nodes),
sense=pe.minimize,
)
x = model.addVars(nodes, vtype=GRB.BINARY, name="x")
model.setObjective(quicksum(-data.weights[i] * x[i] for i in nodes))
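# One constraint per *maximal* clique of the graph (via nx.find_cliques), a
# stronger formulation than one constraint per edge: a stable set contains
# at most one vertex of each clique.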
for clique in nx.find_cliques(data.graph):
model.addConstr(quicksum(x[i] for i in clique) <= 1)
model.update()
return GurobiModel(model)
def build_stab_model_pyomo(
data: MaxWeightStableSetData,
solver="gurobi_persistent",
) -> PyomoModel:
data = _read_stab_data(data)
model = pe.ConcreteModel()
nodes = pe.Set(initialize=list(data.graph.nodes))
model.x = pe.Var(nodes, domain=pe.Boolean, name="x")
model.obj = pe.Objective(expr=sum([-data.weights[i] * model.x[i] for i in nodes]))
model.clique_eqs = pe.ConstraintList()
for clique in nx.find_cliques(data.graph):
model.clique_eqs.add(sum(model.x[v] for v in clique) <= 1)
return model
model.clique_eqs.add(expr=sum(model.x[i] for i in clique) <= 1)
return PyomoModel(model, solver)
def _read_stab_data(data: Union[str, MaxWeightStableSetData]) -> MaxWeightStableSetData:
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, MaxWeightStableSetData)
return data

View File

@@ -1,22 +1,23 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Tuple, Any, Optional, Dict
from typing import List, Tuple, Optional, Any, Union
import gurobipy as gp
import networkx as nx
import numpy as np
import pyomo.environ as pe
from overrides import overrides
from gurobipy import quicksum, GRB, tuplelist
from scipy.spatial.distance import pdist, squareform
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
import logging
from miplearn.instance.base import Instance
from miplearn.solvers.learning import InternalSolver
from miplearn.solvers.pyomo.base import BasePyomoSolver
from miplearn.types import ConstraintName
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
logger = logging.getLogger(__name__)
@dataclass
@@ -25,80 +26,6 @@ class TravelingSalesmanData:
distances: np.ndarray
class TravelingSalesmanInstance(Instance):
"""An instance ot the Traveling Salesman Problem.
Given a list of cities and the distance between each pair of cities, the problem
asks for the shortest route starting at the first city, visiting each other city
exactly once, then returning to the first city. This problem is a generalization
of the Hamiltonian circuit problem, one of Karp's 21 NP-complete problems.
"""
def __init__(self, n_cities: int, distances: np.ndarray) -> None:
super().__init__()
assert isinstance(distances, np.ndarray)
assert distances.shape == (n_cities, n_cities)
self.n_cities = n_cities
self.distances = distances
self.edges = [
(i, j) for i in range(self.n_cities) for j in range(i + 1, self.n_cities)
]
@overrides
def to_model(self) -> pe.ConcreteModel:
model = pe.ConcreteModel()
model.x = pe.Var(self.edges, domain=pe.Binary)
model.obj = pe.Objective(
expr=sum(model.x[i, j] * self.distances[i, j] for (i, j) in self.edges),
sense=pe.minimize,
)
model.eq_degree = pe.ConstraintList()
model.eq_subtour = pe.ConstraintList()
for i in range(self.n_cities):
model.eq_degree.add(
sum(
model.x[min(i, j), max(i, j)]
for j in range(self.n_cities)
if i != j
)
== 2
)
return model
@overrides
def find_violated_lazy_constraints(
self,
solver: InternalSolver,
model: Any,
) -> Dict[ConstraintName, List]:
selected_edges = [e for e in self.edges if model.x[e].value > 0.5]
graph = nx.Graph()
graph.add_edges_from(selected_edges)
violations = {}
for c in list(nx.connected_components(graph)):
if len(c) < self.n_cities:
cname = ("st[" + ",".join(map(str, c)) + "]").encode()
violations[cname] = list(c)
return violations
@overrides
def enforce_lazy_constraint(
self,
solver: InternalSolver,
model: Any,
component: List,
) -> None:
assert isinstance(solver, BasePyomoSolver)
cut_edges = [
e
for e in self.edges
if (e[0] in component and e[1] not in component)
or (e[0] not in component and e[1] in component)
]
constr = model.eq_subtour.add(expr=sum(model.x[e] for e in cut_edges) >= 2)
solver.add_constraint(constr)
class TravelingSalesmanGenerator:
"""Random generator for the Traveling Salesman Problem."""
@@ -118,7 +45,7 @@ class TravelingSalesmanGenerator:
distributions `n`, `x` and `y`. For each (unordered) pair of cities (i,j),
the distance d[i,j] between them is set to:
d[i,j] = gamma[i,j] \sqrt{(x_i - x_j)^2 + (y_i - y_j)^2}
d[i,j] = gamma[i,j] \\sqrt{(x_i - x_j)^2 + (y_i - y_j)^2}
where gamma is sampled from the provided probability distribution `gamma`.
@@ -183,3 +110,68 @@ class TravelingSalesmanGenerator:
n = self.n.rvs()
cities = np.array([(self.x.rvs(), self.y.rvs()) for _ in range(n)])
return n, cities
def build_tsp_model(data: Union[str, TravelingSalesmanData]) -> GurobiModel:
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, TravelingSalesmanData)
edges = tuplelist(
(i, j) for i in range(data.n_cities) for j in range(i + 1, data.n_cities)
)
model = gp.Model()
# Decision variables
x = model.addVars(edges, vtype=GRB.BINARY, name="x")
model._x = x
model._edges = edges
model._n_cities = data.n_cities
# Objective function
model.setObjective(quicksum(x[(i, j)] * data.distances[i, j] for (i, j) in edges))
# Eq: Must choose two edges adjacent to each node
model.addConstrs(
(
quicksum(x[min(i, j), max(i, j)] for j in range(data.n_cities) if i != j)
== 2
for i in range(data.n_cities)
),
name="eq_degree",
)
def find_violations(model: GurobiModel) -> List[Any]:
violations = []
x = model.inner.cbGetSolution(model.inner._x)
selected_edges = [e for e in model.inner._edges if x[e] > 0.5]
graph = nx.Graph()
graph.add_edges_from(selected_edges)
for component in list(nx.connected_components(graph)):
if len(component) < model.inner._n_cities:
cut_edges = [
e
for e in model.inner._edges
if (e[0] in component and e[1] not in component)
or (e[0] not in component and e[1] in component)
]
violations.append(cut_edges)
return violations
def fix_violations(model: GurobiModel, violations: List[Any], where: str) -> None:
for violation in violations:
constr = quicksum(model.inner._x[e[0], e[1]] for e in violation) >= 2
if where == "cb":
model.inner.cbLazy(constr)
else:
model.inner.addConstr(constr)
logger.info(f"tsp: added {len(violations)} subtour elimination constraints")
model.update()
return GurobiModel(
model,
find_violations=find_violations,
fix_violations=fix_violations,
)
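For reference, `GurobiModel.optimize` (defined later in this commit) enables Gurobi's lazy-constraint mode whenever `fix_violations` is given, calls `find_violations` on each new incumbent (`MIPSOL`) and feeds the result back through `fix_violations`, which adds the cuts via `cbLazy`. A brief usage sketch (illustrative):

gen = TravelingSalesmanGenerator()
data = gen.generate(1)[0]
model = build_tsp_model(data)
model.optimize()  # subtours are cut off lazily inside the MIPSOL callback
print(f"{len(model.violations_)} subtour cuts added during the solve")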

miplearn/problems/uc.py
View File

@@ -0,0 +1,201 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from math import pi
from typing import List, Optional, Union
import gurobipy as gp
import numpy as np
from gurobipy import GRB, quicksum
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from miplearn.io import read_pkl_gz
from miplearn.solvers.gurobi import GurobiModel
@dataclass
class UnitCommitmentData:
demand: np.ndarray
min_power: np.ndarray
max_power: np.ndarray
min_uptime: np.ndarray
min_downtime: np.ndarray
cost_startup: np.ndarray
cost_prod: np.ndarray
cost_fixed: np.ndarray
class UnitCommitmentGenerator:
def __init__(
self,
n_units: rv_frozen = randint(low=1_000, high=1_001),
n_periods: rv_frozen = randint(low=72, high=73),
max_power: rv_frozen = uniform(loc=50, scale=450),
min_power: rv_frozen = uniform(loc=0.5, scale=0.25),
cost_startup: rv_frozen = uniform(loc=0, scale=10_000),
cost_prod: rv_frozen = uniform(loc=0, scale=50),
cost_fixed: rv_frozen = uniform(loc=0, scale=1_000),
min_uptime: rv_frozen = randint(low=2, high=8),
min_downtime: rv_frozen = randint(low=2, high=8),
cost_jitter: rv_frozen = uniform(loc=0.75, scale=0.5),
demand_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
fix_units: bool = False,
) -> None:
self.n_units = n_units
self.n_periods = n_periods
self.max_power = max_power
self.min_power = min_power
self.cost_startup = cost_startup
self.cost_prod = cost_prod
self.cost_fixed = cost_fixed
self.min_uptime = min_uptime
self.min_downtime = min_downtime
self.cost_jitter = cost_jitter
self.demand_jitter = demand_jitter
self.fix_units = fix_units
self.ref_data: Optional[UnitCommitmentData] = None
def generate(self, n_samples: int) -> List[UnitCommitmentData]:
def _sample() -> UnitCommitmentData:
if self.ref_data is None:
T = self.n_periods.rvs()
G = self.n_units.rvs()
# Generate unit parameters
max_power = self.max_power.rvs(G)
min_power = max_power * self.min_power.rvs(G)
min_uptime = self.min_uptime.rvs(G)
min_downtime = self.min_downtime.rvs(G)
cost_startup = self.cost_startup.rvs(G)
cost_prod = self.cost_prod.rvs(G)
cost_fixed = self.cost_fixed.rvs(G)
capacity = max_power.sum()
# Generate periodic demand in the range [0.4, 0.8] * capacity, with a peak every 12 hours.
demand = np.sin([i / 6 * pi for i in range(T)])
demand *= uniform(loc=0, scale=1).rvs(T)
demand -= demand.min()
demand /= demand.max() / 0.4
demand += 0.4
demand *= capacity
else:
T, G = len(self.ref_data.demand), len(self.ref_data.max_power)
demand = self.ref_data.demand * self.demand_jitter.rvs(T)
min_power = self.ref_data.min_power
max_power = self.ref_data.max_power
min_uptime = self.ref_data.min_uptime
min_downtime = self.ref_data.min_downtime
cost_startup = self.ref_data.cost_startup * self.cost_jitter.rvs(G)
cost_prod = self.ref_data.cost_prod * self.cost_jitter.rvs(G)
cost_fixed = self.ref_data.cost_fixed * self.cost_jitter.rvs(G)
data = UnitCommitmentData(
demand.round(2),
min_power.round(2),
max_power.round(2),
min_uptime,
min_downtime,
cost_startup.round(2),
cost_prod.round(2),
cost_fixed.round(2),
)
if self.ref_data is None and self.fix_units:
self.ref_data = data
return data
return [_sample() for _ in range(n_samples)]
def build_uc_model(data: Union[str, UnitCommitmentData]) -> GurobiModel:
"""
Models the unit commitment problem according to equations (1)-(5) of:
Bendotti, P., Fouilhoux, P. & Rottner, C. The min-up/min-down unit
commitment polytope. J Comb Optim 36, 1024-1058 (2018).
https://doi.org/10.1007/s10878-018-0273-y
"""
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, UnitCommitmentData)
T = len(data.demand)
G = len(data.min_power)
D = data.demand
Pmin, Pmax = data.min_power, data.max_power
L = data.min_uptime
l = data.min_downtime
model = gp.Model()
is_on = model.addVars(G, T, vtype=GRB.BINARY, name="is_on")
switch_on = model.addVars(G, T, vtype=GRB.BINARY, name="switch_on")
prod = model.addVars(G, T, name="prod")
# Objective function
model.setObjective(
quicksum(
is_on[g, t] * data.cost_fixed[g]
+ switch_on[g, t] * data.cost_startup[g]
+ prod[g, t] * data.cost_prod[g]
for g in range(G)
for t in range(T)
)
)
# Eq 1: Minimum up-time constraint: if unit g is down at time t, then it
# cannot have started up during any of the previous L[g] periods.
model.addConstrs(
(
quicksum(switch_on[g, k] for k in range(t - L[g] + 1, t + 1)) <= is_on[g, t]
for g in range(G)
for t in range(L[g] - 1, T)
),
name="eq_min_uptime",
)
# Eq 2: Minimum down-time constraint: Symmetric to the minimum-up constraint.
model.addConstrs(
(
quicksum(switch_on[g, k] for k in range(t - l[g] + 1, t + 1))
<= 1 - is_on[g, t - l[g] + 1]
for g in range(G)
for t in range(l[g] - 1, T)
),
name="eq_min_downtime",
)
# Eq 3: Ensures that if unit g is on at time t but was off at time t-1,
# then its start-up variable at time t must be one.
model.addConstrs(
(
switch_on[g, t] >= is_on[g, t] - is_on[g, t - 1]
for g in range(G)
for t in range(1, T)
),
name="eq_startup",
)
# Eq 4: Ensures that demand is satisfied at each time period.
model.addConstrs(
(quicksum(prod[g, t] for g in range(G)) >= D[t] for t in range(T)),
name="eq_demand",
)
# Eq 5: Sets the bounds to the quantity of power produced by each unit.
model.addConstrs(
(Pmin[g] * is_on[g, t] <= prod[g, t] for g in range(G) for t in range(T)),
name="eq_prod_lb",
)
model.addConstrs(
(prod[g, t] <= Pmax[g] * is_on[g, t] for g in range(G) for t in range(T)),
name="eq_prod_ub",
)
model.update()
return GurobiModel(model)
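A brief usage sketch (illustrative; the default generator settings yield a large instance, roughly 144,000 binary variables, so a time limit is applied):

gen = UnitCommitmentGenerator()
data = gen.generate(1)[0]
model = build_uc_model(data)
model.set_time_limit(300.0)
model.optimize()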

View File

@@ -0,0 +1,54 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from dataclasses import dataclass
from typing import List, Union
import gurobipy as gp
import numpy as np
from gurobipy import GRB, quicksum
from networkx import Graph
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from .stab import MaxWeightStableSetGenerator
from miplearn.solvers.gurobi import GurobiModel
from ..io import read_pkl_gz
@dataclass
class MinWeightVertexCoverData:
graph: Graph
weights: np.ndarray
class MinWeightVertexCoverGenerator:
def __init__(
self,
w: rv_frozen = uniform(loc=10.0, scale=1.0),
n: rv_frozen = randint(low=250, high=251),
p: rv_frozen = uniform(loc=0.05, scale=0.0),
fix_graph: bool = True,
):
self._generator = MaxWeightStableSetGenerator(w, n, p, fix_graph)
def generate(self, n_samples: int) -> List[MinWeightVertexCoverData]:
return [
MinWeightVertexCoverData(s.graph, s.weights)
for s in self._generator.generate(n_samples)
]
def build_vertexcover_model(data: Union[str, MinWeightVertexCoverData]) -> GurobiModel:
if isinstance(data, str):
data = read_pkl_gz(data)
assert isinstance(data, MinWeightVertexCoverData)
model = gp.Model()
nodes = list(data.graph.nodes)
x = model.addVars(nodes, vtype=GRB.BINARY, name="x")
model.setObjective(quicksum(data.weights[i] * x[i] for i in nodes))
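# Each edge must be covered: at least one of its endpoints enters the cover.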
for (v1, v2) in data.graph.edges:
model.addConstr(x[v1] + x[v2] >= 1)
model.update()
return GurobiModel(model)

View File

@@ -1,48 +1,3 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
import sys
from typing import Any, List, TextIO, cast, TypeVar, Optional, Sized
logger = logging.getLogger(__name__)
class _RedirectOutput:
def __init__(self, streams: List[Any]) -> None:
self.streams = streams
def write(self, data: Any) -> None:
for stream in self.streams:
stream.write(data)
def flush(self) -> None:
for stream in self.streams:
stream.flush()
def __enter__(self) -> Any:
self._original_stdout = sys.stdout
self._original_stderr = sys.stderr
sys.stdout = cast(TextIO, self)
sys.stderr = cast(TextIO, self)
return self
def __exit__(
self,
_type: Any,
_value: Any,
_traceback: Any,
) -> None:
sys.stdout = self._original_stdout
sys.stderr = self._original_stderr
T = TypeVar("T", bound=Sized)
def _none_if_empty(obj: T) -> Optional[T]:
if len(obj) == 0:
return None
else:
return obj

View File

@@ -0,0 +1,70 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from abc import ABC, abstractmethod
from typing import Optional, Dict
import numpy as np
from miplearn.h5 import H5File
class AbstractModel(ABC):
_supports_basis_status = False
_supports_sensitivity_analysis = False
_supports_node_count = False
_supports_solution_pool = False
@abstractmethod
def add_constrs(
self,
var_names: np.ndarray,
constrs_lhs: np.ndarray,
constrs_sense: np.ndarray,
constrs_rhs: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
pass
@abstractmethod
def extract_after_load(self, h5: H5File) -> None:
pass
@abstractmethod
def extract_after_lp(self, h5: H5File) -> None:
pass
@abstractmethod
def extract_after_mip(self, h5: H5File) -> None:
pass
@abstractmethod
def fix_variables(
self,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
pass
@abstractmethod
def optimize(self) -> None:
pass
@abstractmethod
def relax(self) -> "AbstractModel":
pass
@abstractmethod
def set_warm_starts(
self,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
pass
@abstractmethod
def write(self, filename: str) -> None:
pass
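For orientation, a sketch of how this interface is typically driven during training-data collection (illustrative; the actual orchestration lives in LearningSolver, and it is assumed here that H5File can be used as a context manager):

def collect_sample(model: AbstractModel, h5_filename: str) -> None:
    with H5File(h5_filename) as h5:
        model.extract_after_load(h5)   # static features: names, bounds, ...
        lp = model.relax()             # continuous relaxation of the model
        lp.optimize()
        lp.extract_after_lp(h5)        # LP solution, basis, sensitivity data
        model.optimize()
        model.extract_after_mip(h5)    # MIP solution, bounds, solution pool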

View File

@@ -1,319 +1,216 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
import re
import sys
from io import StringIO
from random import randint
from typing import List, Any, Dict, Optional, TYPE_CHECKING
from typing import Dict, Optional, Callable, Any, List
import gurobipy as gp
from gurobipy import GRB, GurobiError
import numpy as np
from overrides import overrides
from scipy.sparse import coo_matrix, lil_matrix
from scipy.sparse import lil_matrix
from miplearn.instance.base import Instance
from miplearn.solvers import _RedirectOutput
from miplearn.solvers.internal import (
InternalSolver,
LPSolveStats,
IterationCallback,
LazyCallback,
MIPSolveStats,
Variables,
Constraints,
)
from miplearn.solvers.pyomo.base import PyomoTestInstanceKnapsack
from miplearn.types import (
SolverParams,
UserCutCallback,
Solution,
)
if TYPE_CHECKING:
import gurobipy
logger = logging.getLogger(__name__)
from miplearn.h5 import H5File
class GurobiSolver(InternalSolver):
"""
An InternalSolver backed by Gurobi's Python API (without Pyomo).
Parameters
----------
params: Optional[SolverParams]
Parameters to pass to Gurobi. For example, `params={"MIPGap": 1e-3}`
sets the gap tolerance to 1e-3.
lazy_cb_frequency: int
If 1, calls lazy constraint callbacks whenever an integer solution
is found. If 2, calls it also at every node, after solving the
LP relaxation of that node.
"""
class GurobiModel:
_supports_basis_status = True
_supports_sensitivity_analysis = True
_supports_node_count = True
_supports_solution_pool = True
def __init__(
self,
params: Optional[SolverParams] = None,
lazy_cb_frequency: int = 1,
inner: gp.Model,
find_violations: Optional[Callable] = None,
fix_violations: Optional[Callable] = None,
) -> None:
import gurobipy
self.fix_violations = fix_violations
self.find_violations = find_violations
self.inner = inner
self.violations_: Optional[List[Any]] = None
assert lazy_cb_frequency in [1, 2]
if params is None:
params = {}
params["InfUnbdInfo"] = True
params["Seed"] = randint(0, 1_000_000)
def add_constrs(
self,
var_names: np.ndarray,
constrs_lhs: np.ndarray,
constrs_sense: np.ndarray,
constrs_rhs: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
assert len(var_names.shape) == 1
nvars = len(var_names)
assert len(constrs_lhs.shape) == 2
nconstrs = constrs_lhs.shape[0]
assert constrs_lhs.shape[1] == nvars
assert constrs_sense.shape == (nconstrs,)
assert constrs_rhs.shape == (nconstrs,)
self.gp = gurobipy
self.instance: Optional[Instance] = None
self.model: Optional["gurobipy.Model"] = None
self.params: SolverParams = params
self.cb_where: Optional[int] = None
self.lazy_cb_frequency = lazy_cb_frequency
self._dirty = True
self._has_lp_solution = False
self._has_mip_solution = False
gp_vars = [self.inner.getVarByName(var_name.decode()) for var_name in var_names]
self.inner.addMConstr(constrs_lhs, gp_vars, constrs_sense, constrs_rhs)
self._varname_to_var: Dict[bytes, "gurobipy.Var"] = {}
self._cname_to_constr: Dict[str, "gurobipy.Constr"] = {}
self._gp_vars: List["gurobipy.Var"] = []
self._gp_constrs: List["gurobipy.Constr"] = []
self._var_names: np.ndarray = np.empty(0)
self._constr_names: List[str] = []
self._var_types: np.ndarray = np.empty(0)
self._var_lbs: np.ndarray = np.empty(0)
self._var_ubs: np.ndarray = np.empty(0)
self._var_obj_coeffs: np.ndarray = np.empty(0)
if stats is not None:
if "Added constraints" not in stats:
stats["Added constraints"] = 0
stats["Added constraints"] += nconstrs
if self.lazy_cb_frequency == 1:
self.lazy_cb_where = [self.gp.GRB.Callback.MIPSOL]
def extract_after_load(self, h5: H5File) -> None:
"""
Given a model that has just been loaded, extracts static problem
features, such as variable names and types, objective coefficients, etc.
"""
self.inner.update()
self._extract_after_load_vars(h5)
self._extract_after_load_constrs(h5)
h5.put_scalar("static_sense", "min" if self.inner.modelSense > 0 else "max")
h5.put_scalar("static_obj_offset", self.inner.objCon)
def extract_after_lp(self, h5: H5File) -> None:
"""
Given a linear programming model that has just been solved, extracts
dynamic problem features, such as optimal LP solution, basis status,
etc.
"""
self._extract_after_lp_vars(h5)
self._extract_after_lp_constrs(h5)
h5.put_scalar("lp_obj_value", self.inner.objVal)
h5.put_scalar("lp_wallclock_time", self.inner.runtime)
def extract_after_mip(self, h5: H5File) -> None:
"""
Given a mixed-integer linear programming model that has just been
solved, extracts dynamic problem features, such as optimal MIP solution.
"""
h5.put_scalar("mip_wallclock_time", self.inner.runtime)
h5.put_scalar("mip_node_count", self.inner.nodeCount)
if self.inner.status == GRB.INFEASIBLE:
return
gp_vars = self.inner.getVars()
gp_constrs = self.inner.getConstrs()
h5.put_array(
"mip_var_values",
np.array(self.inner.getAttr("x", gp_vars), dtype=float),
)
h5.put_array(
"mip_constr_slacks",
np.abs(np.array(self.inner.getAttr("slack", gp_constrs), dtype=float)),
)
h5.put_scalar("mip_obj_value", self.inner.objVal)
h5.put_scalar("mip_obj_bound", self.inner.objBound)
try:
h5.put_scalar("mip_gap", self.inner.mipGap)
except AttributeError:
pass
self._extract_after_mip_solution_pool(h5)
def fix_variables(
self,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
assert len(var_names.shape) == 1
assert len(var_values.shape) == 1
assert var_names.shape == var_values.shape
n_fixed = 0
for (var_idx, var_name) in enumerate(var_names):
var_val = var_values[var_idx]
if np.isfinite(var_val):
var = self.inner.getVarByName(var_name.decode())
var.vtype = "C"
var.lb = var_val
var.ub = var_val
n_fixed += 1
if stats is not None:
stats["Fixed variables"] = n_fixed
def optimize(self) -> None:
self.violations_ = []
def callback(m: gp.Model, where: int) -> None:
assert self.find_violations is not None
assert self.violations_ is not None
assert self.fix_violations is not None
if where == GRB.Callback.MIPSOL:
violations = self.find_violations(self)
self.violations_.extend(violations)
self.fix_violations(self, violations, "cb")
if self.fix_violations is not None:
self.inner.Params.lazyConstraints = 1
self.inner.optimize(callback)
else:
self.lazy_cb_where = [
self.gp.GRB.Callback.MIPSOL,
self.gp.GRB.Callback.MIPNODE,
]
self.inner.optimize()
@overrides
def add_constraints(self, cf: Constraints) -> None:
assert cf.names is not None
assert cf.senses is not None
assert cf.lhs is not None
assert cf.rhs is not None
assert self.model is not None
lhs = cf.lhs.tocsr()
for i in range(len(cf.names)):
sense = cf.senses[i]
row = lhs[i, :]
row_expr = self.gp.quicksum(
self._gp_vars[row.indices[j]] * row.data[j] for j in range(row.getnnz())
def relax(self) -> "GurobiModel":
return GurobiModel(self.inner.relax())
def set_time_limit(self, time_limit_sec: float) -> None:
self.inner.params.timeLimit = time_limit_sec
def set_warm_starts(
self,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
assert len(var_values.shape) == 2
(n_starts, n_vars) = var_values.shape
assert len(var_names.shape) == 1
assert var_names.shape[0] == n_vars
self.inner.numStart = n_starts
for start_idx in range(n_starts):
self.inner.params.startNumber = start_idx
for (var_idx, var_name) in enumerate(var_names):
var_val = var_values[start_idx, var_idx]
if np.isfinite(var_val):
var = self.inner.getVarByName(var_name.decode())
var.start = var_val
if stats is not None:
stats["WS: Count"] = n_starts
stats["WS: Number of variables set"] = (
np.isfinite(var_values).mean(axis=0).sum()
)
if sense == b"=":
self.model.addConstr(row_expr == cf.rhs[i], name=cf.names[i])
elif sense == b"<":
self.model.addConstr(row_expr <= cf.rhs[i], name=cf.names[i])
elif sense == b">":
self.model.addConstr(row_expr >= cf.rhs[i], name=cf.names[i])
else:
raise Exception(f"Unknown sense: {sense}")
self.model.update()
self._dirty = True
self._has_lp_solution = False
self._has_mip_solution = False
@overrides
def are_callbacks_supported(self) -> bool:
return True
@overrides
def are_constraints_satisfied(
self,
cf: Constraints,
tol: float = 1e-5,
) -> List[bool]:
assert cf.names is not None
assert cf.senses is not None
assert cf.lhs is not None
assert cf.rhs is not None
assert self.model is not None
result = []
x = np.array(self.model.getAttr("x", self.model.getVars()))
lhs = cf.lhs.tocsr() * x
for i in range(len(cf.names)):
sense = cf.senses[i]
if sense == b"<":
result.append(lhs[i] <= cf.rhs[i] + tol)
elif sense == b">":
result.append(lhs[i] >= cf.rhs[i] - tol)
elif sense == b"<":
result.append(abs(cf.rhs[i] - lhs[i]) <= tol)
else:
raise Exception(f"unknown sense: {sense}")
return result
@overrides
def build_test_instance_infeasible(self) -> Instance:
return GurobiTestInstanceInfeasible()
@overrides
def build_test_instance_knapsack(self) -> Instance:
return GurobiTestInstanceKnapsack(
weights=[23.0, 26.0, 20.0, 18.0],
prices=[505.0, 352.0, 458.0, 220.0],
capacity=67.0,
)
@overrides
def clone(self) -> "GurobiSolver":
return GurobiSolver(
params=self.params,
lazy_cb_frequency=self.lazy_cb_frequency,
)
@overrides
def fix(self, solution: Solution) -> None:
self._raise_if_callback()
for (varname, value) in solution.items():
if value is None:
continue
var = self._varname_to_var[varname]
var.vtype = self.gp.GRB.CONTINUOUS
var.lb = value
var.ub = value
@overrides
def get_constraint_attrs(self) -> List[str]:
return [
"basis_status",
"categories",
"dual_values",
"lazy",
"lhs",
"names",
"rhs",
"sa_rhs_down",
"sa_rhs_up",
"senses",
"slacks",
"user_features",
]
@overrides
def get_constraints(
self,
with_static: bool = True,
with_sa: bool = True,
with_lhs: bool = True,
) -> Constraints:
model = self.model
assert model is not None
assert model.numVars == len(self._gp_vars)
def _parse_gurobi_cbasis(v: int) -> str:
if v == 0:
return "B"
if v == -1:
return "N"
raise Exception(f"unknown cbasis: {v}")
gp_constrs = model.getConstrs()
constr_names = np.array(model.getAttr("constrName", gp_constrs), dtype="S")
lhs: Optional[coo_matrix] = None
rhs, senses, slacks, basis_status = None, None, None, None
dual_value, basis_status, sa_rhs_up, sa_rhs_down = None, None, None, None
if with_static:
rhs = np.array(model.getAttr("rhs", gp_constrs), dtype=float)
senses = np.array(model.getAttr("sense", gp_constrs), dtype="S")
if with_lhs:
nrows = len(gp_constrs)
ncols = len(self._var_names)
tmp = lil_matrix((nrows, ncols), dtype=float)
for (i, gp_constr) in enumerate(gp_constrs):
expr = model.getRow(gp_constr)
for j in range(expr.size()):
tmp[i, expr.getVar(j).index] = expr.getCoeff(j)
lhs = tmp.tocoo()
if self._has_lp_solution:
dual_value = np.array(model.getAttr("pi", gp_constrs), dtype=float)
basis_status = np.array(
[_parse_gurobi_cbasis(c) for c in model.getAttr("cbasis", gp_constrs)],
dtype="S",
def _extract_after_load_vars(self, h5: H5File) -> None:
gp_vars = self.inner.getVars()
for (h5_field, gp_field) in {
"static_var_names": "varName",
"static_var_types": "vtype",
}.items():
h5.put_array(
h5_field, np.array(self.inner.getAttr(gp_field, gp_vars), dtype="S")
)
for (h5_field, gp_field) in {
"static_var_upper_bounds": "ub",
"static_var_lower_bounds": "lb",
"static_var_obj_coeffs": "obj",
}.items():
h5.put_array(
h5_field, np.array(self.inner.getAttr(gp_field, gp_vars), dtype=float)
)
if with_sa:
sa_rhs_up = np.array(model.getAttr("saRhsUp", gp_constrs), dtype=float)
sa_rhs_down = np.array(
model.getAttr("saRhsLow", gp_constrs), dtype=float
)
if self._has_lp_solution or self._has_mip_solution:
slacks = np.array(model.getAttr("slack", gp_constrs), dtype=float)
def _extract_after_load_constrs(self, h5: H5File) -> None:
gp_constrs = self.inner.getConstrs()
gp_vars = self.inner.getVars()
rhs = np.array(self.inner.getAttr("rhs", gp_constrs), dtype=float)
senses = np.array(self.inner.getAttr("sense", gp_constrs), dtype="S")
names = np.array(self.inner.getAttr("constrName", gp_constrs), dtype="S")
nrows, ncols = len(gp_constrs), len(gp_vars)
tmp = lil_matrix((nrows, ncols), dtype=float)
for (i, gp_constr) in enumerate(gp_constrs):
expr = self.inner.getRow(gp_constr)
for j in range(expr.size()):
tmp[i, expr.getVar(j).index] = expr.getCoeff(j)
lhs = tmp.tocoo()
return Constraints(
basis_status=basis_status,
dual_values=dual_value,
lhs=lhs,
names=constr_names,
rhs=rhs,
sa_rhs_down=sa_rhs_down,
sa_rhs_up=sa_rhs_up,
senses=senses,
slacks=slacks,
)
@overrides
def get_solution(self) -> Optional[Solution]:
assert self.model is not None
if self.cb_where is not None:
if self.cb_where == self.gp.GRB.Callback.MIPNODE:
return {
v.varName.encode(): self.model.cbGetNodeRel(v)
for v in self.model.getVars()
}
elif self.cb_where == self.gp.GRB.Callback.MIPSOL:
return {
v.varName.encode(): self.model.cbGetSolution(v)
for v in self.model.getVars()
}
else:
raise Exception(
f"get_solution can only be called from a callback "
f"when cb_where is either MIPNODE or MIPSOL"
)
if self.model.solCount == 0:
return None
return {v.varName.encode(): v.x for v in self.model.getVars()}
@overrides
def get_variable_attrs(self) -> List[str]:
return [
"names",
"basis_status",
"categories",
"lower_bounds",
"obj_coeffs",
"reduced_costs",
"sa_lb_down",
"sa_lb_up",
"sa_obj_down",
"sa_obj_up",
"sa_ub_down",
"sa_ub_up",
"types",
"upper_bounds",
"user_features",
"values",
]
@overrides
def get_variables(
self,
with_static: bool = True,
with_sa: bool = True,
) -> Variables:
model = self.model
assert model is not None
h5.put_array("static_constr_names", names)
h5.put_array("static_constr_rhs", rhs)
h5.put_array("static_constr_sense", senses)
h5.put_sparse("static_constr_lhs", lhs)
def _extract_after_lp_vars(self, h5: H5File) -> None:
def _parse_gurobi_vbasis(b: int) -> str:
if b == 0:
return "B"
@@ -324,393 +221,81 @@ class GurobiSolver(InternalSolver):
elif b == -3:
return "S"
else:
raise Exception(f"unknown vbasis: {basis_status}")
raise Exception(f"unknown vbasis: {b}")
basis_status: Optional[np.ndarray] = None
upper_bounds, lower_bounds, types, values = None, None, None, None
obj_coeffs, reduced_costs = None, None
sa_obj_up, sa_ub_up, sa_lb_up = None, None, None
sa_obj_down, sa_ub_down, sa_lb_down = None, None, None
if with_static:
upper_bounds = self._var_ubs
lower_bounds = self._var_lbs
types = self._var_types
obj_coeffs = self._var_obj_coeffs
if self._has_lp_solution:
reduced_costs = np.array(model.getAttr("rc", self._gp_vars), dtype=float)
basis_status = np.array(
gp_vars = self.inner.getVars()
h5.put_array(
"lp_var_basis_status",
np.array(
[
_parse_gurobi_vbasis(b)
for b in model.getAttr("vbasis", self._gp_vars)
for b in self.inner.getAttr("vbasis", gp_vars)
],
dtype="S",
),
)
for (h5_field, gp_field) in {
"lp_var_reduced_costs": "rc",
"lp_var_sa_obj_up": "saobjUp",
"lp_var_sa_obj_down": "saobjLow",
"lp_var_sa_ub_up": "saubUp",
"lp_var_sa_ub_down": "saubLow",
"lp_var_sa_lb_up": "salbUp",
"lp_var_sa_lb_down": "salbLow",
"lp_var_values": "x",
}.items():
h5.put_array(
h5_field,
np.array(self.inner.getAttr(gp_field, gp_vars), dtype=float),
)
if with_sa:
sa_obj_up = np.array(
model.getAttr("saobjUp", self._gp_vars),
dtype=float,
)
sa_obj_down = np.array(
model.getAttr("saobjLow", self._gp_vars),
dtype=float,
)
sa_ub_up = np.array(
model.getAttr("saubUp", self._gp_vars),
dtype=float,
)
sa_ub_down = np.array(
model.getAttr("saubLow", self._gp_vars),
dtype=float,
)
sa_lb_up = np.array(
model.getAttr("salbUp", self._gp_vars),
dtype=float,
)
sa_lb_down = np.array(
model.getAttr("salbLow", self._gp_vars),
dtype=float,
)
def _extract_after_lp_constrs(self, h5: H5File) -> None:
def _parse_gurobi_cbasis(v: int) -> str:
if v == 0:
return "B"
if v == -1:
return "N"
raise Exception(f"unknown cbasis: {v}")
if model.solCount > 0:
values = np.array(model.getAttr("x", self._gp_vars), dtype=float)
return Variables(
names=self._var_names,
upper_bounds=upper_bounds,
lower_bounds=lower_bounds,
types=types,
obj_coeffs=obj_coeffs,
reduced_costs=reduced_costs,
basis_status=basis_status,
sa_obj_up=sa_obj_up,
sa_obj_down=sa_obj_down,
sa_ub_up=sa_ub_up,
sa_ub_down=sa_ub_down,
sa_lb_up=sa_lb_up,
sa_lb_down=sa_lb_down,
values=values,
gp_constrs = self.inner.getConstrs()
h5.put_array(
"lp_constr_basis_status",
np.array(
[
_parse_gurobi_cbasis(c)
for c in self.inner.getAttr("cbasis", gp_constrs)
],
dtype="S",
),
)
for (h5_field, gp_field) in {
"lp_constr_dual_values": "pi",
"lp_constr_sa_rhs_up": "saRhsUp",
"lp_constr_sa_rhs_down": "saRhsLow",
}.items():
h5.put_array(
h5_field,
np.array(self.inner.getAttr(gp_field, gp_constrs), dtype=float),
)
h5.put_array(
"lp_constr_slacks",
np.abs(np.array(self.inner.getAttr("slack", gp_constrs), dtype=float)),
)
@overrides
def is_infeasible(self) -> bool:
assert self.model is not None
return self.model.status in [self.gp.GRB.INFEASIBLE, self.gp.GRB.INF_OR_UNBD]
@overrides
def remove_constraints(self, names: List[str]) -> None:
assert self.model is not None
constrs = [self.model.getConstrByName(n) for n in names]
self.model.remove(constrs)
self.model.update()
@overrides
def set_instance(
self,
instance: Instance,
model: Any = None,
) -> None:
self._raise_if_callback()
if model is None:
model = instance.to_model()
assert isinstance(model, self.gp.Model)
self.instance = instance
self.model = model
self.model.update()
self._update()
@overrides
def set_warm_start(self, solution: Solution) -> None:
self._raise_if_callback()
self._clear_warm_start()
for (var_name, value) in solution.items():
var = self._varname_to_var[var_name]
if value is not None:
var.start = value
@overrides
def solve(
self,
tee: bool = False,
iteration_cb: Optional[IterationCallback] = None,
lazy_cb: Optional[LazyCallback] = None,
user_cut_cb: Optional[UserCutCallback] = None,
) -> MIPSolveStats:
self._raise_if_callback()
assert self.model is not None
if iteration_cb is None:
iteration_cb = lambda: False
callback_exceptions = []
# Create callback wrapper
def cb_wrapper(cb_model: Any, cb_where: int) -> None:
def _extract_after_mip_solution_pool(self, h5: H5File) -> None:
gp_vars = self.inner.getVars()
pool_var_values = []
pool_obj_values = []
for i in range(self.inner.SolCount):
self.inner.params.SolutionNumber = i
try:
self.cb_where = cb_where
if lazy_cb is not None and cb_where in self.lazy_cb_where:
lazy_cb(self, self.model)
if user_cut_cb is not None and cb_where == self.gp.GRB.Callback.MIPNODE:
user_cut_cb(self, self.model)
except Exception as e:
logger.exception("callback error")
callback_exceptions.append(e)
finally:
self.cb_where = None
pool_var_values.append(self.inner.getAttr("Xn", gp_vars))
pool_obj_values.append(self.inner.PoolObjVal)
except GurobiError:
pass
h5.put_array("pool_var_values", np.array(pool_var_values))
h5.put_array("pool_obj_values", np.array(pool_obj_values))
# Configure Gurobi
if lazy_cb is not None:
self.params["LazyConstraints"] = 1
if user_cut_cb is not None:
self.params["PreCrush"] = 1
# Solve problem
total_wallclock_time = 0
total_nodes = 0
streams: List[Any] = [StringIO()]
if tee:
streams += [sys.stdout]
self._apply_params(streams)
while True:
with _RedirectOutput(streams):
self.model.optimize(cb_wrapper)
self._dirty = False
if len(callback_exceptions) > 0:
raise callback_exceptions[0]
total_wallclock_time += self.model.runtime
total_nodes += int(self.model.nodeCount)
should_repeat = iteration_cb()
if not should_repeat:
break
self._has_lp_solution = False
self._has_mip_solution = self.model.solCount > 0
# Fetch results and stats
log = streams[0].getvalue()
ub, lb = None, None
sense = "min" if self.model.modelSense == 1 else "max"
if self.model.solCount > 0:
if self.model.modelSense == 1:
lb = self.model.objBound
ub = self.model.objVal
else:
lb = self.model.objVal
ub = self.model.objBound
ws_value = self._extract_warm_start_value(log)
return MIPSolveStats(
mip_lower_bound=lb,
mip_upper_bound=ub,
mip_wallclock_time=total_wallclock_time,
mip_nodes=total_nodes,
mip_sense=sense,
mip_log=log,
mip_warm_start_value=ws_value,
)
@overrides
def solve_lp(
self,
tee: bool = False,
) -> LPSolveStats:
self._raise_if_callback()
streams: List[Any] = [StringIO()]
if tee:
streams += [sys.stdout]
self._apply_params(streams)
assert self.model is not None
for (i, var) in enumerate(self._gp_vars):
if self._var_types[i] == b"B":
var.vtype = self.gp.GRB.CONTINUOUS
var.lb = 0.0
var.ub = 1.0
elif self._var_types[i] == b"I":
var.vtype = self.gp.GRB.CONTINUOUS
with _RedirectOutput(streams):
self.model.optimize()
self._dirty = False
for (i, var) in enumerate(self._gp_vars):
if self._var_types[i] == b"B":
var.vtype = self.gp.GRB.BINARY
elif self._var_types[i] == b"I":
var.vtype = self.gp.GRB.INTEGER
log = streams[0].getvalue()
self._has_lp_solution = self.model.solCount > 0
self._has_mip_solution = False
opt_value = None
if not self.is_infeasible():
opt_value = self.model.objVal
return LPSolveStats(
lp_value=opt_value,
lp_log=log,
lp_wallclock_time=self.model.runtime,
)
def _apply_params(self, streams: List[Any]) -> None:
assert self.model is not None
with _RedirectOutput(streams):
for (name, value) in self.params.items():
self.model.setParam(name, value)
def _clear_warm_start(self) -> None:
for var in self._varname_to_var.values():
var.start = self.gp.GRB.UNDEFINED
@staticmethod
def _extract(
log: str,
regexp: str,
default: Optional[str] = None,
) -> Optional[str]:
value = default
for line in log.splitlines():
matches = re.findall(regexp, line)
if len(matches) == 0:
continue
value = matches[0]
return value
def _extract_warm_start_value(self, log: str) -> Optional[float]:
ws = self._extract(log, "MIP start with objective ([0-9.e+-]*)")
if ws is None:
return None
return float(ws)
def _get_value(self, var: Any) -> float:
assert self.model is not None
if self.cb_where == self.gp.GRB.Callback.MIPSOL:
return self.model.cbGetSolution(var)
elif self.cb_where == self.gp.GRB.Callback.MIPNODE:
return self.model.cbGetNodeRel(var)
elif self.cb_where is None:
return var.x
else:
raise Exception(
"get_value cannot be called from cb_where=%s" % self.cb_where
)
def _raise_if_callback(self) -> None:
if self.cb_where is not None:
raise Exception("method cannot be called from a callback")
def _update(self) -> None:
assert self.model is not None
gp_vars: List["gurobipy.Var"] = self.model.getVars()
gp_constrs: List["gurobipy.Constr"] = self.model.getConstrs()
var_names: np.ndarray = np.array(
self.model.getAttr("varName", gp_vars),
dtype="S",
)
var_types: np.ndarray = np.array(
self.model.getAttr("vtype", gp_vars),
dtype="S",
)
var_ubs: np.ndarray = np.array(
self.model.getAttr("ub", gp_vars),
dtype=float,
)
var_lbs: np.ndarray = np.array(
self.model.getAttr("lb", gp_vars),
dtype=float,
)
var_obj_coeffs: np.ndarray = np.array(
self.model.getAttr("obj", gp_vars),
dtype=float,
)
constr_names: List[str] = self.model.getAttr("constrName", gp_constrs)
varname_to_var: Dict[bytes, "gurobipy.Var"] = {}
cname_to_constr: Dict = {}
for (i, gp_var) in enumerate(gp_vars):
assert var_names[i] not in varname_to_var, (
f"Duplicated variable name detected: {var_names[i]}. "
f"Unique variable names are currently required."
)
assert var_types[i] in [b"B", b"C", b"I"], (
"Only binary and continuous variables are currently supported. "
f"Variable {var_names[i]} has type {var_types[i]}."
)
varname_to_var[var_names[i]] = gp_var
for (i, gp_constr) in enumerate(gp_constrs):
assert constr_names[i] not in cname_to_constr, (
f"Duplicated constraint name detected: {constr_names[i]}. "
f"Unique constraint names are currently required."
)
cname_to_constr[constr_names[i]] = gp_constr
self._varname_to_var = varname_to_var
self._cname_to_constr = cname_to_constr
self._gp_vars = gp_vars
self._gp_constrs = gp_constrs
self._var_names = var_names
self._constr_names = constr_names
self._var_types = var_types
self._var_lbs = var_lbs
self._var_ubs = var_ubs
self._var_obj_coeffs = var_obj_coeffs
def __getstate__(self) -> Dict:
return {
"params": self.params,
"lazy_cb_frequency": self.lazy_cb_frequency,
}
def __setstate__(self, state: Dict) -> None:
self.params = state["params"]
self.lazy_cb_frequency = state["lazy_cb_frequency"]
self.instance = None
self.model = None
self.cb_where = None
class GurobiTestInstanceInfeasible(Instance):
@overrides
def to_model(self) -> Any:
import gurobipy as gp
from gurobipy import GRB
model = gp.Model()
x = model.addVars(1, vtype=GRB.BINARY, name="x")
model.addConstr(x[0] >= 2)
model.setObjective(x[0])
return model
class GurobiTestInstanceKnapsack(PyomoTestInstanceKnapsack):
"""
Simpler (one-dimensional) knapsack instance, implemented directly in Gurobi
instead of Pyomo, used for testing.
"""
def __init__(
self,
weights: List[float],
prices: List[float],
capacity: float,
) -> None:
super().__init__(weights, prices, capacity)
@overrides
def to_model(self) -> Any:
import gurobipy as gp
from gurobipy import GRB
model = gp.Model("Knapsack")
n = len(self.weights)
x = model.addVars(n, vtype=GRB.BINARY, name="x")
z = model.addVar(vtype=GRB.CONTINUOUS, name="z", ub=self.capacity)
model.addConstr(
gp.quicksum(x[i] * self.weights[i] for i in range(n)) == z,
"eq_capacity",
)
model.setObjective(
gp.quicksum(x[i] * self.prices[i] for i in range(n)), GRB.MAXIMIZE
)
return model
@overrides
def enforce_lazy_constraint(
self,
solver: InternalSolver,
model: Any,
violation_data: Any,
) -> None:
x0 = model.getVarByName("x[0]")
model.cbLazy(x0 <= 0)
def write(self, filename: str) -> None:
self.inner.update()
self.inner.write(filename)

View File

@@ -1,340 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Optional, List, TYPE_CHECKING
import numpy as np
from scipy.sparse import coo_matrix
from miplearn.instance.base import Instance
from miplearn.types import (
IterationCallback,
LazyCallback,
UserCutCallback,
Solution,
)
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from miplearn.features.sample import Sample
@dataclass
class LPSolveStats:
lp_log: Optional[str] = None
lp_value: Optional[float] = None
lp_wallclock_time: Optional[float] = None
def to_list(self) -> List[float]:
features: List[float] = []
for attr in ["lp_value", "lp_wallclock_time"]:
if getattr(self, attr) is not None:
features.append(getattr(self, attr))
return features
@dataclass
class MIPSolveStats:
mip_lower_bound: Optional[float] = None
mip_log: Optional[str] = None
mip_nodes: Optional[int] = None
mip_sense: Optional[str] = None
mip_upper_bound: Optional[float] = None
mip_wallclock_time: Optional[float] = None
mip_warm_start_value: Optional[float] = None
@dataclass
class Variables:
names: Optional[np.ndarray] = None
basis_status: Optional[np.ndarray] = None
lower_bounds: Optional[np.ndarray] = None
obj_coeffs: Optional[np.ndarray] = None
reduced_costs: Optional[np.ndarray] = None
sa_lb_down: Optional[np.ndarray] = None
sa_lb_up: Optional[np.ndarray] = None
sa_obj_down: Optional[np.ndarray] = None
sa_obj_up: Optional[np.ndarray] = None
sa_ub_down: Optional[np.ndarray] = None
sa_ub_up: Optional[np.ndarray] = None
types: Optional[np.ndarray] = None
upper_bounds: Optional[np.ndarray] = None
values: Optional[np.ndarray] = None
@dataclass
class Constraints:
basis_status: Optional[np.ndarray] = None
dual_values: Optional[np.ndarray] = None
lazy: Optional[np.ndarray] = None
lhs: Optional[coo_matrix] = None
names: Optional[np.ndarray] = None
rhs: Optional[np.ndarray] = None
sa_rhs_down: Optional[np.ndarray] = None
sa_rhs_up: Optional[np.ndarray] = None
senses: Optional[np.ndarray] = None
slacks: Optional[np.ndarray] = None
@staticmethod
def from_sample(sample: "Sample") -> "Constraints":
return Constraints(
basis_status=sample.get_array("lp_constr_basis_status"),
dual_values=sample.get_array("lp_constr_dual_values"),
lazy=sample.get_array("static_constr_lazy"),
# lhs=sample.get_vector("static_constr_lhs"),
names=sample.get_array("static_constr_names"),
rhs=sample.get_array("static_constr_rhs"),
sa_rhs_down=sample.get_array("lp_constr_sa_rhs_down"),
sa_rhs_up=sample.get_array("lp_constr_sa_rhs_up"),
senses=sample.get_array("static_constr_senses"),
slacks=sample.get_array("lp_constr_slacks"),
)
def __getitem__(self, selected: List[bool]) -> "Constraints":
return Constraints(
basis_status=(
None if self.basis_status is None else self.basis_status[selected]
),
dual_values=(
None if self.dual_values is None else self.dual_values[selected]
),
names=(None if self.names is None else self.names[selected]),
lazy=(None if self.lazy is None else self.lazy[selected]),
lhs=(None if self.lhs is None else self.lhs.tocsr()[selected].tocoo()),
rhs=(None if self.rhs is None else self.rhs[selected]),
sa_rhs_down=(
None if self.sa_rhs_down is None else self.sa_rhs_down[selected]
),
sa_rhs_up=(None if self.sa_rhs_up is None else self.sa_rhs_up[selected]),
senses=(None if self.senses is None else self.senses[selected]),
slacks=(None if self.slacks is None else self.slacks[selected]),
)
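# Illustrative sketch (assumed data): indexing with a boolean mask filters
# every populated field and leaves unset fields as None, e.g.:
#
#   cf = Constraints(
#       names=np.array(["c1", "c2"], dtype="S"),
#       rhs=np.array([1.0, 2.0]),
#   )
#   kept = cf[[True, False]]
#   # kept.names == np.array([b"c1"]); kept.rhs == np.array([1.0])
#   # kept.senses is None, since it was never set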
class InternalSolver(ABC):
"""
Abstract class representing the MIP solver used internally by LearningSolver.
"""
@abstractmethod
def add_constraints(self, cf: Constraints) -> None:
"""Adds the given constraints to the model."""
pass
@abstractmethod
def are_constraints_satisfied(
self,
cf: Constraints,
tol: float = 1e-5,
) -> List[bool]:
"""
Checks whether the current solution satisfies the given constraints.
"""
pass
def are_callbacks_supported(self) -> bool:
"""
Returns True if this solver supports native callbacks, such as the lazy
constraint callback or the user cut callback.
"""
return False
@abstractmethod
def build_test_instance_infeasible(self) -> Instance:
"""
Returns an infeasible instance, for testing purposes.
"""
pass
@abstractmethod
def build_test_instance_knapsack(self) -> Instance:
"""
Returns an instance corresponding to the following MIP, for testing purposes:
maximize 505 x0 + 352 x1 + 458 x2 + 220 x3
s.t. eq_capacity: z = 23 x0 + 26 x1 + 20 x2 + 18 x3
x0, x1, x2, x3 binary
0 <= z <= 67 continuous
"""
pass
@abstractmethod
def clone(self) -> "InternalSolver":
"""
Returns a new copy of this solver with identical parameters, but otherwise
completely uninitialized.
"""
pass
@abstractmethod
def fix(self, solution: Solution) -> None:
"""
Fixes the values of a subset of decision variables. Missing values in the
solution indicate variables that should be left free.
"""
pass
@abstractmethod
def get_solution(self) -> Optional[Solution]:
"""
Returns the current solution found by the solver.
If called after `solve`, returns the best primal solution found during
the search. If called after `solve_lp`, returns the optimal solution
to the LP relaxation. If no primal solution is available, returns None.
"""
pass
@abstractmethod
def get_constraint_attrs(self) -> List[str]:
"""
Returns a list of constraint attributes supported by this solver. Used for
testing purposes only.
"""
pass
@abstractmethod
def get_constraints(
self,
with_static: bool = True,
with_sa: bool = True,
with_lhs: bool = True,
) -> Constraints:
pass
@abstractmethod
def get_variable_attrs(self) -> List[str]:
"""
Returns a list of variable attributes supported by this solver. Used for
testing purposes only.
"""
pass
@abstractmethod
def get_variables(
self,
with_static: bool = True,
with_sa: bool = True,
) -> Variables:
"""
Returns a description of the decision variables in the problem.
Parameters
----------
with_static: bool
If True, include features that do not change during the solution process,
such as variable types and names. This parameter is used to reduce the
amount of duplicated data collected by LearningSolver. Features that do
not change are only collected once.
with_sa: bool
If True, collect sensitivity analysis information. For large models,
collecting this information may be expensive, so this parameter is useful
for reducing running times.
"""
pass
@abstractmethod
def is_infeasible(self) -> bool:
"""
Returns True if the model has been proved to be infeasible.
Must be called after solve.
"""
pass
@abstractmethod
def remove_constraints(self, names: np.ndarray) -> None:
"""
Removes the given constraints from the model.
"""
pass
@abstractmethod
def set_instance(
self,
instance: Instance,
model: Any = None,
) -> None:
"""
Loads the given instance into the solver.
Parameters
----------
instance: Instance
The instance to be loaded.
model: Any
The concrete optimization model corresponding to this instance
(e.g. JuMP.Model or pyomo.core.ConcreteModel). If not provided,
it will be generated by calling `instance.to_model()`.
"""
pass
@abstractmethod
def set_warm_start(self, solution: Solution) -> None:
"""
Sets the warm start to be used by the solver.
Only one warm start is supported. Calling this function when a warm start
already exists will remove the previous warm start.
"""
pass
@abstractmethod
def solve(
self,
tee: bool = False,
iteration_cb: Optional[IterationCallback] = None,
lazy_cb: Optional[LazyCallback] = None,
user_cut_cb: Optional[UserCutCallback] = None,
) -> MIPSolveStats:
"""
Solves the currently loaded instance. After this method finishes,
the best solution found can be retrieved by calling `get_solution`.
Parameters
----------
iteration_cb: IterationCallback
By default, InternalSolver makes a single call to the native `solve`
method and returns the result. If an iteration callback is provided
instead, InternalSolver enters a loop, where `solve` and `iteration_cb`
are called alternately. To stop the loop, `iteration_cb` should return
False. Any other result causes the solver to loop again.
lazy_cb: LazyCallback
This function is called whenever the solver finds a new candidate
solution and can be used to add lazy constraints to the model. Only the
following operations within the callback are allowed:
- Querying the value of a variable
- Querying if a constraint is satisfied
- Adding a new constraint to the problem
Additional operations may be allowed by specific subclasses.
user_cut_cb: UserCutCallback
This function is called whenever the solver finds a new integer-infeasible
solution and needs to generate cutting planes to cut it off.
tee: bool
If true, prints the solver log to the screen.
"""
pass
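# Hedged usage sketch for iteration_cb (find_violations and add_cut are
# hypothetical helpers, not part of this module): re-solve until no violated
# constraints remain, mimicking the lazy-constraint loop.
#
#   def iteration_cb() -> bool:
#       violated = find_violations(solver.get_solution())
#       for v in violated:
#           add_cut(solver, v)
#       return len(violated) > 0  # True => solve again
#
#   solver.solve(iteration_cb=iteration_cb)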
@abstractmethod
def solve_lp(
self,
tee: bool = False,
) -> LPSolveStats:
"""
Solves the LP relaxation of the currently loaded instance. After this
method finishes, the solution can be retrieved by calling `get_solution`.
This method should not permanently modify the problem. That is, subsequent
calls to `solve` should solve the original MIP, not the LP relaxation.
Parameters
----------
tee
If true, prints the solver log to the screen.
"""
pass

View File

@@ -1,591 +1,43 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
import time
import traceback
from typing import Optional, List, Any, cast, Dict, Tuple, Callable, IO, Union
from overrides import overrides
from p_tqdm import p_map, p_umap
from tqdm.auto import tqdm
from miplearn.features.sample import Hdf5Sample, Sample
from miplearn.components.component import Component
from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent
from miplearn.components.dynamic_user_cuts import UserCutsComponent
from miplearn.components.objective import ObjectiveValueComponent
from miplearn.components.primal import PrimalSolutionComponent
from miplearn.features.extractor import FeaturesExtractor
from miplearn.instance.base import Instance
from miplearn.solvers import _RedirectOutput
from miplearn.solvers.internal import InternalSolver
from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
from miplearn.types import LearningSolveStats, ConstraintName
import gzip
import pickle
import miplearn
import json
from os.path import exists
from os import remove
import pyomo.environ as pe
from tempfile import NamedTemporaryFile
from typing import List, Any, Union
logger = logging.getLogger(__name__)
class PyomoFindLazyCutCallbackHandler:
def __init__(self):
pass
def value(self, var):
return var.value
class PyomoEnforceLazyCutsCallbackHandler:
def __init__(self, opt, model):
self.model = model
self.opt = opt
if not hasattr(model, "miplearn_lazy_cb"):
model.miplearn_lazy_cb = pe.ConstraintList()
def enforce(self, expr):
constr = self.model.miplearn_lazy_cb.add(expr=expr)
self.opt.add_constraint(constr)
class FileInstanceWrapper(Instance):
def __init__(
self, data_filename: Any, build_model: Callable, mode: Optional[str] = None
):
super().__init__()
assert data_filename.endswith(".pkl.gz")
self.filename = data_filename
self.sample_filename = data_filename.replace(".pkl.gz", ".h5")
self.build_model = build_model
self.mode = mode
self.sample = None
self.model = None
@overrides
def to_model(self) -> Any:
if self.model is None:
self.model = miplearn.load(self.filename, self.build_model)
return self.model
@overrides
def create_sample(self) -> Sample:
return self.sample
@overrides
def get_samples(self) -> List[Sample]:
return [self.sample]
@overrides
def free(self) -> None:
self.sample.file.close()
@overrides
def load(self) -> None:
if self.mode is None:
self.mode = "r+" if exists(self.sample_filename) else "w"
self.sample = Hdf5Sample(self.sample_filename, mode=self.mode)
@overrides
def has_dynamic_lazy_constraints(self) -> bool:
assert hasattr(self, "model")
return hasattr(self.model, "_miplearn_find_lazy_cuts")
@overrides
def find_violated_lazy_constraints(
self,
solver: "InternalSolver",
model: Any,
) -> Dict[ConstraintName, Any]:
if not hasattr(self.model, "_miplearn_find_lazy_cuts"):
return {}
cb = PyomoFindLazyCutCallbackHandler()
violations = model._miplearn_find_lazy_cuts(cb)
return {json.dumps(v).encode(): v for v in violations}
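# Illustrative: violations are keyed by their JSON encoding, so a violation
# [1, 2] is stored as {b"[1, 2]": [1, 2]}; the byte keys double as stable
# constraint names across training samples.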
@overrides
def enforce_lazy_constraint(
self,
solver: "InternalSolver",
model: Any,
violation: Any,
) -> None:
assert isinstance(solver, GurobiPyomoSolver)
cb = PyomoEnforceLazyCutsCallbackHandler(solver._pyomo_solver, model)
model._miplearn_enforce_lazy_cuts(cb, violation)
class MemoryInstanceWrapper(Instance):
def __init__(self, model: Any) -> None:
super().__init__()
assert model is not None
self.model = model
@overrides
def to_model(self) -> Any:
return self.model
@overrides
def has_dynamic_lazy_constraints(self) -> bool:
assert hasattr(self, "model")
return hasattr(self.model, "_miplearn_find_lazy_cuts")
@overrides
def find_violated_lazy_constraints(
self,
solver: "InternalSolver",
model: Any,
) -> Dict[ConstraintName, Any]:
cb = PyomoFindLazyCutCallbackHandler()
violations = model._miplearn_find_lazy_cuts(cb)
return {json.dumps(v).encode(): v for v in violations}
@overrides
def enforce_lazy_constraint(
self,
solver: "InternalSolver",
model: Any,
violation: Any,
) -> None:
assert isinstance(solver, GurobiPyomoSolver)
cb = PyomoEnforceLazyCutsCallbackHandler(solver._pyomo_solver, model)
model._miplearn_enforce_lazy_cuts(cb, violation)
class _GlobalVariables:
def __init__(self) -> None:
self.solver: Optional[LearningSolver] = None
self.build_model: Optional[Callable] = None
self.filenames: Optional[List[str]] = None
self.skip = False
# Global variables used for multiprocessing. Global variables are copied by the
# operating system when the process forks. Local variables are copied through
# serialization, which is a much slower process.
_GLOBAL = [_GlobalVariables()]
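# Hedged sketch of the pattern above (file names are placeholders): populate
# the globals before forking, so workers inherit them without serialization.
#
#   _GLOBAL[0].solver = LearningSolver()
#   _GLOBAL[0].build_model = build_model
#   _GLOBAL[0].filenames = ["a.pkl.gz", "b.pkl.gz"]
#   results = p_umap(_parallel_solve, list(range(2)), num_cpus=2)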
def _parallel_solve(
idx: int,
) -> Tuple[Optional[int], Optional[LearningSolveStats]]:
solver = _GLOBAL[0].solver
filenames = _GLOBAL[0].filenames
build_model = _GLOBAL[0].build_model
skip = _GLOBAL[0].skip
assert solver is not None
try:
stats = solver.solve([filenames[idx]], build_model, skip=skip)
return idx, stats[0]
except Exception as e:
traceback.print_exc()
logger.exception(f"Exception while solving {filenames[idx]}. Ignoring.")
return idx, None
from miplearn.h5 import H5File
from miplearn.io import _to_h5_filename
from miplearn.solvers.abstract import AbstractModel
class LearningSolver:
"""
Mixed-Integer Linear Programming (MIP) solver that extracts information
from previous runs and uses Machine Learning methods to accelerate the
solution of new (yet unseen) instances.
def __init__(self, components: List[Any], skip_lp=False):
self.components = components
self.skip_lp = skip_lp
Parameters
----------
components: List[Component]
Set of components in the solver. By default, includes
`ObjectiveValueComponent`, `PrimalSolutionComponent`,
`DynamicLazyConstraintsComponent` and `UserCutsComponent`.
mode: str
If "exact", solves problem to optimality, keeping all optimality
guarantees provided by the MIP solver. If "heuristic", uses machine
learning more aggressively, and may return suboptimal solutions.
solver: Callable[[], InternalSolver]
A callable that constructs the internal solver. If None is provided,
use GurobiPyomoSolver.
use_lazy_cb: bool
If true, use native solver callbacks for enforcing lazy constraints,
instead of a simple loop. May not be supported by all solvers.
solve_lp: bool
If true, solve the root LP relaxation before solving the MIP. This
option should be activated if the LP relaxation is not very
expensive to solve and if it provides good hints for the integer
solution.
"""
def fit(self, data_filenames):
h5_filenames = [_to_h5_filename(f) for f in data_filenames]
for comp in self.components:
comp.fit(h5_filenames)
def __init__(
self,
components: Optional[List[Component]] = None,
mode: str = "exact",
solver: Optional[InternalSolver] = None,
use_lazy_cb: bool = False,
solve_lp: bool = True,
extractor: Optional[FeaturesExtractor] = None,
extract_lhs: bool = True,
extract_sa: bool = True,
) -> None:
if solver is None:
solver = GurobiPyomoSolver()
if extractor is None:
extractor = FeaturesExtractor(
with_sa=extract_sa,
with_lhs=extract_lhs,
)
assert isinstance(solver, InternalSolver)
self.components: Dict[str, Component] = {}
self.internal_solver: Optional[InternalSolver] = None
self.internal_solver_prototype: InternalSolver = solver
self.mode: str = mode
self.solve_lp: bool = solve_lp
self.tee = False
self.use_lazy_cb: bool = use_lazy_cb
self.extractor = extractor
if components is not None:
for comp in components:
self._add_component(comp)
else:
self._add_component(ObjectiveValueComponent())
self._add_component(PrimalSolutionComponent(mode=mode))
self._add_component(DynamicLazyConstraintsComponent())
self._add_component(UserCutsComponent())
assert self.mode in ["exact", "heuristic"]
def _solve(
self,
instance: Instance,
model: Any = None,
discard_output: bool = False,
tee: bool = False,
) -> LearningSolveStats:
"""
Solves the given instance. If trained machine-learning models are
available, they will be used to accelerate the solution process.
The argument `instance` may be either an Instance object or a
filename pointing to a pickled Instance object.
This method adds a new training sample to `instance.training_sample`.
If a filename is provided, then the file is modified in-place. That is,
the original file is overwritten.
If `solve_lp` is False, the properties lp_solution and
lp_value will be set to dummy values.
Parameters
----------
instance: Instance
The instance to be solved.
model: Any
The corresponding Pyomo model. If not provided, it will be created.
discard_output: bool
If True, do not write the modified instances anywhere; simply discard
them. Useful during benchmarking.
tee: bool
If true, prints solver log to screen.
Returns
-------
LearningSolveStats
A dictionary of solver statistics containing at least the following
keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
"Sense", "Log", "Warm start value" and "LP value".
Additional components may generate additional keys. For example,
ObjectiveValueComponent adds the keys "Predicted LB" and
"Predicted UB". See the documentation of each component for more
details.
"""
# Generate model
# -------------------------------------------------------
instance.load()
if model is None:
with _RedirectOutput([]):
model = instance.to_model()
# Initialize training sample
# -------------------------------------------------------
sample = instance.create_sample()
# Initialize stats
# -------------------------------------------------------
stats: LearningSolveStats = {}
# Initialize internal solver
# -------------------------------------------------------
self.tee = tee
self.internal_solver = self.internal_solver_prototype.clone()
assert self.internal_solver is not None
assert isinstance(self.internal_solver, InternalSolver)
self.internal_solver.set_instance(instance, model)
# Extract features (after-load)
# -------------------------------------------------------
logger.info("Extracting features (after-load)...")
initial_time = time.time()
self.extractor.extract_after_load_features(
instance, self.internal_solver, sample
)
logger.info(
"Features (after-load) extracted in %.2f seconds"
% (time.time() - initial_time)
)
callback_args = (
self,
instance,
model,
stats,
sample,
)
# Solve root LP relaxation
# -------------------------------------------------------
lp_stats = None
if self.solve_lp:
logger.debug("Running before_solve_lp callbacks...")
for component in self.components.values():
component.before_solve_lp(*callback_args)
logger.info("Solving root LP relaxation...")
lp_stats = self.internal_solver.solve_lp(tee=tee)
stats.update(cast(LearningSolveStats, lp_stats.__dict__))
assert lp_stats.lp_wallclock_time is not None
logger.info(
"LP relaxation solved in %.2f seconds" % lp_stats.lp_wallclock_time
)
logger.debug("Running after_solve_lp callbacks...")
for component in self.components.values():
component.after_solve_lp(*callback_args)
# Extract features (after-lp)
# -------------------------------------------------------
logger.info("Extracting features (after-lp)...")
initial_time = time.time()
self.extractor.extract_after_lp_features(
self.internal_solver, sample, lp_stats
)
logger.info(
"Features (after-lp) extracted in %.2f seconds"
% (time.time() - initial_time)
)
# Callback wrappers
# -------------------------------------------------------
def iteration_cb_wrapper() -> bool:
should_repeat = False
for comp in self.components.values():
if comp.iteration_cb(self, instance, model):
should_repeat = True
return should_repeat
def lazy_cb_wrapper(
cb_solver: InternalSolver,
cb_model: Any,
) -> None:
for comp in self.components.values():
comp.lazy_cb(self, instance, model)
def user_cut_cb_wrapper(
cb_solver: InternalSolver,
cb_model: Any,
) -> None:
for comp in self.components.values():
comp.user_cut_cb(self, instance, model)
lazy_cb = None
if self.use_lazy_cb:
lazy_cb = lazy_cb_wrapper
user_cut_cb = None
if instance.has_user_cuts():
user_cut_cb = user_cut_cb_wrapper
# Before-solve callbacks
# -------------------------------------------------------
logger.debug("Running before_solve_mip callbacks...")
for component in self.components.values():
component.before_solve_mip(*callback_args)
# Solve MIP
# -------------------------------------------------------
logger.info("Solving MIP...")
mip_stats = self.internal_solver.solve(
tee=tee,
iteration_cb=iteration_cb_wrapper,
user_cut_cb=user_cut_cb,
lazy_cb=lazy_cb,
)
assert mip_stats.mip_wallclock_time is not None
logger.info("MIP solved in %.2f seconds" % mip_stats.mip_wallclock_time)
stats.update(cast(LearningSolveStats, mip_stats.__dict__))
stats["Solver"] = "default"
stats["Gap"] = self._compute_gap(
ub=mip_stats.mip_upper_bound,
lb=mip_stats.mip_lower_bound,
)
stats["Mode"] = self.mode
# Extract features (after-mip)
# -------------------------------------------------------
logger.info("Extracting features (after-mip)...")
initial_time = time.time()
for (k, v) in mip_stats.__dict__.items():
sample.put_scalar(k, v)
self.extractor.extract_after_mip_features(self.internal_solver, sample)
logger.info(
"Features (after-mip) extracted in %.2f seconds"
% (time.time() - initial_time)
)
# After-solve callbacks
# -------------------------------------------------------
logger.debug("Calling after_solve_mip callbacks...")
for component in self.components.values():
component.after_solve_mip(*callback_args)
# Flush
# -------------------------------------------------------
if not discard_output:
instance.flush()
instance.free()
return stats
def solve(
self,
arg: Union[Any, List[str]],
build_model: Optional[Callable] = None,
tee: bool = False,
progress: bool = False,
skip: bool = False,
) -> Union[LearningSolveStats, List[LearningSolveStats]]:
if isinstance(arg, list):
def optimize(self, model: Union[str, AbstractModel], build_model=None):
if isinstance(model, str):
h5_filename = _to_h5_filename(model)
assert build_model is not None
stats = []
for i in tqdm(arg, disable=not progress):
instance = FileInstanceWrapper(i, build_model)
solved = False
if exists(instance.sample_filename):
try:
with Hdf5Sample(instance.sample_filename, mode="r") as sample:
if sample.get_scalar("mip_lower_bound"):
solved = True
except OSError:
# File exists but it is unreadable/corrupted. Delete it.
remove(instance.sample_filename)
if solved and skip:
stats.append({})
else:
s = self._solve(instance, tee=tee)
# Export to gzipped MPS file
mps_filename = instance.sample_filename.replace(".h5", ".mps")
instance.model.write(
filename=mps_filename,
io_options={
"labeler": pe.NameLabeler(),
"skip_objective_sense": True,
},
)
with open(mps_filename, "rb") as original:
with gzip.open(f"{mps_filename}.gz", "wb") as compressed:
compressed.writelines(original)
remove(mps_filename)
stats.append(s)
return stats
model = build_model(model)
else:
return self._solve(MemoryInstanceWrapper(arg), tee=tee)
h5_filename = NamedTemporaryFile().name
stats = {}
mode = "r+" if exists(h5_filename) else "w"
with H5File(h5_filename, mode) as h5:
model.extract_after_load(h5)
if not self.skip_lp:
relaxed = model.relax()
relaxed.optimize()
relaxed.extract_after_lp(h5)
for comp in self.components:
comp.before_mip(h5_filename, model, stats)
model.optimize()
model.extract_after_mip(h5)
def fit(
self,
filenames: List[str],
build_model: Callable,
progress: bool = False,
n_jobs: int = 1,
) -> None:
instances: List[Instance] = [
FileInstanceWrapper(f, build_model, mode="r") for f in filenames
]
self._fit(instances, progress=progress, n_jobs=n_jobs)
def parallel_solve(
self,
filenames: List[str],
build_model: Optional[Callable] = None,
n_jobs: int = 4,
progress: bool = False,
label: str = "solve",
skip: bool = False,
) -> List[LearningSolveStats]:
self.internal_solver = None
self._silence_miplearn_logger()
_GLOBAL[0].solver = self
_GLOBAL[0].build_model = build_model
_GLOBAL[0].filenames = filenames
_GLOBAL[0].skip = skip
results = p_umap(
_parallel_solve,
list(range(len(filenames))),
num_cpus=n_jobs,
disable=not progress,
desc=label,
)
stats: List[LearningSolveStats] = [{} for _ in range(len(filenames))]
for (idx, s) in results:
if s:
stats[idx] = s
self._restore_miplearn_logger()
return stats
def _fit(
self,
training_instances: List[Instance],
n_jobs: int = 1,
progress: bool = False,
) -> None:
if len(training_instances) == 0:
logger.warning("Empty list of training instances provided. Skipping.")
return
Component.fit_multiple(
list(self.components.values()),
training_instances,
n_jobs=n_jobs,
progress=progress,
)
def _add_component(self, component: Component) -> None:
name = component.__class__.__name__
self.components[name] = component
def _silence_miplearn_logger(self) -> None:
miplearn_logger = logging.getLogger("miplearn")
self.prev_log_level = miplearn_logger.getEffectiveLevel()
miplearn_logger.setLevel(logging.WARNING)
def _restore_miplearn_logger(self) -> None:
miplearn_logger = logging.getLogger("miplearn")
miplearn_logger.setLevel(self.prev_log_level)
def __getstate__(self) -> Dict:
self.internal_solver = None
return self.__dict__
@staticmethod
def _compute_gap(ub: Optional[float], lb: Optional[float]) -> Optional[float]:
if lb is None or ub is None or lb * ub < 0:
# solver did not find a solution and/or bound
return None
elif abs(ub - lb) < 1e-6:
# avoid division by zero when ub = lb = 0
return 0.0
else:
# divide by max(abs(ub),abs(lb)) to ensure gap <= 1
return (ub - lb) / max(abs(ub), abs(lb))
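# Worked example (illustrative): ub=1183.0, lb=1180.0 gives
# (1183.0 - 1180.0) / max(1183.0, 1180.0) ≈ 0.00254, i.e. a 0.25% gap.
# ub=lb=0.0 returns 0.0, and a missing bound (None) returns None.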

366 miplearn/solvers/pyomo.py Normal file
View File

@@ -0,0 +1,366 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2022, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from numbers import Number
from typing import Optional, Dict, List, Any
import numpy as np
import pyomo
from pyomo.core import Objective, Var, Suffix
from pyomo.core.base import _GeneralVarData
from pyomo.core.expr.numeric_expr import SumExpression, MonomialTermExpression
from scipy.sparse import coo_matrix
from miplearn.h5 import H5File
from miplearn.solvers.abstract import AbstractModel
import pyomo.environ as pe
class PyomoModel(AbstractModel):
def __init__(self, model: pe.ConcreteModel, solver_name: str = "gurobi_persistent"):
self.inner = model
self.solver_name = solver_name
self.solver = pe.SolverFactory(solver_name)
self.is_persistent = hasattr(self.solver, "set_instance")
if self.is_persistent:
self.solver.set_instance(model)
self.results = None
self._is_warm_start_available = False
if not hasattr(self.inner, "dual"):
self.inner.dual = Suffix(direction=Suffix.IMPORT)
self.inner.rc = Suffix(direction=Suffix.IMPORT)
self.inner.slack = Suffix(direction=Suffix.IMPORT)
def add_constrs(
self,
var_names: np.ndarray,
constrs_lhs: np.ndarray,
constrs_sense: np.ndarray,
constrs_rhs: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
variables = self._var_names_to_vars(var_names)
if not hasattr(self.inner, "added_eqs"):
self.inner.added_eqs = pe.ConstraintList()
for i in range(len(constrs_sense)):
lhs = sum([variables[j] * constrs_lhs[i, j] for j in range(len(variables))])
sense = constrs_sense[i]
rhs = constrs_rhs[i]
if sense == b"=":
eq = self.inner.added_eqs.add(lhs == rhs)
elif sense == b"<":
eq = self.inner.added_eqs.add(lhs <= rhs)
elif sense == b">":
eq = self.inner.added_eqs.add(lhs >= rhs)
else:
raise Exception(f"Unknown sense: {sense}")
self.solver.add_constraint(eq)
def _var_names_to_vars(self, var_names):
varname_to_var = {}
for var in self.inner.component_objects(Var):
for idx in var:
v = var[idx]
varname_to_var[v.name] = var[idx]
return [varname_to_var[var_name.decode()] for var_name in var_names]
def extract_after_load(self, h5: H5File) -> None:
self._extract_after_load_vars(h5)
self._extract_after_load_constrs(h5)
h5.put_scalar("static_sense", self._get_sense())
def extract_after_lp(self, h5: H5File) -> None:
self._extract_after_lp_vars(h5)
self._extract_after_lp_constrs(h5)
h5.put_scalar("lp_obj_value", self.results["Problem"][0]["Lower bound"])
h5.put_scalar("lp_wallclock_time", self._get_runtime())
def _get_runtime(self):
solver_dict = self.results["Solver"][0]
for key in ["Wallclock time", "User time"]:
if isinstance(solver_dict[key], Number):
return solver_dict[key]
raise Exception("Time unavailable")
def extract_after_mip(self, h5: H5File) -> None:
h5.put_scalar("mip_wallclock_time", self._get_runtime())
if self.results["Solver"][0]["Termination condition"] == "infeasible":
return
self._extract_after_mip_vars(h5)
self._extract_after_mip_constrs(h5)
if self._get_sense() == "max":
obj_value = self.results["Problem"][0]["Lower bound"]
obj_bound = self.results["Problem"][0]["Upper bound"]
else:
obj_value = self.results["Problem"][0]["Upper bound"]
obj_bound = self.results["Problem"][0]["Lower bound"]
h5.put_scalar("mip_obj_value", obj_value)
h5.put_scalar("mip_obj_bound", obj_bound)
h5.put_scalar("mip_gap", self._gap(obj_value, obj_bound))
def fix_variables(
self,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
variables = self._var_names_to_vars(var_names)
for (var, val) in zip(variables, var_values):
if np.isfinite(val):
var.fix(val)
self.solver.update_var(var)
def optimize(self) -> None:
if self.is_persistent:
self.results = self.solver.solve(
tee=True,
warmstart=self._is_warm_start_available,
)
else:
self.results = self.solver.solve(
self.inner,
tee=True,
)
def relax(self) -> "AbstractModel":
relaxed = self.inner.clone()
for var in relaxed.component_objects(Var):
for idx in var:
if var[idx].domain == pyomo.core.base.set_types.Binary:
lb, ub = var[idx].bounds
var[idx].setlb(lb)
var[idx].setub(ub)
var[idx].domain = pyomo.core.base.set_types.Reals
return PyomoModel(relaxed, self.solver_name)
def set_warm_starts(
self,
var_names: np.ndarray,
var_values: np.ndarray,
stats: Optional[Dict] = None,
) -> None:
assert len(var_values.shape) == 2
(n_starts, n_vars) = var_values.shape
assert len(var_names.shape) == 1
assert var_names.shape[0] == n_vars
assert n_starts == 1, "Pyomo does not support multiple warm starts"
variables = self._var_names_to_vars(var_names)
for (var, val) in zip(variables, var_values[0, :]):
if np.isfinite(val):
var.value = val
self._is_warm_start_available = True
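# Hedged usage sketch: a single warm start for two variables; np.nan marks
# entries to leave unset (filtered out by np.isfinite). Variable names are
# byte strings, matching the names stored in the HDF5 samples.
#
#   model.set_warm_starts(
#       var_names=np.array([b"x[0]", b"x[1]"]),
#       var_values=np.array([[1.0, np.nan]]),  # shape (n_starts=1, n_vars=2)
#   )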
def _extract_after_load_vars(self, h5):
names: List[str] = []
types: List[str] = []
upper_bounds: List[float] = []
lower_bounds: List[float] = []
obj_coeffs: List[float] = []
obj = None
obj_offset = 0.0
obj_count = 0
for obj in self.inner.component_objects(Objective):
obj, obj_offset = self._parse_pyomo_expr(obj.expr)
obj_count += 1
assert obj_count == 1, f"One objective function expected; found {obj_count}"
for (i, var) in enumerate(self.inner.component_objects(pyomo.core.Var)):
for idx in var:
v = var[idx]
# Variable name
if idx is None:
names.append(var.name)
else:
names.append(var[idx].name)
# Variable type
if v.domain == pyomo.core.Binary:
types.append("B")
elif v.domain in [
pyomo.core.Reals,
pyomo.core.NonNegativeReals,
pyomo.core.NonPositiveReals,
pyomo.core.NegativeReals,
pyomo.core.PositiveReals,
]:
types.append("C")
else:
raise Exception(f"unknown variable domain: {v.domain}")
# Variable upper/lower bounds
lb, ub = v.bounds
if lb is None:
lb = -float("inf")
if ub is None:
ub = float("inf")
upper_bounds.append(float(ub))
lower_bounds.append(float(lb))
# Objective coefficients
if v.name in obj:
obj_coeffs.append(obj[v.name])
else:
obj_coeffs.append(0.0)
h5.put_array("static_var_names", np.array(names, dtype="S"))
h5.put_array("static_var_types", np.array(types, dtype="S"))
h5.put_array("static_var_lower_bounds", np.array(lower_bounds))
h5.put_array("static_var_upper_bounds", np.array(upper_bounds))
h5.put_array("static_var_obj_coeffs", np.array(obj_coeffs))
h5.put_scalar("static_obj_offset", obj_offset)
def _extract_after_load_constrs(self, h5):
names: List[str] = []
rhs: List[float] = []
senses: List[str] = []
lhs_row: List[int] = []
lhs_col: List[int] = []
lhs_data: List[float] = []
varname_to_idx = {}
for var in self.inner.component_objects(Var):
for idx in var:
varname = var.name
if idx is not None:
varname = var[idx].name
varname_to_idx[varname] = len(varname_to_idx)
def _parse_constraint(c: pe.Constraint, row: int) -> None:
# Extract RHS and sense
has_ub = c.has_ub()
has_lb = c.has_lb()
assert (
(not has_lb) or (not has_ub) or c.upper() == c.lower()
), "range constraints not supported"
if not has_ub:
senses.append(">")
rhs.append(float(c.lower()))
elif not has_lb:
senses.append("<")
rhs.append(float(c.upper()))
else:
senses.append("=")
rhs.append(float(c.upper()))
# Extract LHS
expr = c.body
if isinstance(expr, SumExpression):
for term in expr._args_:
if isinstance(term, MonomialTermExpression):
lhs_row.append(row)
lhs_col.append(varname_to_idx[term._args_[1].name])
lhs_data.append(float(term._args_[0]))
elif isinstance(term, _GeneralVarData):
lhs_row.append(row)
lhs_col.append(varname_to_idx[term.name])
lhs_data.append(1.0)
else:
raise Exception(f"Unknown term type: {term.__class__.__name__}")
elif isinstance(expr, _GeneralVarData):
lhs_row.append(row)
lhs_col.append(varname_to_idx[expr.name])
lhs_data.append(1.0)
else:
raise Exception(f"Unknown expression type: {expr.__class__.__name__}")
curr_row = 0
for (i, constr) in enumerate(
self.inner.component_objects(pyomo.core.Constraint)
):
if len(constr) > 0:
for idx in constr:
names.append(constr[idx].name)
_parse_constraint(constr[idx], curr_row)
curr_row += 1
else:
names.append(constr.name)
_parse_constraint(constr, curr_row)
curr_row += 1
lhs = coo_matrix((lhs_data, (lhs_row, lhs_col))).tocoo()
h5.put_sparse("static_constr_lhs", lhs)
h5.put_array("static_constr_names", np.array(names, dtype="S"))
h5.put_array("static_constr_rhs", np.array(rhs))
h5.put_array("static_constr_sense", np.array(senses, dtype="S"))
def _extract_after_lp_vars(self, h5):
rc = []
values = []
for var in self.inner.component_objects(Var):
for idx in var:
v = var[idx]
rc.append(self.inner.rc[v])
values.append(v.value)
h5.put_array("lp_var_reduced_costs", np.array(rc))
h5.put_array("lp_var_values", np.array(values))
def _extract_after_lp_constrs(self, h5):
dual = []
slacks = []
for constr in self.inner.component_objects(pyomo.core.Constraint):
for idx in constr:
c = constr[idx]
dual.append(self.inner.dual[c])
slacks.append(abs(self.inner.slack[c]))
h5.put_array("lp_constr_dual_values", np.array(dual))
h5.put_array("lp_constr_slacks", np.array(slacks))
def _extract_after_mip_vars(self, h5):
values = []
for var in self.inner.component_objects(Var):
for idx in var:
v = var[idx]
values.append(v.value)
h5.put_array("mip_var_values", np.array(values))
def _extract_after_mip_constrs(self, h5):
slacks = []
for constr in self.inner.component_objects(pyomo.core.Constraint):
for idx in constr:
c = constr[idx]
slacks.append(abs(self.inner.slack[c]))
h5.put_array("mip_constr_slacks", np.array(slacks))
def _parse_pyomo_expr(self, expr: Any):
lhs = {}
offset = 0.0
if isinstance(expr, SumExpression):
for term in expr._args_:
if isinstance(term, MonomialTermExpression):
lhs[term._args_[1].name] = float(term._args_[0])
elif isinstance(term, _GeneralVarData):
lhs[term.name] = 1.0
elif isinstance(term, Number):
offset += term
else:
raise Exception(f"Unknown term type: {term.__class__.__name__}")
elif isinstance(expr, _GeneralVarData):
lhs[expr.name] = 1.0
else:
raise Exception(f"Unknown expression type: {expr.__class__.__name__}")
return lhs, offset
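# Illustrative: for an objective expression like 2.0 * m.x + m.y + 3.0 this
# returns ({"x": 2.0, "y": 1.0}, 3.0): monomial terms map variable name to
# coefficient, bare variables contribute 1.0, and constants accumulate in
# the offset.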
def _gap(self, zp, zd, tol=1e-6):
# Reference: https://www.gurobi.com/documentation/9.5/refman/mipgap2.html
if abs(zp) < tol:
if abs(zd) < tol:
return 0
else:
return float("inf")
else:
return abs(zp - zd) / abs(zp)
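# Worked example (illustrative): zp=100.0, zd=90.0 gives |100-90|/|100| = 0.1;
# zp=zd=0.0 gives 0; zp near zero with zd nonzero gives float("inf"),
# matching the referenced Gurobi MIPGap convention.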
def _get_sense(self):
for obj in self.inner.component_objects(Objective):
sense = obj.sense
if sense == pyomo.core.kernel.objective.minimize:
return "min"
elif sense == pyomo.core.kernel.objective.maximize:
return "max"
else:
raise Exception(f"Unknown sense: ${sense}")
def write(self, filename: str) -> None:
self.inner.write(filename, io_options={"symbolic_solver_labels": True})

View File

@@ -1,3 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.

View File

@@ -1,677 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
import re
import sys
from io import StringIO
from typing import Any, List, Dict, Optional
import numpy as np
import pyomo
from overrides import overrides
from pyomo import environ as pe
from pyomo.core import Var, Suffix, Objective
from pyomo.core.base import _GeneralVarData
from pyomo.core.base.constraint import ConstraintList
from pyomo.core.expr.numeric_expr import SumExpression, MonomialTermExpression
from pyomo.opt import TerminationCondition
from pyomo.opt.base.solvers import SolverFactory
from scipy.sparse import coo_matrix
from miplearn.instance.base import Instance
from miplearn.solvers import _RedirectOutput, _none_if_empty
from miplearn.solvers.internal import (
InternalSolver,
LPSolveStats,
IterationCallback,
LazyCallback,
MIPSolveStats,
Variables,
Constraints,
)
from miplearn.types import (
SolverParams,
UserCutCallback,
Solution,
)
logger = logging.getLogger(__name__)
class BasePyomoSolver(InternalSolver):
"""
Base class for all Pyomo solvers.
"""
def __init__(
self,
solver_factory: SolverFactory,
params: SolverParams,
) -> None:
self.instance: Optional[Instance] = None
self.model: Optional[pe.ConcreteModel] = None
self.params = params
self._all_vars: List[pe.Var] = []
self._bin_vars: List[pe.Var] = []
self._is_warm_start_available: bool = False
self._pyomo_solver: SolverFactory = solver_factory
self._obj_sense: str = "min"
self._varname_to_var: Dict[bytes, pe.Var] = {}
self._varname_to_idx: Dict[str, int] = {}
self._name_buffer = {}
self._cname_to_constr: Dict[str, pe.Constraint] = {}
self._termination_condition: str = ""
self._has_lp_solution = False
self._has_mip_solution = False
self._obj: Dict[str, float] = {}
for (key, value) in params.items():
self._pyomo_solver.options[key] = value
def add_constraint(
self,
constr: Any,
) -> None:
assert self.model is not None
self._pyomo_solver.add_constraint(constr)
self._termination_condition = ""
self._has_lp_solution = False
self._has_mip_solution = False
@overrides
def add_constraints(self, cf: Constraints) -> None:
assert cf.names is not None
assert cf.senses is not None
assert cf.lhs is not None
assert cf.rhs is not None
assert self.model is not None
lhs = cf.lhs.tocsr()
for i in range(len(cf.names)):
row = lhs[i, :]
lhsi = 0.0
for j in range(row.getnnz()):
lhsi += self._all_vars[row.indices[j]] * row.data[j]
if cf.senses[i] == b"=":
expr = lhsi == cf.rhs[i]
elif cf.senses[i] == b"<":
expr = lhsi <= cf.rhs[i]
elif cf.senses[i] == b">":
expr = lhsi >= cf.rhs[i]
else:
raise Exception(f"Unknown sense: {cf.senses[i]}")
cl = pe.Constraint(expr=expr, name=cf.names[i])
self.model.add_component(cf.names[i].decode(), cl)
self._pyomo_solver.add_constraint(cl)
self._cname_to_constr[cf.names[i]] = cl
self._termination_condition = ""
self._has_lp_solution = False
self._has_mip_solution = False
@overrides
def are_callbacks_supported(self) -> bool:
return False
@overrides
def are_constraints_satisfied(
self,
cf: Constraints,
tol: float = 1e-5,
) -> List[bool]:
assert cf.names is not None
assert cf.lhs is not None
assert cf.rhs is not None
assert cf.senses is not None
x = [v.value for v in self._all_vars]
lhs = cf.lhs.tocsr() * x
result = []
for i in range(len(lhs)):
if cf.senses[i] == b"<":
result.append(lhs[i] <= cf.rhs[i] + tol)
elif cf.senses[i] == b">":
result.append(lhs[i] >= cf.rhs[i] - tol)
elif cf.senses[i] == b"=":
result.append(abs(cf.rhs[i] - lhs[i]) < tol)
else:
raise Exception(f"unknown sense: {cf.senses[i]}")
return result
@overrides
def build_test_instance_infeasible(self) -> Instance:
return PyomoTestInstanceInfeasible()
@overrides
def build_test_instance_knapsack(self) -> Instance:
return PyomoTestInstanceKnapsack(
weights=[23.0, 26.0, 20.0, 18.0],
prices=[505.0, 352.0, 458.0, 220.0],
capacity=67.0,
)
@overrides
def fix(self, solution: Solution) -> None:
for (varname, value) in solution.items():
if value is None:
continue
var = self._varname_to_var[varname]
var.fix(value)
self._pyomo_solver.update_var(var)
@overrides
def get_constraints(
self,
with_static: bool = True,
with_sa: bool = True,
with_lhs: bool = True,
) -> Constraints:
model = self.model
assert model is not None
names: List[str] = []
rhs: List[float] = []
senses: List[str] = []
dual_values: List[float] = []
slacks: List[float] = []
lhs_row: List[int] = []
lhs_col: List[int] = []
lhs_data: List[float] = []
lhs: Optional[coo_matrix] = None
def _parse_constraint(c: pe.Constraint, row: int) -> None:
assert model is not None
if with_static:
# Extract RHS and sense
has_ub = c.has_ub()
has_lb = c.has_lb()
assert (
(not has_lb) or (not has_ub) or c.upper() == c.lower()
), "range constraints not supported"
if not has_ub:
senses.append(">")
rhs.append(float(c.lower()))
elif not has_lb:
senses.append("<")
rhs.append(float(c.upper()))
else:
senses.append("=")
rhs.append(float(c.upper()))
if with_lhs:
# Extract LHS
expr = c.body
if isinstance(expr, SumExpression):
for term in expr._args_:
if isinstance(term, MonomialTermExpression):
lhs_row.append(row)
lhs_col.append(
self._varname_to_idx[self.name(term._args_[1])]
)
lhs_data.append(float(term._args_[0]))
elif isinstance(term, _GeneralVarData):
lhs_row.append(row)
lhs_col.append(self._varname_to_idx[self.name(term)])
lhs_data.append(1.0)
else:
raise Exception(
f"Unknown term type: {term.__class__.__name__}"
)
elif isinstance(expr, _GeneralVarData):
lhs_row.append(row)
lhs_col.append(self._varname_to_idx[self.name(expr)])
lhs_data.append(1.0)
else:
raise Exception(
f"Unknown expression type: {expr.__class__.__name__}"
)
# Extract dual values
if self._has_lp_solution:
dual_values.append(model.dual[c])
# Extract slacks
if self._has_mip_solution or self._has_lp_solution:
slacks.append(model.slack[c])
curr_row = 0
for (i, constr) in enumerate(model.component_objects(pyomo.core.Constraint)):
if isinstance(constr, pe.ConstraintList):
for idx in constr:
names.append(self.name(constr[idx]))
_parse_constraint(constr[idx], curr_row)
curr_row += 1
else:
names.append(self.name(constr))
_parse_constraint(constr, curr_row)
curr_row += 1
if len(lhs_data) > 0:
lhs = coo_matrix((lhs_data, (lhs_row, lhs_col))).tocoo()
return Constraints(
names=_none_if_empty(np.array(names, dtype="S")),
rhs=_none_if_empty(np.array(rhs, dtype=float)),
senses=_none_if_empty(np.array(senses, dtype="S")),
lhs=lhs,
slacks=_none_if_empty(np.array(slacks, dtype=float)),
dual_values=_none_if_empty(np.array(dual_values, dtype=float)),
)
@overrides
def get_constraint_attrs(self) -> List[str]:
return [
"dual_values",
"lhs",
"names",
"rhs",
"senses",
"slacks",
]
@overrides
def get_solution(self) -> Optional[Solution]:
assert self.model is not None
if self.is_infeasible():
return None
solution: Solution = {}
for var in self.model.component_objects(Var):
for index in var:
if var[index].fixed:
continue
solution[self.name(var[index]).encode()] = var[index].value
return solution
@overrides
def get_variables(
self,
with_static: bool = True,
with_sa: bool = True,
) -> Variables:
assert self.model is not None
names: List[str] = []
types: List[str] = []
upper_bounds: List[float] = []
lower_bounds: List[float] = []
obj_coeffs: List[float] = []
reduced_costs: List[float] = []
values: List[float] = []
for (i, var) in enumerate(self.model.component_objects(pyomo.core.Var)):
for idx in var:
v = var[idx]
# Variable name
if idx is None:
names.append(self.name(var))
else:
names.append(self.name(var[idx]))
if with_static:
# Variable type
if v.domain == pyomo.core.Binary:
types.append("B")
elif v.domain in [
pyomo.core.Reals,
pyomo.core.NonNegativeReals,
pyomo.core.NonPositiveReals,
pyomo.core.NegativeReals,
pyomo.core.PositiveReals,
]:
types.append("C")
else:
raise Exception(f"unknown variable domain: {v.domain}")
# Bounds
lb, ub = v.bounds
if ub is not None:
upper_bounds.append(float(ub))
else:
upper_bounds.append(float("inf"))
if lb is not None:
lower_bounds.append(float(lb))
else:
lower_bounds.append(-float("inf"))
# Objective coefficient
name = self.name(v)
if name in self._obj:
obj_coeffs.append(self._obj[name])
else:
obj_coeffs.append(0.0)
# Reduced costs
if self._has_lp_solution:
reduced_costs.append(self.model.rc[v])
# Values
if self._has_lp_solution or self._has_mip_solution:
values.append(v.value)
return Variables(
names=_none_if_empty(np.array(names, dtype="S")),
types=_none_if_empty(np.array(types, dtype="S")),
upper_bounds=_none_if_empty(np.array(upper_bounds, dtype=float)),
lower_bounds=_none_if_empty(np.array(lower_bounds, dtype=float)),
obj_coeffs=_none_if_empty(np.array(obj_coeffs, dtype=float)),
reduced_costs=_none_if_empty(np.array(reduced_costs, dtype=float)),
values=_none_if_empty(np.array(values, dtype=float)),
)
@overrides
def get_variable_attrs(self) -> List[str]:
return [
"names",
# "basis_status",
"categories",
"lower_bounds",
"obj_coeffs",
"reduced_costs",
# "sa_lb_down",
# "sa_lb_up",
# "sa_obj_down",
# "sa_obj_up",
# "sa_ub_down",
# "sa_ub_up",
"types",
"upper_bounds",
"user_features",
"values",
]
@overrides
def is_infeasible(self) -> bool:
return self._termination_condition == TerminationCondition.infeasible
@overrides
def remove_constraints(self, names: List[str]) -> None:
assert self.model is not None
for name in names:
constr = self._cname_to_constr[name]
del self._cname_to_constr[name]
self.model.del_component(constr)
self._pyomo_solver.remove_constraint(constr)
@overrides
def set_instance(
self,
instance: Instance,
model: Any = None,
) -> None:
if model is None:
model = instance.to_model()
assert isinstance(
model, pe.ConcreteModel
), f"expected pe.ConcreteModel; found {model.__class__} instead"
self.instance = instance
self.model = model
self.model.extra_constraints = ConstraintList()
self.model.dual = Suffix(direction=Suffix.IMPORT)
self.model.rc = Suffix(direction=Suffix.IMPORT)
self.model.slack = Suffix(direction=Suffix.IMPORT)
self._pyomo_solver.set_instance(model)
self._update_obj()
self._update_vars()
self._update_constrs()
@overrides
def set_warm_start(self, solution: Solution) -> None:
self._clear_warm_start()
count_fixed = 0
for (var_name, value) in solution.items():
if value is None:
continue
var = self._varname_to_var[var_name]
var.value = solution[var_name]
count_fixed += 1
if count_fixed > 0:
self._is_warm_start_available = True
@overrides
def solve(
self,
tee: bool = False,
iteration_cb: Optional[IterationCallback] = None,
lazy_cb: Optional[LazyCallback] = None,
user_cut_cb: Optional[UserCutCallback] = None,
) -> MIPSolveStats:
assert lazy_cb is None, "callbacks are not currently supported"
assert user_cut_cb is None, "callbacks are not currently supported"
total_wallclock_time = 0
streams: List[Any] = [StringIO()]
if tee:
streams += [sys.stdout]
if iteration_cb is None:
iteration_cb = lambda: False
while True:
logger.debug("Solving MIP...")
with _RedirectOutput(streams):
results = self._pyomo_solver.solve(
tee=True,
warmstart=self._is_warm_start_available,
)
self._termination_condition = results["Solver"][0]["Termination condition"]
total_wallclock_time += results["Solver"][0]["Wallclock time"]
if self.is_infeasible():
break
should_repeat = iteration_cb()
if not should_repeat:
break
log = streams[0].getvalue()
node_count = self._extract_node_count(log)
ws_value = self._extract_warm_start_value(log)
lb, ub = None, None
self._has_mip_solution = False
self._has_lp_solution = False
if not self.is_infeasible():
self._has_mip_solution = True
lb = results["Problem"][0]["Lower bound"]
ub = results["Problem"][0]["Upper bound"]
return MIPSolveStats(
mip_lower_bound=lb,
mip_upper_bound=ub,
mip_wallclock_time=total_wallclock_time,
mip_sense=self._obj_sense,
mip_log=log,
mip_nodes=node_count,
mip_warm_start_value=ws_value,
)
@overrides
def solve_lp(
self,
tee: bool = False,
) -> LPSolveStats:
self._relax()
streams: List[Any] = [StringIO()]
if tee:
streams += [sys.stdout]
with _RedirectOutput(streams):
results = self._pyomo_solver.solve(tee=True)
self._termination_condition = results["Solver"][0]["Termination condition"]
self._restore_integrality()
opt_value = None
self._has_lp_solution = False
self._has_mip_solution = False
if not self.is_infeasible():
opt_value = results["Problem"][0]["Lower bound"]
self._has_lp_solution = True
return LPSolveStats(
lp_value=opt_value,
lp_log=streams[0].getvalue(),
lp_wallclock_time=results["Solver"][0]["Wallclock time"],
)
def _clear_warm_start(self) -> None:
for var in self._all_vars:
if not var.fixed:
var.value = None
self._is_warm_start_available = False
@staticmethod
def _extract(
log: str,
regexp: Optional[str],
default: Optional[str] = None,
) -> Optional[str]:
if regexp is None:
return default
value = default
for line in log.splitlines():
matches = re.findall(regexp, line)
if len(matches) == 0:
continue
value = matches[0]
return value
def _extract_node_count(self, log: str) -> Optional[int]:
value = self._extract(log, self._get_node_count_regexp())
if value is None:
return None
return int(value)
def _extract_warm_start_value(self, log: str) -> Optional[float]:
value = self._extract(log, self._get_warm_start_regexp())
if value is None:
return None
return float(value)
def _get_node_count_regexp(self) -> Optional[str]:
return None
def _get_warm_start_regexp(self) -> Optional[str]:
return None
def _parse_pyomo_expr(self, expr: Any) -> Dict[str, float]:
lhs = {}
if isinstance(expr, SumExpression):
for term in expr._args_:
if isinstance(term, MonomialTermExpression):
lhs[self.name(term._args_[1])] = float(term._args_[0])
elif isinstance(term, _GeneralVarData):
lhs[self.name(term)] = 1.0
else:
raise Exception(f"Unknown term type: {term.__class__.__name__}")
elif isinstance(expr, _GeneralVarData):
lhs[self.name(expr)] = 1.0
else:
raise Exception(f"Unknown expression type: {expr.__class__.__name__}")
return lhs
def _relax(self) -> None:
for var in self._bin_vars:
lb, ub = var.bounds
var.setlb(lb)
var.setub(ub)
var.domain = pyomo.core.base.set_types.Reals
self._pyomo_solver.update_var(var)
def _restore_integrality(self) -> None:
for var in self._bin_vars:
var.domain = pyomo.core.base.set_types.Binary
self._pyomo_solver.update_var(var)
def _update_obj(self) -> None:
self._obj_sense = "max"
if self._pyomo_solver._objective.sense == pyomo.core.kernel.objective.minimize:
self._obj_sense = "min"
def _update_vars(self) -> None:
assert self.model is not None
self._all_vars = []
self._bin_vars = []
self._varname_to_var = {}
self._varname_to_idx = {}
for var in self.model.component_objects(Var):
for idx in var:
varname = self.name(var)
if idx is not None:
varname = self.name(var[idx])
self._varname_to_var[varname.encode()] = var[idx]
self._varname_to_idx[varname] = len(self._all_vars)
self._all_vars += [var[idx]]
if var[idx].domain == pyomo.core.base.set_types.Binary:
self._bin_vars += [var[idx]]
for obj in self.model.component_objects(Objective):
self._obj = self._parse_pyomo_expr(obj.expr)
break
def _update_constrs(self) -> None:
assert self.model is not None
self._cname_to_constr.clear()
for constr in self.model.component_objects(pyomo.core.Constraint):
if isinstance(constr, pe.ConstraintList):
for idx in constr:
self._cname_to_constr[self.name(constr[idx])] = constr[idx]
else:
self._cname_to_constr[self.name(constr)] = constr
def name(self, comp):
return comp.getname(name_buffer=self._name_buffer)
class PyomoTestInstanceInfeasible(Instance):
@overrides
def to_model(self) -> pe.ConcreteModel:
model = pe.ConcreteModel()
model.x = pe.Var([0], domain=pe.Binary)
model.OBJ = pe.Objective(expr=model.x[0], sense=pe.maximize)
model.eq = pe.Constraint(expr=model.x[0] >= 2)
return model
class PyomoTestInstanceKnapsack(Instance):
"""
Simpler (one-dimensional) Knapsack Problem, used for testing.
"""
def __init__(
self,
weights: List[float],
prices: List[float],
capacity: float,
) -> None:
super().__init__()
self.weights = weights
self.prices = prices
self.capacity = capacity
self.n = len(weights)
@overrides
def to_model(self) -> pe.ConcreteModel:
model = pe.ConcreteModel()
items = range(len(self.weights))
model.x = pe.Var(items, domain=pe.Binary)
model.z = pe.Var(domain=pe.Reals, bounds=(0, self.capacity))
model.OBJ = pe.Objective(
expr=sum(model.x[v] * self.prices[v] for v in items),
sense=pe.maximize,
)
model.eq_capacity = pe.Constraint(
expr=sum(model.x[v] * self.weights[v] for v in items) == model.z
)
return model
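# Hedged sketch: this instance can be solved directly with any Pyomo solver
# (assuming Gurobi is available); 1183.0 is the known optimum for this data.
#
#   inst = PyomoTestInstanceKnapsack(
#       weights=[23.0, 26.0, 20.0, 18.0],
#       prices=[505.0, 352.0, 458.0, 220.0],
#       capacity=67.0,
#   )
#   m = inst.to_model()
#   pe.SolverFactory("gurobi").solve(m)
#   print(pe.value(m.OBJ))  # 1183.0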
@overrides
def get_instance_features(self) -> np.ndarray:
return np.array(
[
self.capacity,
np.average(self.weights),
]
)
@overrides
def get_variable_features(self, names: np.ndarray) -> np.ndarray:
return np.vstack(
[
[[self.weights[i], self.prices[i]] for i in range(self.n)],
[0.0, 0.0],
]
)
@overrides
def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
return np.array(
["default" if n.decode().startswith("x") else "" for n in names],
dtype="S",
)

View File

@@ -1,48 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional
from overrides import overrides
from pyomo import environ as pe
from scipy.stats import randint
from miplearn.solvers.pyomo.base import BasePyomoSolver
from miplearn.types import SolverParams
class CplexPyomoSolver(BasePyomoSolver):
"""
An InternalSolver that uses CPLEX and the Pyomo modeling language.
Parameters
----------
params: dict
Dictionary of options to pass to the Pyomo solver. For example,
{"mip_display": 5} to increase the log verbosity.
"""
def __init__(
self,
params: Optional[SolverParams] = None,
) -> None:
if params is None:
params = {}
if "mip_display" not in params.keys():
params["mip_display"] = 4
super().__init__(
solver_factory=pe.SolverFactory("cplex_persistent"),
params=params,
)
@overrides
def _get_warm_start_regexp(self) -> str:
return "MIP start .* with objective ([0-9.e+-]*)\\."
@overrides
def _get_node_count_regexp(self) -> str:
return "^[ *] *([0-9]+)"
@overrides
def clone(self) -> "CplexPyomoSolver":
return CplexPyomoSolver(params=self.params)

View File

@@ -1,61 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Optional
from overrides import overrides
from pyomo import environ as pe
from scipy.stats import randint
from miplearn.solvers.pyomo.base import BasePyomoSolver
from miplearn.types import SolverParams
logger = logging.getLogger(__name__)
class GurobiPyomoSolver(BasePyomoSolver):
"""
An InternalSolver that uses Gurobi and the Pyomo modeling language.
Parameters
----------
params: dict
Dictionary of options to pass to the Pyomo solver. For example,
{"Threads": 4} to set the number of threads.
"""
def __init__(
self,
params: Optional[SolverParams] = None,
) -> None:
if params is None:
params = {}
super().__init__(
solver_factory=pe.SolverFactory("gurobi_persistent"),
params=params,
)
@overrides
def clone(self) -> "GurobiPyomoSolver":
return GurobiPyomoSolver(params=self.params)
@overrides
def _extract_node_count(self, log: str) -> int:
return max(1, int(self._pyomo_solver._solver_model.getAttr("NodeCount")))
@overrides
def _get_warm_start_regexp(self) -> str:
return "MIP start with objective ([0-9.e+-]*)"
@overrides
def _get_node_count_regexp(self) -> Optional[str]:
return None
def set_priorities(self, priorities):
for (var_name, priority) in priorities.items():
pvar = self._varname_to_var[var_name]
gvar = self._pyomo_solver._pyomo_var_to_solver_var_map[pvar]
gvar.branchPriority = priority
return None

View File

@@ -1,42 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import Optional
from overrides import overrides
from pyomo import environ as pe
from scipy.stats import randint
from miplearn.solvers.pyomo.base import BasePyomoSolver
from miplearn.types import SolverParams
logger = logging.getLogger(__name__)
class XpressPyomoSolver(BasePyomoSolver):
"""
An InternalSolver that uses XPRESS and the Pyomo modeling language.
Parameters
----------
params: dict
Dictionary of options to pass to the Pyomo solver. For example,
{"Threads": 4} to set the number of threads.
"""
def __init__(
self,
params: Optional[SolverParams] = None,
) -> None:
if params is None:
params = {}
super().__init__(
solver_factory=pe.SolverFactory("xpress_persistent"),
params=params,
)
@overrides
def clone(self) -> "XpressPyomoSolver":
return XpressPyomoSolver(params=self.params)

View File

@@ -1,288 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Any, List
import numpy as np
from scipy.sparse import coo_matrix
from miplearn.solvers.internal import InternalSolver, Variables, Constraints
inf = float("inf")
# NOTE:
# This file is in the main source folder, so that it can be called from Julia.
def _filter_attrs(allowed_keys: List[str], obj: Any) -> Any:
for key in obj.__dict__.keys():
if key not in allowed_keys:
setattr(obj, key, None)
return obj
def run_internal_solver_tests(solver: InternalSolver) -> None:
run_basic_usage_tests(solver.clone())
run_warm_start_tests(solver.clone())
run_infeasibility_tests(solver.clone())
run_iteration_cb_tests(solver.clone())
if solver.are_callbacks_supported():
run_lazy_cb_tests(solver.clone())
def run_basic_usage_tests(solver: InternalSolver) -> None:
# Create and set instance
instance = solver.build_test_instance_knapsack()
model = instance.to_model()
solver.set_instance(instance, model)
# Fetch variables (after-load)
assert_equals(
solver.get_variables(),
Variables(
names=np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"),
lower_bounds=np.array([0.0, 0.0, 0.0, 0.0, 0.0]),
upper_bounds=np.array([1.0, 1.0, 1.0, 1.0, 67.0]),
types=np.array(["B", "B", "B", "B", "C"], dtype="S"),
obj_coeffs=np.array([505.0, 352.0, 458.0, 220.0, 0.0]),
),
)
# Fetch constraints (after-load)
assert_equals(
solver.get_constraints(),
Constraints(
names=np.array(["eq_capacity"], dtype="S"),
rhs=np.array([0.0]),
lhs=coo_matrix([[23.0, 26.0, 20.0, 18.0, -1.0]]),
senses=np.array(["="], dtype="S"),
),
)
# Solve linear programming relaxation
lp_stats = solver.solve_lp()
assert not solver.is_infeasible()
assert lp_stats.lp_value is not None
assert_equals(round(lp_stats.lp_value, 3), 1287.923)
assert lp_stats.lp_log is not None
assert len(lp_stats.lp_log) > 100
assert lp_stats.lp_wallclock_time is not None
assert lp_stats.lp_wallclock_time > 0
# Fetch variables (after-lp)
assert_equals(
solver.get_variables(with_static=False),
_filter_attrs(
solver.get_variable_attrs(),
Variables(
names=np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"),
basis_status=np.array(["U", "B", "U", "L", "U"], dtype="S"),
reduced_costs=np.array(
[193.615385, 0.0, 187.230769, -23.692308, 13.538462]
),
sa_lb_down=np.array([-inf, -inf, -inf, -0.111111, -inf]),
sa_lb_up=np.array([1.0, 0.923077, 1.0, 1.0, 67.0]),
sa_obj_down=np.array(
[311.384615, 317.777778, 270.769231, -inf, -13.538462]
),
sa_obj_up=np.array([inf, 570.869565, inf, 243.692308, inf]),
sa_ub_down=np.array([0.913043, 0.923077, 0.9, 0.0, 43.0]),
sa_ub_up=np.array([2.043478, inf, 2.2, inf, 69.0]),
values=np.array([1.0, 0.923077, 1.0, 0.0, 67.0]),
),
),
)
# Fetch constraints (after-lp)
assert_equals(
solver.get_constraints(with_static=False),
_filter_attrs(
solver.get_constraint_attrs(),
Constraints(
basis_status=np.array(["N"], dtype="S"),
dual_values=np.array([13.538462]),
names=np.array(["eq_capacity"], dtype="S"),
sa_rhs_down=np.array([-24.0]),
sa_rhs_up=np.array([2.0]),
slacks=np.array([0.0]),
),
),
)
# Solve MIP
mip_stats = solver.solve(
tee=True,
)
assert not solver.is_infeasible()
assert mip_stats.mip_log is not None
assert len(mip_stats.mip_log) > 100
assert mip_stats.mip_lower_bound is not None
assert_equals(mip_stats.mip_lower_bound, 1183.0)
assert mip_stats.mip_upper_bound is not None
assert_equals(mip_stats.mip_upper_bound, 1183.0)
assert mip_stats.mip_sense is not None
assert_equals(mip_stats.mip_sense, "max")
assert mip_stats.mip_wallclock_time is not None
assert isinstance(mip_stats.mip_wallclock_time, float)
assert mip_stats.mip_wallclock_time > 0
# Fetch variables (after-mip)
assert_equals(
solver.get_variables(with_static=False),
_filter_attrs(
solver.get_variable_attrs(),
Variables(
names=np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"),
values=np.array([1.0, 0.0, 1.0, 1.0, 61.0]),
),
),
)
# Fetch constraints (after-mip)
assert_equals(
solver.get_constraints(with_static=False),
_filter_attrs(
solver.get_constraint_attrs(),
Constraints(
names=np.array(["eq_capacity"], dtype="S"),
slacks=np.array([0.0]),
),
),
)
# Build new constraint and verify that it is violated
cf = Constraints(
names=np.array(["cut"], dtype="S"),
lhs=coo_matrix([[1.0, 0.0, 0.0, 0.0, 0.0]]),
rhs=np.array([0.0]),
senses=np.array(["<"], dtype="S"),
)
assert_equals(solver.are_constraints_satisfied(cf), [False])
# Add constraint and verify it affects solution
solver.add_constraints(cf)
assert_equals(
solver.get_constraints(with_static=True),
_filter_attrs(
solver.get_constraint_attrs(),
Constraints(
names=np.array(["eq_capacity", "cut"], dtype="S"),
rhs=np.array([0.0, 0.0]),
lhs=coo_matrix(
[
[23.0, 26.0, 20.0, 18.0, -1.0],
[1.0, 0.0, 0.0, 0.0, 0.0],
]
),
senses=np.array(["=", "<"], dtype="S"),
),
),
)
stats = solver.solve()
assert_equals(stats.mip_lower_bound, 1030.0)
assert_equals(solver.are_constraints_satisfied(cf), [True])
# Remove the new constraint
solver.remove_constraints(np.array(["cut"], dtype="S"))
# New constraint should no longer affect solution
stats = solver.solve()
assert_equals(stats.mip_lower_bound, 1183.0)
def run_warm_start_tests(solver: InternalSolver) -> None:
instance = solver.build_test_instance_knapsack()
model = instance.to_model()
solver.set_instance(instance, model)
solver.set_warm_start({b"x[0]": 1.0, b"x[1]": 0.0, b"x[2]": 0.0, b"x[3]": 1.0})
stats = solver.solve(tee=True)
if stats.mip_warm_start_value is not None:
assert_equals(stats.mip_warm_start_value, 725.0)
solver.set_warm_start({b"x[0]": 1.0, b"x[1]": 1.0, b"x[2]": 1.0, b"x[3]": 1.0})
stats = solver.solve(tee=True)
assert stats.mip_warm_start_value is None
solver.fix({b"x[0]": 1.0, b"x[1]": 0.0, b"x[2]": 0.0, b"x[3]": 1.0})
stats = solver.solve(tee=True)
assert_equals(stats.mip_lower_bound, 725.0)
assert_equals(stats.mip_upper_bound, 725.0)
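# Editorial note (not in the original file): set_warm_start only suggests a
# starting point, which the solver may repair or reject (hence the None checks
# above), while fix pins the variables, forcing both MIP bounds to equal the
# warm-start objective of 725.0.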
def run_infeasibility_tests(solver: InternalSolver) -> None:
instance = solver.build_test_instance_infeasible()
solver.set_instance(instance)
mip_stats = solver.solve()
assert solver.is_infeasible()
assert solver.get_solution() is None
assert mip_stats.mip_upper_bound is None
assert mip_stats.mip_lower_bound is None
lp_stats = solver.solve_lp()
assert solver.get_solution() is None
assert lp_stats.lp_value is None
def run_iteration_cb_tests(solver: InternalSolver) -> None:
instance = solver.build_test_instance_knapsack()
solver.set_instance(instance)
count = 0
def custom_iteration_cb() -> bool:
nonlocal count
count += 1
return count < 5
solver.solve(iteration_cb=custom_iteration_cb)
assert_equals(count, 5)
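# Editorial sketch (not part of the original file): a more practical iteration
# callback, which stops the resolve loop after a wall-clock budget instead of
# a fixed iteration count. Returning False terminates the loop.
def _example_time_limited_cb(budget_sec: float) -> Any:
    import time  # local import; `time` is not otherwise used in this module

    start = time.time()

    def cb() -> bool:
        return time.time() - start < budget_sec

    return cb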
def run_lazy_cb_tests(solver: InternalSolver) -> None:
instance = solver.build_test_instance_knapsack()
model = instance.to_model()
def lazy_cb(cb_solver: InternalSolver, cb_model: Any) -> None:
relsol = cb_solver.get_solution()
assert relsol is not None
assert relsol[b"x[0]"] is not None
if relsol[b"x[0]"] > 0:
instance.enforce_lazy_constraint(cb_solver, cb_model, None)
solver.set_instance(instance, model)
solver.solve(lazy_cb=lazy_cb)
solution = solver.get_solution()
assert solution is not None
assert_equals(solution[b"x[0]"], 0.0)
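# Editorial note (not in the original file): the callback above cuts off any
# candidate solution with x[0] > 0 by enforcing a lazy constraint, so the
# accepted optimum must have x[0] == 0, which the final assertion verifies.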
def _equals_preprocess(obj: Any) -> Any:
    # Recursively converts `obj` to plain Python structures, rounding floats to
    # six decimal places so that assert_equals tolerates floating-point noise.
    if isinstance(obj, np.ndarray):
        if obj.dtype == "float64":
            return np.round(obj, decimals=6).tolist()
        else:
            return obj.tolist()
elif isinstance(obj, coo_matrix):
return obj.todense().tolist()
elif isinstance(obj, (int, str, bool, np.bool_, np.bytes_, bytes, bytearray)):
return obj
elif isinstance(obj, float):
return round(obj, 6)
elif isinstance(obj, list):
return [_equals_preprocess(i) for i in obj]
elif isinstance(obj, tuple):
return tuple(_equals_preprocess(i) for i in obj)
elif obj is None:
return None
elif isinstance(obj, dict):
return {k: _equals_preprocess(v) for (k, v) in obj.items()}
else:
for key in obj.__dict__.keys():
obj.__dict__[key] = _equals_preprocess(obj.__dict__[key])
return obj
def assert_equals(left: Any, right: Any) -> None:
left = _equals_preprocess(left)
right = _equals_preprocess(right)
assert left == right, f"left:\n{left}\nright:\n{right}"
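# Editorial sketch (not part of the original file): because both sides are
# rounded to six decimals before comparison, assert_equals tolerates tiny
# floating-point noise.
def _example_assert_equals() -> None:
    assert_equals(np.array([1.0000000001]), np.array([1.0]))  # passes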

View File

@@ -1,52 +0,0 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
from typing import Optional, Dict, Callable, Any, Union, TYPE_CHECKING
from mypy_extensions import TypedDict
if TYPE_CHECKING:
# noinspection PyUnresolvedReferences
from miplearn.solvers.learning import InternalSolver
Category = bytes
ConstraintName = bytes
ConstraintCategory = bytes
IterationCallback = Callable[[], bool]
LazyCallback = Callable[[Any, Any], None]
SolverParams = Dict[str, Any]
UserCutCallback = Callable[["InternalSolver", Any], None]
Solution = Dict[bytes, Optional[float]]
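# Editorial sketch (not part of the original file): variable names are byte
# strings throughout this codebase, so a Solution maps byte-string names to
# values, with None marking variables left unassigned.
_example_solution: Solution = {b"x[0]": 1.0, b"x[1]": None}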
LearningSolveStats = TypedDict(
"LearningSolveStats",
{
"Gap": Optional[float],
"Instance": Union[str, int],
"lp_log": str,
"lp_value": Optional[float],
"lp_wallclock_time": Optional[float],
"mip_lower_bound": Optional[float],
"mip_log": str,
"Mode": str,
"mip_nodes": Optional[int],
"Objective: Predicted lower bound": float,
"Objective: Predicted upper bound": float,
"Primal: Free": int,
"Primal: One": int,
"Primal: Zero": int,
"Sense": str,
"Solver": str,
"mip_upper_bound": Optional[float],
"mip_wallclock_time": float,
"mip_warm_start_value": Optional[float],
"LazyStatic: Removed": int,
"LazyStatic: Kept": int,
"LazyStatic: Restored": int,
"LazyStatic: Iterations": int,
"UserCuts: Added ahead-of-time": int,
"UserCuts: Added in callback": int,
},
total=False,
)
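# Editorial sketch (not part of the original file): since the TypedDict is
# declared with total=False, every key is optional, and solver components fill
# entries in incrementally during a solve.
def _example_stats() -> None:
    stats: LearningSolveStats = {}
    stats["Solver"] = "baseline"
    stats["mip_lower_bound"] = 1183.0
    assert stats.get("Gap") is None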