diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6a8c299..588686c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@
   )
   ```
 - `LazyConstraintComponent` has been renamed to `DynamicLazyConstraintsComponent`.
+- Category, lazy constraint, and cutting plane identifiers must now be strings instead of arbitrary `Hashable` objects. This change was required for compatibility with the HDF5 data format.
 
 ### Removed
 
diff --git a/Makefile b/Makefile
index 1341104..9a6c54a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ PYTHON := python3
 PYTEST := pytest
 PIP := $(PYTHON) -m pip
 MYPY := $(PYTHON) -m mypy
-PYTEST_ARGS := -W ignore::DeprecationWarning -vv -x --log-level=DEBUG
+PYTEST_ARGS := -W ignore::DeprecationWarning -vv --log-level=DEBUG
 VERSION := 0.2
 
 all: docs test
diff --git a/miplearn/components/component.py b/miplearn/components/component.py
index cf7e104..d47be48 100644
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@@ -2,14 +2,14 @@
 # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
 
-from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Hashable, Optional
+from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Optional
 
 import numpy as np
 from p_tqdm import p_umap
 
-from miplearn.features import Sample
+from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
-from miplearn.types import LearningSolveStats
+from miplearn.types import LearningSolveStats, Category
 
 if TYPE_CHECKING:
     from miplearn.solvers.learning import LearningSolver
@@ -101,8 +101,8 @@ class Component:
 
     def fit_xy(
        self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[Category, np.ndarray],
+        y: Dict[Category, np.ndarray],
     ) -> None:
         """
         Given two dictionaries x and y, mapping the name of the category to matrices
@@ -152,7 +152,7 @@
         self,
         instance: Optional[Instance],
         sample: Sample,
-    ) -> Dict[Hashable, Dict[str, float]]:
+    ) -> Dict[str, Dict[str, float]]:
         return {}
 
     def sample_xy(
diff --git a/miplearn/components/dynamic_common.py b/miplearn/components/dynamic_common.py
index c33e7c6..dbfd5e7 100644
--- a/miplearn/components/dynamic_common.py
+++ b/miplearn/components/dynamic_common.py
@@ -3,17 +3,19 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 import logging
-from typing import Dict, Hashable, List, Tuple, Optional, Any, Set
+from typing import Dict, List, Tuple, Optional, Any, Set
 
 import numpy as np
 from overrides import overrides
 
+from miplearn.features.extractor import FeaturesExtractor
 from miplearn.classifiers import Classifier
 from miplearn.classifiers.threshold import Threshold
 from miplearn.components import classifier_evaluation_dict
 from miplearn.components.component import Component
-from miplearn.features import Sample
+from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
+from miplearn.types import ConstraintCategory, ConstraintName
 
 logger = logging.getLogger(__name__)
 
@@ -32,9 +34,9 @@ class DynamicConstraintsComponent(Component):
         assert isinstance(classifier, Classifier)
         self.threshold_prototype: Threshold = threshold
         self.classifier_prototype: Classifier = classifier
-        self.classifiers: Dict[Hashable, Classifier] = {}
-        self.thresholds: Dict[Hashable, Threshold] = {}
-        self.known_cids: List[str] = []
+        self.classifiers: Dict[ConstraintCategory, Classifier] = {}
+        self.thresholds: Dict[ConstraintCategory, Threshold] = {}
+        self.known_cids: List[ConstraintName] = []
         self.attr = attr
 
     def sample_xy_with_cids(
@@ -42,52 +44,48 @@
         instance: Optional[Instance],
         sample: Sample,
     ) -> Tuple[
-        Dict[Hashable, List[List[float]]],
-        Dict[Hashable, List[List[bool]]],
-        Dict[Hashable, List[str]],
+        Dict[ConstraintCategory, List[List[float]]],
+        Dict[ConstraintCategory, List[List[bool]]],
+        Dict[ConstraintCategory, List[ConstraintName]],
     ]:
+        if len(self.known_cids) == 0:
+            return {}, {}, {}
         assert instance is not None
-        x: Dict[Hashable, List[List[float]]] = {}
-        y: Dict[Hashable, List[List[bool]]] = {}
-        cids: Dict[Hashable, List[str]] = {}
-        constr_categories_dict = instance.get_constraint_categories()
-        constr_features_dict = instance.get_constraint_features()
-        for cid in self.known_cids:
-            # Initialize categories
-            if cid in constr_categories_dict:
-                category = constr_categories_dict[cid]
-            else:
-                category = cid
-            if category is None:
-                continue
-            if category not in x:
-                x[category] = []
-                y[category] = []
-                cids[category] = []
-
-            # Features
-            features = []
-            assert sample.after_load is not None
-            assert sample.after_load.instance is not None
-            features.extend(sample.after_load.instance.to_list())
-            if cid in constr_features_dict:
-                features.extend(constr_features_dict[cid])
-            for ci in features:
-                assert isinstance(ci, float), (
-                    f"Constraint features must be a list of floats. "
-                    f"Found {ci.__class__.__name__} instead."
-                )
-                x[category].append(features)
-                cids[category].append(cid)
-
-            # Labels
-            if sample.after_mip is not None:
-                assert sample.after_mip.extra is not None
-                if sample.after_mip.extra[self.attr] is not None:
-                    if cid in sample.after_mip.extra[self.attr]:
-                        y[category] += [[False, True]]
-                    else:
-                        y[category] += [[True, False]]
+        x: Dict[ConstraintCategory, List[List[float]]] = {}
+        y: Dict[ConstraintCategory, List[List[bool]]] = {}
+        cids: Dict[ConstraintCategory, List[ConstraintName]] = {}
+        known_cids = np.array(self.known_cids, dtype="S")
+
+        enforced_cids = None
+        enforced_cids_np = sample.get_array(self.attr)
+        if enforced_cids_np is not None:
+            enforced_cids = list(enforced_cids_np)
+
+        # Get user-provided constraint features
+        (
+            constr_features,
+            constr_categories,
+            constr_lazy,
+        ) = FeaturesExtractor._extract_user_features_constrs(instance, known_cids)
+
+        # Augment with instance features
+        instance_features = sample.get_array("static_instance_features")
+        assert instance_features is not None
+        constr_features = np.hstack(
+            [
+                instance_features.reshape(1, -1).repeat(len(known_cids), axis=0),
+                constr_features,
+            ]
+        )
+
+        categories = np.unique(constr_categories)
+        for c in categories:
+            x[c] = constr_features[constr_categories == c].tolist()
+            cids[c] = known_cids[constr_categories == c].tolist()
+            if enforced_cids is not None:
+                tmp = np.isin(cids[c], enforced_cids).reshape(-1, 1)
+                y[c] = np.hstack([~tmp, tmp]).tolist()  # type: ignore
+
         return x, y, cids
 
     @overrides
@@ -104,7 +102,7 @@ class DynamicConstraintsComponent(Component):
         assert pre is not None
         known_cids: Set = set()
         for cids in pre:
-            known_cids |= cids
+            known_cids |= set(list(cids))
         self.known_cids.clear()
         self.known_cids.extend(sorted(known_cids))
 
@@ -112,8 +110,8 @@
         self,
         instance: Instance,
         sample: Sample,
-    ) -> List[Hashable]:
-        pred: List[Hashable] = []
+    ) -> List[ConstraintName]:
+        pred: List[ConstraintName] = []
         if len(self.known_cids) == 0:
             logger.info("Classifiers not fitted. Skipping.")
             return pred
@@ -133,19 +131,13 @@ class DynamicConstraintsComponent(Component):
 
     @overrides
     def pre_sample_xy(self, instance: Instance, sample: Sample) -> Any:
-        if (
-            sample.after_mip is None
-            or sample.after_mip.extra is None
-            or sample.after_mip.extra[self.attr] is None
-        ):
-            return
-        return sample.after_mip.extra[self.attr]
+        return sample.get_array(self.attr)
 
     @overrides
     def fit_xy(
         self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[ConstraintCategory, np.ndarray],
+        y: Dict[ConstraintCategory, np.ndarray],
     ) -> None:
         for category in x.keys():
             self.classifiers[category] = self.classifier_prototype.clone()
@@ -160,42 +152,20 @@
         self,
         instance: Instance,
         sample: Sample,
-    ) -> Dict[Hashable, Dict[str, float]]:
-        assert sample.after_mip is not None
-        assert sample.after_mip.extra is not None
-        assert self.attr in sample.after_mip.extra
-        actual = sample.after_mip.extra[self.attr]
+    ) -> Dict[str, float]:
+        actual = sample.get_array(self.attr)
+        assert actual is not None
         pred = set(self.sample_predict(instance, sample))
-        tp: Dict[Hashable, int] = {}
-        tn: Dict[Hashable, int] = {}
-        fp: Dict[Hashable, int] = {}
-        fn: Dict[Hashable, int] = {}
-        constr_categories_dict = instance.get_constraint_categories()
+        tp, tn, fp, fn = 0, 0, 0, 0
         for cid in self.known_cids:
-            if cid not in constr_categories_dict:
-                continue
-            category = constr_categories_dict[cid]
-            if category not in tp.keys():
-                tp[category] = 0
-                tn[category] = 0
-                fp[category] = 0
-                fn[category] = 0
             if cid in pred:
                 if cid in actual:
-                    tp[category] += 1
+                    tp += 1
                 else:
-                    fp[category] += 1
+                    fp += 1
             else:
                 if cid in actual:
-                    fn[category] += 1
+                    fn += 1
                 else:
-                    tn[category] += 1
-        return {
-            category: classifier_evaluation_dict(
-                tp=tp[category],
-                tn=tn[category],
-                fp=fp[category],
-                fn=fn[category],
-            )
-            for category in tp.keys()
-        }
+                    tn += 1
+        return classifier_evaluation_dict(tp=tp, tn=tn, fp=fp, fn=fn)
diff --git a/miplearn/components/dynamic_lazy.py b/miplearn/components/dynamic_lazy.py
index d0843d6..7756e64 100644
--- a/miplearn/components/dynamic_lazy.py
+++ b/miplearn/components/dynamic_lazy.py
@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 import logging
-from typing import Dict, List, TYPE_CHECKING, Hashable, Tuple, Any, Optional, Set
+from typing import Dict, List, TYPE_CHECKING, Tuple, Any, Optional, Set
 
 import numpy as np
 from overrides import overrides
 
@@ -13,9 +13,9 @@
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.classifiers.threshold import MinProbabilityThreshold, Threshold
 from miplearn.components.component import Component
 from miplearn.components.dynamic_common import DynamicConstraintsComponent
-from miplearn.features import Sample
+from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
-from miplearn.types import LearningSolveStats
+from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
 
 logger = logging.getLogger(__name__)
@@ -36,16 +36,16 @@
         self.dynamic: DynamicConstraintsComponent = DynamicConstraintsComponent(
             classifier=classifier,
             threshold=threshold,
-            attr="lazy_enforced",
+            attr="mip_constr_lazy_enforced",
         )
         self.classifiers = self.dynamic.classifiers
         self.thresholds = self.dynamic.thresholds
         self.known_cids = self.dynamic.known_cids
-        self.lazy_enforced: Set[Hashable] = set()
+        self.lazy_enforced: Set[ConstraintName] = set()
 
     @staticmethod
     def enforce(
-        cids: List[Hashable],
+        cids: List[ConstraintName],
         instance: Instance,
         model: Any,
         solver: "LearningSolver",
@@ -78,9 +78,10 @@
         stats: LearningSolveStats,
         sample: Sample,
     ) -> None:
-        assert sample.after_mip is not None
-        assert sample.after_mip.extra is not None
-        sample.after_mip.extra["lazy_enforced"] = set(self.lazy_enforced)
+        sample.put_array(
+            "mip_constr_lazy_enforced",
+            np.array(list(self.lazy_enforced), dtype="S"),
+        )
 
     @overrides
     def iteration_cb(
@@ -119,7 +120,7 @@
         self,
         instance: Instance,
         sample: Sample,
-    ) -> List[Hashable]:
+    ) -> List[ConstraintName]:
         return self.dynamic.sample_predict(instance, sample)
 
     @overrides
@@ -129,8 +130,8 @@
     @overrides
     def fit_xy(
         self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[ConstraintCategory, np.ndarray],
+        y: Dict[ConstraintCategory, np.ndarray],
     ) -> None:
         self.dynamic.fit_xy(x, y)
 
@@ -139,5 +140,5 @@
         self,
         instance: Instance,
         sample: Sample,
-    ) -> Dict[Hashable, Dict[str, float]]:
+    ) -> Dict[str, float]:
         return self.dynamic.sample_evaluate(instance, sample)
diff --git a/miplearn/components/dynamic_user_cuts.py b/miplearn/components/dynamic_user_cuts.py
index ec12dc0..b48d7e7 100644
--- a/miplearn/components/dynamic_user_cuts.py
+++ b/miplearn/components/dynamic_user_cuts.py
@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 import logging
-from typing import Any, TYPE_CHECKING, Hashable, Set, Tuple, Dict, List, Optional
+from typing import Any, TYPE_CHECKING, Set, Tuple, Dict, List, Optional
 
 import numpy as np
 from overrides import overrides
 
@@ -13,9 +13,9 @@
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.classifiers.threshold import Threshold, MinProbabilityThreshold
 from miplearn.components.component import Component
 from miplearn.components.dynamic_common import DynamicConstraintsComponent
-from miplearn.features import Sample
+from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
-from miplearn.types import LearningSolveStats
+from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
 
 logger = logging.getLogger(__name__)
@@ -32,9 +32,9 @@ class UserCutsComponent(Component):
         self.dynamic = DynamicConstraintsComponent(
             classifier=classifier,
             threshold=threshold,
-            attr="user_cuts_enforced",
+            attr="mip_user_cuts_enforced",
         )
-        self.enforced: Set[Hashable] = set()
+        self.enforced: Set[ConstraintName] = set()
         self.n_added_in_callback = 0
 
     @overrides
@@ -71,7 +71,7 @@
         for cid in cids:
             if cid in self.enforced:
                 continue
-            assert isinstance(cid, Hashable)
+            assert isinstance(cid, ConstraintName)
             instance.enforce_user_cut(solver.internal_solver, model, cid)
             self.enforced.add(cid)
             self.n_added_in_callback += 1
@@ -87,9 +87,10 @@
         stats: LearningSolveStats,
         sample: Sample,
     ) -> None:
-        assert sample.after_mip is not None
-        assert sample.after_mip.extra is not None
-        sample.after_mip.extra["user_cuts_enforced"] = set(self.enforced)
+        sample.put_array(
+            "mip_user_cuts_enforced",
+            np.array(list(self.enforced), dtype="S"),
+        )
         stats["UserCuts: Added in callback"] = self.n_added_in_callback
         if self.n_added_in_callback > 0:
             logger.info(f"{self.n_added_in_callback} user cuts added in callback")
@@ -112,7 +113,7 @@
         self,
         instance: "Instance",
         sample: Sample,
-    ) -> List[Hashable]:
+    ) -> List[ConstraintName]:
         return self.dynamic.sample_predict(instance, sample)
 
     @overrides
@@ -122,8 +123,8 @@
     @overrides
     def fit_xy(
         self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[ConstraintCategory, np.ndarray],
+        y: Dict[ConstraintCategory, np.ndarray],
     ) -> None:
         self.dynamic.fit_xy(x, y)
 
@@ -132,5 +133,5 @@
         self,
         instance: "Instance",
         sample: Sample,
-    ) -> Dict[Hashable, Dict[str, float]]:
+    ) -> Dict[str, float]:
         return self.dynamic.sample_evaluate(instance, sample)
diff --git a/miplearn/components/objective.py b/miplearn/components/objective.py
index 2fc5afd..af1316f 100644
--- a/miplearn/components/objective.py
+++ b/miplearn/components/objective.py
@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 import logging
-from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Hashable, Optional
+from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Optional, cast
 
 import numpy as np
 from overrides import overrides
 
@@ -12,7 +12,7 @@
 from sklearn.linear_model import LinearRegression
 from miplearn.classifiers import Regressor
 from miplearn.classifiers.sklearn import ScikitLearnRegressor
 from miplearn.components.component import Component
-from miplearn.features import Sample
+from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
 from miplearn.types import LearningSolveStats
@@ -53,8 +53,8 @@ class ObjectiveValueComponent(Component):
     @overrides
     def fit_xy(
         self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[str, np.ndarray],
+        y: Dict[str, np.ndarray],
     ) -> None:
         for c in ["Upper bound", "Lower bound"]:
             if c in y:
@@ -76,32 +76,27 @@
         self,
         _: Optional[Instance],
         sample: Sample,
-    ) -> Tuple[Dict[Hashable, List[List[float]]], Dict[Hashable, List[List[float]]]]:
-        # Instance features
-        assert sample.after_load is not None
-        assert sample.after_load.instance is not None
-        f = sample.after_load.instance.to_list()
-
-        # LP solve features
-        if sample.after_lp is not None:
-            assert sample.after_lp.lp_solve is not None
-            f.extend(sample.after_lp.lp_solve.to_list())
+    ) -> Tuple[Dict[str, List[List[float]]], Dict[str, List[List[float]]]]:
+        lp_instance_features_np = sample.get_array("lp_instance_features")
+        if lp_instance_features_np is None:
+            lp_instance_features_np = sample.get_array("static_instance_features")
+        assert lp_instance_features_np is not None
+        lp_instance_features = cast(List[float], lp_instance_features_np.tolist())
 
         # Features
-        x: Dict[Hashable, List[List[float]]] = {
-            "Upper bound": [f],
-            "Lower bound": [f],
+        x: Dict[str, List[List[float]]] = {
+            "Upper bound": [lp_instance_features],
+            "Lower bound": [lp_instance_features],
         }
 
         # Labels
-        y: Dict[Hashable, List[List[float]]] = {}
-        if sample.after_mip is not None:
-            mip_stats = sample.after_mip.mip_solve
-            assert mip_stats is not None
-            if mip_stats.mip_lower_bound is not None:
-                y["Lower bound"] = [[mip_stats.mip_lower_bound]]
-            if mip_stats.mip_upper_bound is not None:
-                y["Upper bound"] = [[mip_stats.mip_upper_bound]]
+        y: Dict[str, List[List[float]]] = {}
+        mip_lower_bound = sample.get_scalar("mip_lower_bound")
+        mip_upper_bound = sample.get_scalar("mip_upper_bound")
+        if mip_lower_bound is not None:
+            y["Lower bound"] = [[mip_lower_bound]]
+        if mip_upper_bound is not None:
+            y["Upper bound"] = [[mip_upper_bound]]
         return x, y
 
@@ -110,10 +105,7 @@
         self,
         instance: Instance,
         sample: Sample,
-    ) -> Dict[Hashable, Dict[str, float]]:
-        assert sample.after_mip is not None
-        assert sample.after_mip.mip_solve is not None
-
+    ) -> Dict[str, Dict[str, float]]:
         def compare(y_pred: float, y_actual: float) -> Dict[str, float]:
             err = np.round(abs(y_pred - y_actual), 8)
             return {
@@ -123,10 +115,10 @@
                 "Relative error": err / y_actual,
             }
 
-        result: Dict[Hashable, Dict[str, float]] = {}
+        result: Dict[str, Dict[str, float]] = {}
         pred = self.sample_predict(sample)
-        actual_ub = sample.after_mip.mip_solve.mip_upper_bound
-        actual_lb = sample.after_mip.mip_solve.mip_lower_bound
+        actual_ub = sample.get_scalar("mip_upper_bound")
+        actual_lb = sample.get_scalar("mip_lower_bound")
         if actual_ub is not None:
             result["Upper bound"] = compare(pred["Upper bound"], actual_ub)
         if actual_lb is not None:
diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py
index c37701d..00a0140 100644
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -3,15 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 import logging
-from typing import (
-    Dict,
-    List,
-    Hashable,
-    Any,
-    TYPE_CHECKING,
-    Tuple,
-    Optional,
-)
+from typing import Dict, List, Any, TYPE_CHECKING, Tuple, Optional
 
 import numpy as np
 from overrides import overrides
@@ -21,7 +13,7 @@ from miplearn.classifiers.adaptive import AdaptiveClassifier
 from miplearn.classifiers.threshold import MinPrecisionThreshold, Threshold
 from miplearn.components import classifier_evaluation_dict
 from miplearn.components.component import Component
-from miplearn.features import Sample
+from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
 from miplearn.types import (
     LearningSolveStats,
@@ -55,8 +47,8 @@ class PrimalSolutionComponent(Component):
         assert isinstance(threshold, Threshold)
         assert mode in ["exact", "heuristic"]
         self.mode = mode
-        self.classifiers: Dict[Hashable, Classifier] = {}
-        self.thresholds: Dict[Hashable, Threshold] = {}
+        self.classifiers: Dict[Category, Classifier] = {}
+        self.thresholds: Dict[Category, Threshold] = {}
         self.threshold_prototype = threshold
         self.classifier_prototype = classifier
@@ -103,8 +95,10 @@
         )
 
     def sample_predict(self, sample: Sample) -> Solution:
-        assert sample.after_load is not None
-        assert sample.after_load.variables is not None
+        var_names = sample.get_array("static_var_names")
+        var_categories = sample.get_array("static_var_categories")
+        assert var_names is not None
+        assert var_categories is not None
 
         # Compute y_pred
         x, _ = self.sample_xy(None, sample)
@@ -125,12 +119,10 @@
         ).T
 
         # Convert y_pred into solution
-        assert sample.after_load.variables.names is not None
-        assert sample.after_load.variables.categories is not None
-        solution: Solution = {v: None for v in sample.after_load.variables.names}
-        category_offset: Dict[Hashable, int] = {cat: 0 for cat in x.keys()}
-        for (i, var_name) in enumerate(sample.after_load.variables.names):
-            category = sample.after_load.variables.categories[i]
+        solution: Solution = {v: None for v in var_names}
+        category_offset: Dict[Category, int] = {cat: 0 for cat in x.keys()}
+        for (i, var_name) in enumerate(var_names):
+            category = var_categories[i]
             if category not in category_offset:
                 continue
             offset = category_offset[category]
@@ -150,40 +142,41 @@
     ) -> Tuple[Dict[Category, List[List[float]]], Dict[Category, List[List[float]]]]:
         x: Dict = {}
         y: Dict = {}
-        assert sample.after_load is not None
-        assert sample.after_load.instance is not None
-        assert sample.after_load.variables is not None
-        assert sample.after_load.variables.names is not None
-        assert sample.after_load.variables.categories is not None
-
-        for (i, var_name) in enumerate(sample.after_load.variables.names):
+        instance_features = sample.get_array("static_instance_features")
+        mip_var_values = sample.get_array("mip_var_values")
+        var_features = sample.get_array("lp_var_features")
+        var_names = sample.get_array("static_var_names")
+        var_categories = sample.get_array("static_var_categories")
+        if var_features is None:
+            var_features = sample.get_array("static_var_features")
+        assert instance_features is not None
+        assert var_features is not None
+        assert var_names is not None
+        assert var_categories is not None
+
+        for (i, var_name) in enumerate(var_names):
             # Initialize categories
-            category = sample.after_load.variables.categories[i]
-            if category is None:
+            category = var_categories[i]
+            if len(category) == 0:
                 continue
             if category not in x.keys():
                 x[category] = []
                 y[category] = []
 
             # Features
-            features = list(sample.after_load.instance.to_list())
-            features.extend(sample.after_load.variables.to_list(i))
-            if sample.after_lp is not None:
-                assert sample.after_lp.variables is not None
-                features.extend(sample.after_lp.variables.to_list(i))
+            features = list(instance_features)
+            features.extend(var_features[i])
             x[category].append(features)
 
             # Labels
-            if sample.after_mip is not None:
-                assert sample.after_mip.variables is not None
-                assert sample.after_mip.variables.values is not None
-                opt_value = sample.after_mip.variables.values[i]
+            if mip_var_values is not None:
+                opt_value = mip_var_values[i]
                 assert opt_value is not None
                 assert 0.0 - 1e-5 <= opt_value <= 1.0 + 1e-5, (
                     f"Variable {var_name} has non-binary value {opt_value} in the "
                     "optimal solution. Predicting values of non-binary "
                     "variables is not currently supported. Please set its "
-                    "category to None."
+                    "category to ''."
                 )
                 y[category].append([opt_value < 0.5, opt_value >= 0.5])
         return x, y
@@ -193,15 +186,14 @@
         self,
         _: Optional[Instance],
         sample: Sample,
-    ) -> Dict[Hashable, Dict[str, float]]:
-        assert sample.after_mip is not None
-        assert sample.after_mip.variables is not None
-        assert sample.after_mip.variables.values is not None
-        assert sample.after_mip.variables.names is not None
+    ) -> Dict[str, Dict[str, float]]:
+        mip_var_values = sample.get_array("mip_var_values")
+        var_names = sample.get_array("static_var_names")
+        assert mip_var_values is not None
+        assert var_names is not None
         solution_actual = {
-            var_name: sample.after_mip.variables.values[i]
-            for (i, var_name) in enumerate(sample.after_mip.variables.names)
+            var_name: mip_var_values[i] for (i, var_name) in enumerate(var_names)
         }
         solution_pred = self.sample_predict(sample)
         vars_all, vars_one, vars_zero = set(), set(), set()
@@ -221,13 +213,13 @@
         pred_one_negative = vars_all - pred_one_positive
         pred_zero_negative = vars_all - pred_zero_positive
         return {
-            0: classifier_evaluation_dict(
+            "0": classifier_evaluation_dict(
                 tp=len(pred_zero_positive & vars_zero),
                 tn=len(pred_zero_negative & vars_one),
                 fp=len(pred_zero_positive & vars_one),
                 fn=len(pred_zero_negative & vars_zero),
             ),
-            1: classifier_evaluation_dict(
+            "1": classifier_evaluation_dict(
                 tp=len(pred_one_positive & vars_one),
                 tn=len(pred_one_negative & vars_zero),
                 fp=len(pred_one_positive & vars_zero),
@@ -238,8 +230,8 @@
     @overrides
     def fit_xy(
         self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[Category, np.ndarray],
+        y: Dict[Category, np.ndarray],
     ) -> None:
         for category in x.keys():
             clf = self.classifier_prototype.clone()
diff --git a/miplearn/components/static_lazy.py b/miplearn/components/static_lazy.py
index 141bbcb..e819755 100644
--- a/miplearn/components/static_lazy.py
+++ b/miplearn/components/static_lazy.py
@@ -3,7 +3,7 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 import logging
-from typing import Dict, Tuple, List, Hashable, Any, TYPE_CHECKING, Set, Optional
+from typing import Dict, Tuple, List, Any, TYPE_CHECKING, Set, Optional
 
 import numpy as np
 from overrides import overrides
 
@@ -12,9 +12,10 @@
 from miplearn.classifiers import Classifier
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.classifiers.threshold import MinProbabilityThreshold, Threshold
 from miplearn.components.component import Component
-from miplearn.features import Sample, ConstraintFeatures
+from miplearn.features.sample import Sample
+from miplearn.solvers.internal import Constraints
 from miplearn.instance.base import Instance
-from miplearn.types import LearningSolveStats
+from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
 
 logger = logging.getLogger(__name__)
@@ -23,7 +24,7 @@ if TYPE_CHECKING:
 
 
 class LazyConstraint:
-    def __init__(self, cid: str, obj: Any) -> None:
+    def __init__(self, cid: ConstraintName, obj: Any) -> None:
         self.cid = cid
         self.obj = obj
 
@@ -43,11 +44,11 @@
         assert isinstance(classifier, Classifier)
         self.classifier_prototype: Classifier = classifier
         self.threshold_prototype: Threshold = threshold
-        self.classifiers: Dict[Hashable, Classifier] = {}
-        self.thresholds: Dict[Hashable, Threshold] = {}
-        self.pool: ConstraintFeatures = ConstraintFeatures()
+        self.classifiers: Dict[ConstraintCategory, Classifier] = {}
+        self.thresholds: Dict[ConstraintCategory, Threshold] = {}
+        self.pool: Constraints = Constraints()
         self.violation_tolerance: float = violation_tolerance
-        self.enforced_cids: Set[Hashable] = set()
+        self.enforced_cids: Set[ConstraintName] = set()
         self.n_restored: int = 0
         self.n_iterations: int = 0
 
@@ -60,9 +61,10 @@
         stats: LearningSolveStats,
         sample: Sample,
     ) -> None:
-        assert sample.after_mip is not None
-        assert sample.after_mip.extra is not None
-        sample.after_mip.extra["lazy_enforced"] = self.enforced_cids
+        sample.put_array(
+            "mip_constr_lazy_enforced",
+            np.array(list(self.enforced_cids), dtype="S"),
+        )
         stats["LazyStatic: Restored"] = self.n_restored
         stats["LazyStatic: Iterations"] = self.n_iterations
 
@@ -76,16 +78,15 @@
         sample: Sample,
     ) -> None:
         assert solver.internal_solver is not None
-        assert sample.after_load is not None
-        assert sample.after_load.instance is not None
+        static_lazy_count = sample.get_scalar("static_constr_lazy_count")
+        assert static_lazy_count is not None
 
         logger.info("Predicting violated (static) lazy constraints...")
-        if sample.after_load.instance.lazy_constraint_count == 0:
+        if static_lazy_count == 0:
             logger.info("Instance does not have static lazy constraints. Skipping.")
         self.enforced_cids = set(self.sample_predict(sample))
         logger.info("Moving lazy constraints to the pool...")
-        constraints = sample.after_load.constraints
-        assert constraints is not None
+        constraints = Constraints.from_sample(sample)
         assert constraints.lazy is not None
         assert constraints.names is not None
         selected = [
@@ -107,8 +108,8 @@
     @overrides
     def fit_xy(
         self,
-        x: Dict[Hashable, np.ndarray],
-        y: Dict[Hashable, np.ndarray],
+        x: Dict[ConstraintCategory, np.ndarray],
+        y: Dict[ConstraintCategory, np.ndarray],
     ) -> None:
         for c in y.keys():
             assert c in x
@@ -138,9 +139,9 @@
     ) -> None:
         self._check_and_add(solver)
 
-    def sample_predict(self, sample: Sample) -> List[Hashable]:
+    def sample_predict(self, sample: Sample) -> List[ConstraintName]:
         x, y, cids = self._sample_xy_with_cids(sample)
-        enforced_cids: List[Hashable] = []
+        enforced_cids: List[ConstraintName] = []
         for category in x.keys():
             if category not in self.classifiers:
                 continue
@@ -158,7 +159,10 @@
         self,
         _: Optional[Instance],
         sample: Sample,
-    ) -> Tuple[Dict[Hashable, List[List[float]]], Dict[Hashable, List[List[float]]]]:
+    ) -> Tuple[
+        Dict[ConstraintCategory, List[List[float]]],
+        Dict[ConstraintCategory, List[List[float]]],
+    ]:
         x, y, __ = self._sample_xy_with_cids(sample)
         return x, y
 
@@ -185,7 +189,7 @@
         logger.info(f"Found {n_violated} violated lazy constraints found")
         if n_violated > 0:
             logger.info(
-                "Enforcing {n_violated} lazy constraints; "
+                f"Enforcing {n_violated} lazy constraints; "
                 f"{n_satisfied} left in the pool..."
             )
             solver.internal_solver.add_constraints(violated_constraints)
@@ -199,25 +203,34 @@
     def _sample_xy_with_cids(
         self, sample: Sample
     ) -> Tuple[
-        Dict[Hashable, List[List[float]]],
-        Dict[Hashable, List[List[float]]],
-        Dict[Hashable, List[str]],
+        Dict[ConstraintCategory, List[List[float]]],
+        Dict[ConstraintCategory, List[List[float]]],
+        Dict[ConstraintCategory, List[ConstraintName]],
     ]:
-        x: Dict[Hashable, List[List[float]]] = {}
-        y: Dict[Hashable, List[List[float]]] = {}
-        cids: Dict[Hashable, List[str]] = {}
-        assert sample.after_load is not None
-        constraints = sample.after_load.constraints
-        assert constraints is not None
-        assert constraints.names is not None
-        assert constraints.lazy is not None
-        assert constraints.categories is not None
-        for (cidx, cname) in enumerate(constraints.names):
+        x: Dict[ConstraintCategory, List[List[float]]] = {}
+        y: Dict[ConstraintCategory, List[List[float]]] = {}
+        cids: Dict[ConstraintCategory, List[ConstraintName]] = {}
+        instance_features = sample.get_array("static_instance_features")
+        constr_features = sample.get_array("lp_constr_features")
+        constr_names = sample.get_array("static_constr_names")
+        constr_categories = sample.get_array("static_constr_categories")
+        constr_lazy = sample.get_array("static_constr_lazy")
+        lazy_enforced = sample.get_array("mip_constr_lazy_enforced")
+        if constr_features is None:
+            constr_features = sample.get_array("static_constr_features")
+
+        assert instance_features is not None
+        assert constr_features is not None
+        assert constr_names is not None
+        assert constr_categories is not None
+        assert constr_lazy is not None
+
+        for (cidx, cname) in enumerate(constr_names):
             # Initialize categories
-            if not constraints.lazy[cidx]:
+            if not constr_lazy[cidx]:
                 continue
-            category = constraints.categories[cidx]
-            if category is None:
+            category = constr_categories[cidx]
+            if len(category) == 0:
                 continue
             if category not in x:
                 x[category] = []
@@ -225,23 +238,14 @@
                 cids[category] = []
 
             # Features
-            sf = sample.after_load
-            if sample.after_lp is not None:
-                sf = sample.after_lp
-            assert sf.instance is not None
-            assert sf.constraints is not None
-            features = list(sf.instance.to_list())
-            features.extend(sf.constraints.to_list(cidx))
+            features = list(instance_features)
+            features.extend(constr_features[cidx])
             x[category].append(features)
             cids[category].append(cname)
 
             # Labels
-            if (
-                (sample.after_mip is not None)
-                and (sample.after_mip.extra is not None)
-                and ("lazy_enforced" in sample.after_mip.extra)
-            ):
-                if cname in sample.after_mip.extra["lazy_enforced"]:
+            if lazy_enforced is not None:
+                if cname in lazy_enforced:
                     y[category] += [[False, True]]
                 else:
                     y[category] += [[True, False]]
diff --git a/miplearn/features.py b/miplearn/features.py
deleted file mode 100644
index 1dccf03..0000000
--- a/miplearn/features.py
+++ /dev/null
@@ -1,384 +0,0 @@
-# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
-# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
-# Released under the modified BSD license. See COPYING.md for more details.
-
-import collections
-import numbers
-from dataclasses import dataclass
-from math import log, isfinite
-from typing import TYPE_CHECKING, Dict, Optional, List, Hashable, Tuple
-
-import numpy as np
-
-if TYPE_CHECKING:
-    from miplearn.solvers.internal import InternalSolver, LPSolveStats, MIPSolveStats
-    from miplearn.instance.base import Instance
-
-
-@dataclass
-class InstanceFeatures:
-    user_features: Optional[List[float]] = None
-    lazy_constraint_count: int = 0
-
-    def to_list(self) -> List[float]:
-        features: List[float] = []
-        if self.user_features is not None:
-            features.extend(self.user_features)
-        _clip(features)
-        return features
-
-
-@dataclass
-class VariableFeatures:
-    names: Optional[List[str]] = None
-    basis_status: Optional[List[str]] = None
-    categories: Optional[List[Optional[Hashable]]] = None
-    lower_bounds: Optional[List[float]] = None
-    obj_coeffs: Optional[List[float]] = None
-    reduced_costs: Optional[List[float]] = None
-    sa_lb_down: Optional[List[float]] = None
-    sa_lb_up: Optional[List[float]] = None
-    sa_obj_down: Optional[List[float]] = None
-    sa_obj_up: Optional[List[float]] = None
-    sa_ub_down: Optional[List[float]] = None
-    sa_ub_up: Optional[List[float]] = None
-    types: Optional[List[str]] = None
-    upper_bounds: Optional[List[float]] = None
-    user_features: Optional[List[Optional[List[float]]]] = None
-    values: Optional[List[float]] = None
-
-    # Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based
-    # approximation of strong branching. INFORMS Journal on Computing, 29(1), 185-195.
-    alvarez_2017: Optional[List[List[float]]] = None
-
-    def to_list(self, index: int) -> List[float]:
-        features: List[float] = []
-        for attr in [
-            "lower_bounds",
-            "obj_coeffs",
-            "reduced_costs",
-            "sa_lb_down",
-            "sa_lb_up",
-            "sa_obj_down",
-            "sa_obj_up",
-            "sa_ub_down",
-            "sa_ub_up",
-            "upper_bounds",
-            "values",
-        ]:
-            if getattr(self, attr) is not None:
-                features.append(getattr(self, attr)[index])
-        for attr in ["user_features", "alvarez_2017"]:
-            if getattr(self, attr) is not None:
-                if getattr(self, attr)[index] is not None:
-                    features.extend(getattr(self, attr)[index])
-        _clip(features)
-        return features
-
-
-@dataclass
-class ConstraintFeatures:
-    basis_status: Optional[List[str]] = None
-    categories: Optional[List[Optional[Hashable]]] = None
-    dual_values: Optional[List[float]] = None
-    names: Optional[List[str]] = None
-    lazy: Optional[List[bool]] = None
-    lhs: Optional[List[List[Tuple[str, float]]]] = None
-    rhs: Optional[List[float]] = None
-    sa_rhs_down: Optional[List[float]] = None
-    sa_rhs_up: Optional[List[float]] = None
-    senses: Optional[List[str]] = None
-    slacks: Optional[List[float]] = None
-    user_features: Optional[List[Optional[List[float]]]] = None
-
-    def to_list(self, index: int) -> List[float]:
-        features: List[float] = []
-        for attr in [
-            "dual_values",
-            "rhs",
-            "slacks",
-        ]:
-            if getattr(self, attr) is not None:
-                features.append(getattr(self, attr)[index])
-        for attr in ["user_features"]:
-            if getattr(self, attr) is not None:
-                if getattr(self, attr)[index] is not None:
-                    features.extend(getattr(self, attr)[index])
-        _clip(features)
-        return features
-
-    def __getitem__(self, selected: List[bool]) -> "ConstraintFeatures":
-        return ConstraintFeatures(
-            basis_status=self._filter(self.basis_status, selected),
-            categories=self._filter(self.categories, selected),
-            dual_values=self._filter(self.dual_values, selected),
-            names=self._filter(self.names, selected),
-            lazy=self._filter(self.lazy, selected),
-            lhs=self._filter(self.lhs, selected),
-            rhs=self._filter(self.rhs, selected),
-            sa_rhs_down=self._filter(self.sa_rhs_down, selected),
-            sa_rhs_up=self._filter(self.sa_rhs_up, selected),
-            senses=self._filter(self.senses, selected),
-            slacks=self._filter(self.slacks, selected),
-            user_features=self._filter(self.user_features, selected),
-        )
-
-    def _filter(
-        self,
-        obj: Optional[List],
-        selected: List[bool],
-    ) -> Optional[List]:
-        if obj is None:
-            return None
-        return [obj[i] for (i, selected_i) in enumerate(selected) if selected_i]
-
-
-@dataclass
-class Features:
-    instance: Optional[InstanceFeatures] = None
-    variables: Optional[VariableFeatures] = None
-    constraints: Optional[ConstraintFeatures] = None
-    lp_solve: Optional["LPSolveStats"] = None
-    mip_solve: Optional["MIPSolveStats"] = None
-    extra: Optional[Dict] = None
-
-
-@dataclass
-class Sample:
-    after_load: Optional[Features] = None
-    after_lp: Optional[Features] = None
-    after_mip: Optional[Features] = None
-
-
-class FeaturesExtractor:
-    def __init__(
-        self,
-        with_sa: bool = True,
-        with_lhs: bool = True,
-    ) -> None:
-        self.with_sa = with_sa
-        self.with_lhs = with_lhs
-
-    def extract(
-        self,
-        instance: "Instance",
-        solver: "InternalSolver",
-        with_static: bool = True,
-    ) -> Features:
-        features = Features()
-        features.variables = solver.get_variables(
-            with_static=with_static,
-            with_sa=self.with_sa,
-        )
-        features.constraints = solver.get_constraints(
-            with_static=with_static,
-            with_sa=self.with_sa,
-            with_lhs=self.with_lhs,
-        )
-        if with_static:
-            self._extract_user_features_vars(instance, features)
-            self._extract_user_features_constrs(instance, features)
-            self._extract_user_features_instance(instance, features)
-        self._extract_alvarez_2017(features)
-        return features
-
-    def _extract_user_features_vars(
-        self,
-        instance: "Instance",
-        features: Features,
-    ) -> None:
-        assert features.variables is not None
-        assert features.variables.names is not None
-        categories: List[Optional[Hashable]] = []
-        user_features: List[Optional[List[float]]] = []
-        var_features_dict = instance.get_variable_features()
-        var_categories_dict = instance.get_variable_categories()
-
-        for (i, var_name) in enumerate(features.variables.names):
-            if var_name not in var_categories_dict:
-                user_features.append(None)
-                categories.append(None)
-                continue
-            category: Hashable = var_categories_dict[var_name]
-            assert isinstance(category, collections.Hashable), (
-                f"Variable category must be be hashable. "
-                f"Found {type(category).__name__} instead for var={var_name}."
-            )
-            categories.append(category)
-            user_features_i: Optional[List[float]] = None
-            if var_name in var_features_dict:
-                user_features_i = var_features_dict[var_name]
-                if isinstance(user_features_i, np.ndarray):
-                    user_features_i = user_features_i.tolist()
-                assert isinstance(user_features_i, list), (
-                    f"Variable features must be a list. "
-                    f"Found {type(user_features_i).__name__} instead for "
-                    f"var={var_name}."
-                )
-                for v in user_features_i:
-                    assert isinstance(v, numbers.Real), (
-                        f"Variable features must be a list of numbers. "
-                        f"Found {type(v).__name__} instead "
-                        f"for var={var_name}."
-                    )
-                user_features_i = list(user_features_i)
-            user_features.append(user_features_i)
-        features.variables.categories = categories
-        features.variables.user_features = user_features
-
-    def _extract_user_features_constrs(
-        self,
-        instance: "Instance",
-        features: Features,
-    ) -> None:
-        assert features.constraints is not None
-        assert features.constraints.names is not None
-        has_static_lazy = instance.has_static_lazy_constraints()
-        user_features: List[Optional[List[float]]] = []
-        categories: List[Optional[Hashable]] = []
-        lazy: List[bool] = []
-        constr_categories_dict = instance.get_constraint_categories()
-        constr_features_dict = instance.get_constraint_features()
-
-        for (cidx, cname) in enumerate(features.constraints.names):
-            category: Optional[Hashable] = cname
-            if cname in constr_categories_dict:
-                category = constr_categories_dict[cname]
-            if category is None:
-                user_features.append(None)
-                categories.append(None)
-                continue
-            assert isinstance(category, collections.Hashable), (
-                f"Constraint category must be hashable. "
-                f"Found {type(category).__name__} instead for cname={cname}.",
-            )
-            categories.append(category)
-            cf: Optional[List[float]] = None
-            if cname in constr_features_dict:
-                cf = constr_features_dict[cname]
-                if isinstance(cf, np.ndarray):
-                    cf = cf.tolist()
-                assert isinstance(cf, list), (
-                    f"Constraint features must be a list. "
-                    f"Found {type(cf).__name__} instead for cname={cname}."
-                )
-                for f in cf:
-                    assert isinstance(f, numbers.Real), (
-                        f"Constraint features must be a list of numbers. "
-                        f"Found {type(f).__name__} instead for cname={cname}."
-                    )
-                cf = list(cf)
-            user_features.append(cf)
-            if has_static_lazy:
-                lazy.append(instance.is_constraint_lazy(cname))
-            else:
-                lazy.append(False)
-        features.constraints.user_features = user_features
-        features.constraints.lazy = lazy
-        features.constraints.categories = categories
-
-    def _extract_user_features_instance(
-        self,
-        instance: "Instance",
-        features: Features,
-    ) -> None:
-        user_features = instance.get_instance_features()
-        if isinstance(user_features, np.ndarray):
-            user_features = user_features.tolist()
-        assert isinstance(user_features, list), (
-            f"Instance features must be a list. "
-            f"Found {type(user_features).__name__} instead."
-        )
-        for v in user_features:
-            assert isinstance(v, numbers.Real), (
-                f"Instance features must be a list of numbers. "
-                f"Found {type(v).__name__} instead."
-            )
-        assert features.constraints is not None
-        assert features.constraints.lazy is not None
-        features.instance = InstanceFeatures(
-            user_features=user_features,
-            lazy_constraint_count=sum(features.constraints.lazy),
-        )
-
-    def _extract_alvarez_2017(self, features: Features) -> None:
-        assert features.variables is not None
-        assert features.variables.names is not None
-
-        obj_coeffs = features.variables.obj_coeffs
-        obj_sa_down = features.variables.sa_obj_down
-        obj_sa_up = features.variables.sa_obj_up
-        values = features.variables.values
-
-        pos_obj_coeff_sum = 0.0
-        neg_obj_coeff_sum = 0.0
-        if obj_coeffs is not None:
-            for coeff in obj_coeffs:
-                if coeff > 0:
-                    pos_obj_coeff_sum += coeff
-                if coeff < 0:
-                    neg_obj_coeff_sum += -coeff
-
-        features.variables.alvarez_2017 = []
-        for i in range(len(features.variables.names)):
-            f: List[float] = []
-            if obj_coeffs is not None:
-                # Feature 1
-                f.append(np.sign(obj_coeffs[i]))
-
-                # Feature 2
-                if pos_obj_coeff_sum > 0:
-                    f.append(abs(obj_coeffs[i]) / pos_obj_coeff_sum)
-                else:
-                    f.append(0.0)
-
-                # Feature 3
-                if neg_obj_coeff_sum > 0:
-                    f.append(abs(obj_coeffs[i]) / neg_obj_coeff_sum)
-                else:
-                    f.append(0.0)
-
-            if values is not None:
-                # Feature 37
-                f.append(
-                    min(
-                        values[i] - np.floor(values[i]),
-                        np.ceil(values[i]) - values[i],
-                    )
-                )
-
-            if obj_sa_up is not None:
-                assert obj_sa_down is not None
-                assert obj_coeffs is not None
-
-                # Convert inf into large finite numbers
-                sd = max(-1e20, obj_sa_down[i])
-                su = min(1e20, obj_sa_up[i])
-                obj = obj_coeffs[i]
-
-                # Features 44 and 46
-                f.append(np.sign(obj_sa_up[i]))
-                f.append(np.sign(obj_sa_down[i]))
-
-                # Feature 47
-                csign = np.sign(obj)
-                if csign != 0 and ((obj - sd) / csign) > 0.001:
-                    f.append(log((obj - sd) / csign))
-                else:
-                    f.append(0.0)
-
-                # Feature 48
-                if csign != 0 and ((su - obj) / csign) > 0.001:
-                    f.append(log((su - obj) / csign))
-                else:
-                    f.append(0.0)
-
-            for v in f:
-                assert isfinite(v), f"non-finite elements detected: {f}"
-            features.variables.alvarez_2017.append(f)
-
-
-def _clip(v: List[float]) -> None:
-    for (i, vi) in enumerate(v):
-        if not isfinite(vi):
-            v[i] = max(min(vi, 1e20), -1e20)
diff --git a/miplearn/features/__init__.py b/miplearn/features/__init__.py
new file mode 100644
index 0000000..5fbccb1
--- /dev/null
+++ b/miplearn/features/__init__.py
@@ -0,0 +1,3 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
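Note: the `Sample` classes introduced below replace the nested `Features`/`Sample` dataclasses deleted above with flat, string-keyed storage. A minimal sketch of the access-pattern migration (feature keys are illustrative examples taken from this patch; `MemorySample` is defined in `miplearn/features/sample.py` below):

```python
import numpy as np

from miplearn.features.sample import MemorySample

sample = MemorySample()

# Old style (deleted above): sample.after_load.instance.to_list(),
# sample.after_lp.variables.values, sample.after_mip.extra["lazy_enforced"].
# New style: flat keys prefixed with static_/lp_/mip_.
sample.put_array("static_instance_features", np.array([5.0, 1.0]))
sample.put_array("lp_var_values", np.array([0.0, 0.5, 1.0]))
sample.put_scalar("mip_lower_bound", 42.0)

# Getters return None for missing keys, so components can fall back
# from LP features to static features, as objective.py does above.
features = sample.get_array("lp_instance_features")
if features is None:
    features = sample.get_array("static_instance_features")
```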
diff --git a/miplearn/features/extractor.py b/miplearn/features/extractor.py
new file mode 100644
index 0000000..2f057e9
--- /dev/null
+++ b/miplearn/features/extractor.py
@@ -0,0 +1,432 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from math import log, isfinite
+from typing import TYPE_CHECKING, List, Tuple, Optional
+
+import numpy as np
+from scipy.sparse import coo_matrix
+
+from miplearn.features.sample import Sample
+from miplearn.solvers.internal import LPSolveStats
+
+if TYPE_CHECKING:
+    from miplearn.solvers.internal import InternalSolver
+    from miplearn.instance.base import Instance
+
+
+# noinspection PyPep8Naming
+class FeaturesExtractor:
+    def __init__(
+        self,
+        with_sa: bool = True,
+        with_lhs: bool = True,
+    ) -> None:
+        self.with_sa = with_sa
+        self.with_lhs = with_lhs
+        self.var_features_user: Optional[np.ndarray] = None
+
+    def extract_after_load_features(
+        self,
+        instance: "Instance",
+        solver: "InternalSolver",
+        sample: Sample,
+    ) -> None:
+        variables = solver.get_variables(with_static=True)
+        constraints = solver.get_constraints(with_static=True, with_lhs=self.with_lhs)
+        assert constraints.names is not None
+        sample.put_array("static_var_lower_bounds", variables.lower_bounds)
+        sample.put_array("static_var_names", variables.names)
+        sample.put_array("static_var_obj_coeffs", variables.obj_coeffs)
+        sample.put_array("static_var_types", variables.types)
+        sample.put_array("static_var_upper_bounds", variables.upper_bounds)
+        sample.put_array("static_constr_names", constraints.names)
+        sample.put_sparse("static_constr_lhs", constraints.lhs)
+        sample.put_array("static_constr_rhs", constraints.rhs)
+        sample.put_array("static_constr_senses", constraints.senses)
+
+        # Instance features
+        self._extract_user_features_instance(instance, sample)
+
+        # Constraint features
+        (
+            constr_features,
+            constr_categories,
+            constr_lazy,
+        ) = FeaturesExtractor._extract_user_features_constrs(
+            instance,
+            constraints.names,
+        )
+        sample.put_array("static_constr_features", constr_features)
+        sample.put_array("static_constr_categories", constr_categories)
+        sample.put_array("static_constr_lazy", constr_lazy)
+        sample.put_scalar("static_constr_lazy_count", int(constr_lazy.sum()))
+
+        # Variable features
+        (
+            vars_features_user,
+            var_categories,
+        ) = self._extract_user_features_vars(instance, sample)
+        self.var_features_user = vars_features_user
+        sample.put_array("static_var_categories", var_categories)
+        assert variables.lower_bounds is not None
+        assert variables.obj_coeffs is not None
+        assert variables.upper_bounds is not None
+        sample.put_array(
+            "static_var_features",
+            np.hstack(
+                [
+                    vars_features_user,
+                    self._compute_AlvLouWeh2017(
+                        A=constraints.lhs,
+                        b=constraints.rhs,
+                        c=variables.obj_coeffs,
+                    ),
+                ]
+            ),
+        )
+
+    def extract_after_lp_features(
+        self,
+        solver: "InternalSolver",
+        sample: Sample,
+        lp_stats: LPSolveStats,
+    ) -> None:
+        for (k, v) in lp_stats.__dict__.items():
+            sample.put_scalar(k, v)
+        variables = solver.get_variables(with_static=False, with_sa=self.with_sa)
+        constraints = solver.get_constraints(with_static=False, with_sa=self.with_sa)
+        sample.put_array("lp_var_basis_status", variables.basis_status)
+        sample.put_array("lp_var_reduced_costs", variables.reduced_costs)
+        sample.put_array("lp_var_sa_lb_down", variables.sa_lb_down)
+        sample.put_array("lp_var_sa_lb_up", variables.sa_lb_up)
+        sample.put_array("lp_var_sa_obj_down", variables.sa_obj_down)
+        sample.put_array("lp_var_sa_obj_up", variables.sa_obj_up)
+        sample.put_array("lp_var_sa_ub_down", variables.sa_ub_down)
+        sample.put_array("lp_var_sa_ub_up", variables.sa_ub_up)
+        sample.put_array("lp_var_values", variables.values)
+        sample.put_array("lp_constr_basis_status", constraints.basis_status)
+        sample.put_array("lp_constr_dual_values", constraints.dual_values)
+        sample.put_array("lp_constr_sa_rhs_down", constraints.sa_rhs_down)
+        sample.put_array("lp_constr_sa_rhs_up", constraints.sa_rhs_up)
+        sample.put_array("lp_constr_slacks", constraints.slacks)
+
+        # Variable features
+        lp_var_features_list = []
+        for f in [
+            self.var_features_user,
+            self._compute_AlvLouWeh2017(
+                A=sample.get_sparse("static_constr_lhs"),
+                b=sample.get_array("static_constr_rhs"),
+                c=sample.get_array("static_var_obj_coeffs"),
+                c_sa_up=variables.sa_obj_up,
+                c_sa_down=variables.sa_obj_down,
+                values=variables.values,
+            ),
+        ]:
+            if f is not None:
+                lp_var_features_list.append(f)
+        for f in [
+            variables.reduced_costs,
+            variables.sa_lb_down,
+            variables.sa_lb_up,
+            variables.sa_obj_down,
+            variables.sa_obj_up,
+            variables.sa_ub_down,
+            variables.sa_ub_up,
+            variables.values,
+        ]:
+            if f is not None:
+                lp_var_features_list.append(f.reshape(-1, 1))
+        lp_var_features = np.hstack(lp_var_features_list)
+        _fix_infinity(lp_var_features)
+        sample.put_array("lp_var_features", lp_var_features)
+
+        # Constraint features
+        lp_constr_features_list = []
+        for f in [sample.get_array("static_constr_features")]:
+            if f is not None:
+                lp_constr_features_list.append(f)
+        for f in [
+            sample.get_array("lp_constr_dual_values"),
+            sample.get_array("lp_constr_sa_rhs_down"),
+            sample.get_array("lp_constr_sa_rhs_up"),
+            sample.get_array("lp_constr_slacks"),
+        ]:
+            if f is not None:
+                lp_constr_features_list.append(f.reshape(-1, 1))
+        lp_constr_features = np.hstack(lp_constr_features_list)
+        _fix_infinity(lp_constr_features)
+        sample.put_array("lp_constr_features", lp_constr_features)
+
+        # Build lp_instance_features
+        static_instance_features = sample.get_array("static_instance_features")
+        assert static_instance_features is not None
+        assert lp_stats.lp_value is not None
+        assert lp_stats.lp_wallclock_time is not None
+        sample.put_array(
+            "lp_instance_features",
+            np.hstack(
+                [
+                    static_instance_features,
+                    lp_stats.lp_value,
+                    lp_stats.lp_wallclock_time,
+                ]
+            ),
+        )
+
+    def extract_after_mip_features(
+        self,
+        solver: "InternalSolver",
+        sample: Sample,
+    ) -> None:
+        variables = solver.get_variables(with_static=False, with_sa=False)
+        constraints = solver.get_constraints(with_static=False, with_sa=False)
+        sample.put_array("mip_var_values", variables.values)
+        sample.put_array("mip_constr_slacks", constraints.slacks)
+
+    # noinspection DuplicatedCode
+    def _extract_user_features_vars(
+        self,
+        instance: "Instance",
+        sample: Sample,
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        # Query variable names
+        var_names = sample.get_array("static_var_names")
+        assert var_names is not None
+
+        # Query variable features
+        var_features = instance.get_variable_features(var_names)
+        assert isinstance(var_features, np.ndarray), (
+            f"Variable features must be a numpy array. "
+            f"Found {var_features.__class__} instead."
+        )
+        assert len(var_features.shape) == 2, (
+            f"Variable features must be 2-dimensional array. "
+            f"Found array with shape {var_features.shape} instead."
+        )
+        assert var_features.shape[0] == len(var_names), (
+            f"Variable features must have exactly {len(var_names)} rows. "
+            f"Found {var_features.shape[0]} rows instead."
+        )
+        assert var_features.dtype.kind in ["f"], (
+            f"Variable features must be floating point numbers. "
+            f"Found {var_features.dtype} instead."
+        )
+
+        # Query variable categories
+        var_categories = instance.get_variable_categories(var_names)
+        assert isinstance(var_categories, np.ndarray), (
+            f"Variable categories must be a numpy array. "
+            f"Found {var_categories.__class__} instead."
+        )
+        assert len(var_categories.shape) == 1, (
+            f"Variable categories must be a vector. "
+            f"Found array with shape {var_categories.shape} instead."
+        )
+        assert len(var_categories) == len(var_names), (
+            f"Variable categories must have exactly {len(var_names)} elements. "
+            f"Found {var_categories.shape[0]} elements instead."
+        )
+        assert var_categories.dtype.kind == "S", (
+            f"Variable categories must be a numpy array with dtype='S'. "
+            f"Found {var_categories.dtype} instead."
+        )
+        return var_features, var_categories
+
+    # noinspection DuplicatedCode
+    @classmethod
+    def _extract_user_features_constrs(
+        cls,
+        instance: "Instance",
+        constr_names: np.ndarray,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        # Query constraint features
+        constr_features = instance.get_constraint_features(constr_names)
+        assert isinstance(constr_features, np.ndarray), (
+            f"get_constraint_features must return a numpy array. "
+            f"Found {constr_features.__class__} instead."
+        )
+        assert len(constr_features.shape) == 2, (
+            f"get_constraint_features must return a 2-dimensional array. "
+            f"Found array with shape {constr_features.shape} instead."
+        )
+        assert constr_features.shape[0] == len(constr_names), (
+            f"get_constraint_features must return an array with {len(constr_names)} "
+            f"rows. Found {constr_features.shape[0]} rows instead."
+        )
+        assert constr_features.dtype.kind in ["f"], (
+            f"get_constraint_features must return floating point numbers. "
+            f"Found {constr_features.dtype} instead."
+        )
+
+        # Query constraint categories
+        constr_categories = instance.get_constraint_categories(constr_names)
+        assert isinstance(constr_categories, np.ndarray), (
+            f"get_constraint_categories must return a numpy array. "
+            f"Found {constr_categories.__class__} instead."
+        )
+        assert len(constr_categories.shape) == 1, (
+            f"get_constraint_categories must return a vector. "
+            f"Found array with shape {constr_categories.shape} instead."
+        )
+        assert len(constr_categories) == len(constr_names), (
+            f"get_constraint_categories must return a vector with {len(constr_names)} "
+            f"elements. Found {constr_categories.shape[0]} elements instead."
+        )
+        assert constr_categories.dtype.kind == "S", (
+            f"get_constraint_categories must return a numpy array with dtype='S'. "
+            f"Found {constr_categories.dtype} instead."
+        )
+
+        # Query constraint lazy attribute
+        constr_lazy = instance.are_constraints_lazy(constr_names)
+        assert isinstance(constr_lazy, np.ndarray), (
+            f"are_constraints_lazy must return a numpy array. "
+            f"Found {constr_lazy.__class__} instead."
+        )
+        assert len(constr_lazy.shape) == 1, (
+            f"are_constraints_lazy must return a vector. "
+            f"Found array with shape {constr_lazy.shape} instead."
+        )
+        assert constr_lazy.shape[0] == len(constr_names), (
+            f"are_constraints_lazy must return a vector with {len(constr_names)} "
+            f"elements. Found {constr_lazy.shape[0]} elements instead."
+        )
+        assert constr_lazy.dtype.kind == "b", (
+            f"are_constraints_lazy must return a boolean array. "
+            f"Found {constr_lazy.dtype} instead."
+        )
+
+        return constr_features, constr_categories, constr_lazy
+
+    def _extract_user_features_instance(
+        self,
+        instance: "Instance",
+        sample: Sample,
+    ) -> None:
+        features = instance.get_instance_features()
+        assert isinstance(features, np.ndarray), (
+            f"Instance features must be a numpy array. "
+            f"Found {features.__class__} instead."
+        )
+        assert len(features.shape) == 1, (
+            f"Instance features must be a vector. "
+            f"Found array with shape {features.shape} instead."
+        )
+        assert features.dtype.kind in [
+            "f"
+        ], f"Instance features have unsupported {features.dtype}"
+        sample.put_array("static_instance_features", features)
+
+    @classmethod
+    def _compute_AlvLouWeh2017(
+        cls,
+        A: Optional[coo_matrix] = None,
+        b: Optional[np.ndarray] = None,
+        c: Optional[np.ndarray] = None,
+        c_sa_down: Optional[np.ndarray] = None,
+        c_sa_up: Optional[np.ndarray] = None,
+        values: Optional[np.ndarray] = None,
+    ) -> np.ndarray:
+        """
+        Computes static variable features described in:
+        Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based
+        approximation of strong branching. INFORMS Journal on Computing, 29(1),
+        185-195.
+        """
+        assert b is not None
+        assert c is not None
+        nvars = len(c)
+
+        c_pos_sum = c[c > 0].sum()
+        c_neg_sum = -c[c < 0].sum()
+
+        curr = 0
+        max_n_features = 30
+        features = np.zeros((nvars, max_n_features))
+
+        def push(v: np.ndarray) -> None:
+            nonlocal curr
+            features[:, curr] = v
+            curr += 1
+
+        with np.errstate(divide="ignore", invalid="ignore"):
+            # Feature 1
+            push(np.sign(c))
+
+            # Feature 2
+            push(np.abs(c) / c_pos_sum)
+
+            # Feature 3
+            push(np.abs(c) / c_neg_sum)
+
+            if A is not None:
+                assert A.shape[1] == nvars
+                assert A.shape[0] == len(b)
+
+                M1 = A.T.multiply(1.0 / np.abs(b)).T.tocsr()
+                M1_pos = M1[b > 0, :]
+                if M1_pos.shape[0] > 0:
+                    M1_pos_max = M1_pos.max(axis=0).todense()
+                    M1_pos_min = M1_pos.min(axis=0).todense()
+                else:
+                    M1_pos_max = np.zeros(nvars)
+                    M1_pos_min = np.zeros(nvars)
+                M1_neg = M1[b < 0, :]
+                if M1_neg.shape[0] > 0:
+                    M1_neg_max = M1_neg.max(axis=0).todense()
+                    M1_neg_min = M1_neg.min(axis=0).todense()
+                else:
+                    M1_neg_max = np.zeros(nvars)
+                    M1_neg_min = np.zeros(nvars)
+
+                # Features 4-11
+                push(np.sign(M1_pos_min))
+                push(np.sign(M1_pos_max))
+                push(np.abs(M1_pos_min))
+                push(np.abs(M1_pos_max))
+                push(np.sign(M1_neg_min))
+                push(np.sign(M1_neg_max))
+                push(np.abs(M1_neg_min))
+                push(np.abs(M1_neg_max))
+
+            # Feature 37
+            if values is not None:
+                push(
+                    np.minimum(
+                        values - np.floor(values),
+                        np.ceil(values) - values,
+                    )
+                )
+
+            # Feature 44
+            if c_sa_up is not None:
+                push(np.sign(c_sa_up))
+
+            # Feature 46
+            if c_sa_down is not None:
+                push(np.sign(c_sa_down))
+
+            # Feature 47
+            if c_sa_down is not None:
+                push(np.log((c - c_sa_down) / np.sign(c)))
+
+            # Feature 48
+            if c_sa_up is not None:
+                push(np.log((c_sa_up - c) / np.sign(c)))
+
+        features = features[:, 0:curr]
+        _fix_infinity(features)
+        return features
+
+
+def _fix_infinity(m: Optional[np.ndarray]) -> None:
+    if m is None:
+        return
+    masked = np.ma.masked_invalid(m)
+    max_values = np.max(masked, axis=0)
+    min_values = np.min(masked, axis=0)
+    m[:] = np.maximum(np.minimum(m, max_values), min_values)
+    m[~np.isfinite(m)] = 0.0
diff --git a/miplearn/features/sample.py b/miplearn/features/sample.py
new file mode 100644
index 0000000..a0d6e43
--- /dev/null
+++ b/miplearn/features/sample.py
@@ -0,0 +1,226 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+import warnings
+from abc import ABC, abstractmethod
+from copy import deepcopy
+from typing import Dict, Optional, Any, Union, List, Tuple, cast, Set
+from scipy.sparse import coo_matrix
+
+import h5py
+import numpy as np
+from h5py import Dataset
+from overrides import overrides
+
+Bytes = Union[bytes, bytearray]
+Scalar = Union[None, bool, str, int, float]
+Vector = Union[
+    None,
+    List[bool],
+    List[str],
+    List[int],
+    List[float],
+    List[Optional[str]],
+    np.ndarray,
+]
+VectorList = Union[
+    List[List[bool]],
+    List[List[str]],
+    List[List[int]],
+    List[List[float]],
+    List[Optional[List[bool]]],
+    List[Optional[List[str]]],
+    List[Optional[List[int]]],
+    List[Optional[List[float]]],
+]
+
+
+class Sample(ABC):
+    """Abstract dictionary-like class that stores training data."""
+
+    @abstractmethod
+    def get_scalar(self, key: str) -> Optional[Any]:
+        pass
+
+    @abstractmethod
+    def put_scalar(self, key: str, value: Scalar) -> None:
+        pass
+
+    @abstractmethod
+    def put_array(self, key: str, value: Optional[np.ndarray]) -> None:
+        pass
+
+    @abstractmethod
+    def get_array(self, key: str) -> Optional[np.ndarray]:
+        pass
+
+    @abstractmethod
+    def put_sparse(self, key: str, value: coo_matrix) -> None:
+        pass
+
+    @abstractmethod
+    def get_sparse(self, key: str) -> Optional[coo_matrix]:
+        pass
+
+    def _assert_is_scalar(self, value: Any) -> None:
+        if value is None:
+            return
+        if isinstance(value, (str, bool, int, float, bytes, np.bytes_)):
+            return
+        assert False, f"scalar expected; found instead: {value} ({value.__class__})"
+
+    def _assert_is_array(self, value: np.ndarray) -> None:
+        assert isinstance(
+            value, np.ndarray
+        ), f"np.ndarray expected; found instead: {value.__class__}"
+        assert value.dtype.kind in "biufS", f"Unsupported dtype: {value.dtype}"
+
+    def _assert_is_sparse(self, value: Any) -> None:
+        assert isinstance(
+            value, coo_matrix
+        ), f"coo_matrix expected; found: {value.__class__}"
+        self._assert_is_array(value.data)
+
+
+class MemorySample(Sample):
+    """Dictionary-like class that stores training data in-memory."""
+
+    def __init__(
+        self,
+        data: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        if data is None:
+            data = {}
+        self._data: Dict[str, Any] = data
+
+    @overrides
+    def get_scalar(self, key: str) -> Optional[Any]:
+        return self._get(key)
+
+    @overrides
+    def put_scalar(self, key: str, value: Scalar) -> None:
+        if value is None:
+            return
+        self._assert_is_scalar(value)
+        self._put(key, value)
+
+    def _get(self, key: str) -> Optional[Any]:
+        if key in self._data:
+            return self._data[key]
+        else:
+            return None
+
+    def _put(self, key: str, value: Any) -> None:
+        self._data[key] = value
+
+    @overrides
+    def put_array(self, key: str, value: Optional[np.ndarray]) -> None:
+        if value is None:
+            return
+        self._assert_is_array(value)
+        self._put(key, value)
+
+    @overrides
+    def get_array(self, key: str) -> Optional[np.ndarray]:
+        return cast(Optional[np.ndarray], self._get(key))
+
+    @overrides
+    def put_sparse(self, key: str, value: coo_matrix) -> None:
+        if value is None:
+            return
+        self._assert_is_sparse(value)
+        self._put(key, value)
+
+    @overrides
+    def get_sparse(self, key: str) -> Optional[coo_matrix]:
+        return cast(Optional[coo_matrix], self._get(key))
+
+
+class Hdf5Sample(Sample):
+    """
+    Dictionary-like class that stores training data in an HDF5 file.
+    """
an HDF5 file.
+
+    Unlike MemorySample, this class only loads to memory the parts of the data set that
+    are actually accessed, and therefore it is more scalable.
+    """
+
+    def __init__(
+        self,
+        filename: str,
+        mode: str = "r+",
+    ) -> None:
+        self.file = h5py.File(filename, mode, libver="latest")
+
+    @overrides
+    def get_scalar(self, key: str) -> Optional[Any]:
+        if key not in self.file:
+            return None
+        ds = self.file[key]
+        assert (
+            len(ds.shape) == 0
+        ), f"0-dimensional array expected; found shape {ds.shape}"
+        if h5py.check_string_dtype(ds.dtype):
+            return ds.asstr()[()]
+        else:
+            return ds[()].tolist()
+
+    @overrides
+    def put_scalar(self, key: str, value: Any) -> None:
+        if value is None:
+            return
+        self._assert_is_scalar(value)
+        if key in self.file:
+            del self.file[key]
+        self.file.create_dataset(key, data=value)
+
+    @overrides
+    def put_array(self, key: str, value: Optional[np.ndarray]) -> None:
+        if value is None:
+            return
+        self._assert_is_array(value)
+        if value.dtype.kind == "f":
+            value = value.astype("float32")
+        if key in self.file:
+            del self.file[key]
+        self.file.create_dataset(key, data=value, compression="gzip")
+
+    @overrides
+    def get_array(self, key: str) -> Optional[np.ndarray]:
+        if key not in self.file:
+            return None
+        return self.file[key][:]
+
+    @overrides
+    def put_sparse(self, key: str, value: coo_matrix) -> None:
+        if value is None:
+            return
+        self._assert_is_sparse(value)
+        self.put_array(f"{key}_row", value.row)
+        self.put_array(f"{key}_col", value.col)
+        self.put_array(f"{key}_data", value.data)
+
+    @overrides
+    def get_sparse(self, key: str) -> Optional[coo_matrix]:
+        row = self.get_array(f"{key}_row")
+        if row is None:
+            return None
+        col = self.get_array(f"{key}_col")
+        data = self.get_array(f"{key}_data")
+        assert col is not None
+        assert data is not None
+        return coo_matrix((data, (row, col)))
+
+    def get_bytes(self, key: str) -> Optional[Bytes]:
+        if key not in self.file:
+            return None
+        ds = self.file[key]
+        assert (
+            len(ds.shape) == 1
+        ), f"1-dimensional array expected; found shape {ds.shape}"
+        return ds[()].tobytes()
+
+    def put_bytes(self, key: str, value: Bytes) -> None:
+        assert isinstance(
+            value, (bytes, bytearray)
+        ), f"bytes expected; found: {value.__class__}"  # type: ignore
+        self.put_array(key, np.frombuffer(value, dtype="uint8"))
diff --git a/miplearn/instance/base.py b/miplearn/instance/base.py
index c14df41..01f75e4 100644
--- a/miplearn/instance/base.py
+++ b/miplearn/instance/base.py
@@ -4,9 +4,12 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import Any, List, Hashable, TYPE_CHECKING, Dict
+from typing import Any, List, TYPE_CHECKING, Dict
 
-from miplearn.features import Sample
+import numpy as np
+
+from miplearn.features.sample import Sample, MemorySample
+from miplearn.types import ConstraintName, ConstraintCategory
 
 logger = logging.getLogger(__name__)
 
@@ -37,7 +40,7 @@ class Instance(ABC):
         """
         pass
 
-    def get_instance_features(self) -> List[float]:
+    def get_instance_features(self) -> np.ndarray:
         """
         Returns a 1-dimensional array of (numerical) features describing the
         entire instance.
@@ -59,9 +62,9 @@
         By default, returns [0.0].
         """
-        return [0.0]
+        return np.zeros(1)
 
-    def get_variable_features(self) -> Dict[str, List[float]]:
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
         """
-        Returns dictionary mapping the name of each variable to a
-        (1-dimensional) list of numerical features describing a particular
-        decision variable.
+        Returns a 2-dimensional array of numerical features describing each
+        decision variable in `names`.
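
Editor's note (illustrative, not part of the patch): the `Sample` classes added above replace the old nested feature dataclasses with a flat, string-keyed store. The sketch below shows how the two backends are exercised; the keys are taken from this diff, while the file name `sample.h5` is arbitrary.

```python
# Sketch only -- exercises the new Sample API from miplearn/features/sample.py.
import numpy as np
from scipy.sparse import coo_matrix
from miplearn.features.sample import Hdf5Sample, MemorySample

# In-memory backend: a thin wrapper around a dict.
s = MemorySample()
s.put_scalar("mip_lower_bound", 725.0)
s.put_array("static_var_names", np.array(["x[0]", "x[1]", "z"], dtype="S"))
assert s.get_scalar("mip_lower_bound") == 725.0

# HDF5 backend: same interface, but each key is read from disk on demand.
h5 = Hdf5Sample("sample.h5", mode="w")  # arbitrary file name for this example
h5.put_array("lp_var_values", np.array([1.0, 0.923077]))
h5.put_sparse("static_constr_lhs", coo_matrix([[23.0, 26.0, -1.0]]))
assert h5.get_array("lp_var_values") is not None
```
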
@@ -79,11 +82,11 @@
         If features are not provided for a given variable, MIPLearn will use a
         default set of features.
 
-        By default, returns {}.
+        By default, returns [[0.0], ..., [0.0]].
         """
-        return {}
+        return np.zeros((len(names), 1))
 
-    def get_variable_categories(self) -> Dict[str, Hashable]:
+    def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
         """
-        Returns a dictionary mapping the name of each variable to its category.
+        Returns an array with the category of each variable in `names`.
@@ -91,31 +94,27 @@
         internal ML model to predict the values of both variables. If a variable
-        is not listed in the dictionary, ML models will ignore the variable.
+        is not assigned a category, ML models will ignore the variable.
 
-        A category can be any hashable type, such as strings, numbers or tuples.
-        By default, returns {}.
+        By default, returns `names`.
         """
-        return {}
-
-    def get_constraint_features(self) -> Dict[str, List[float]]:
-        return {}
+        return names
 
-    def get_constraint_categories(self) -> Dict[str, Hashable]:
-        return {}
+    def get_constraint_features(self, names: np.ndarray) -> np.ndarray:
+        return np.zeros((len(names), 1))
 
-    def has_static_lazy_constraints(self) -> bool:
-        return False
+    def get_constraint_categories(self, names: np.ndarray) -> np.ndarray:
+        return names
 
     def has_dynamic_lazy_constraints(self) -> bool:
         return False
 
-    def is_constraint_lazy(self, cid: str) -> bool:
-        return False
+    def are_constraints_lazy(self, names: np.ndarray) -> np.ndarray:
+        return np.zeros(len(names), dtype=bool)
 
     def find_violated_lazy_constraints(
         self,
         solver: "InternalSolver",
         model: Any,
-    ) -> List[Hashable]:
+    ) -> List[ConstraintName]:
         """
         Returns lazy constraint violations found for the current solution.
 
@@ -125,10 +124,10 @@
         resolve the problem. The process repeats until no further lazy constraint
         violations are found.
 
-        Each "violation" is simply a string, a tuple or any other hashable type which
-        allows the instance to identify unambiguously which lazy constraint should be
-        generated. In the Traveling Salesman Problem, for example, a subtour
-        violation could be a frozen set containing the cities in the subtour.
+        Each "violation" is simply a string which allows the instance to identify
+        unambiguously which lazy constraint should be generated. In the Traveling
+        Salesman Problem, for example, a subtour violation could be a string
+        containing the cities in the subtour.
 
         The current solution can be queried with `solver.get_solution()`. If the
         solver is configured to use lazy callbacks, this solution may be non-integer.
@@ -141,7 +140,7 @@
         self,
         solver: "InternalSolver",
         model: Any,
-        violation: Hashable,
+        violation: ConstraintName,
     ) -> None:
         """
         Adds constraints to the model to ensure that the given violation is fixed.
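
Editor's note (illustrative, not part of the patch): with `violation` now a `ConstraintName` (an alias for `bytes`) instead of an arbitrary `Hashable`, each instance is responsible for encoding and decoding its own violation identifiers. The sketch below mirrors the convention used by the tsp.py hunk further down in this diff.

```python
# Sketch only -- round-trips a subtour violation the way tsp.py does below.
subtour = {0, 2, 5}  # cities forming a disconnected component

# find_violated_lazy_constraints: set of cities -> byte string
violation = ",".join(map(str, subtour)).encode()

# enforce_lazy_constraint: byte string -> list of cities
component = [int(v) for v in violation.decode().split(",")]
assert sorted(component) == [0, 2, 5]
```
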
@@ -167,14 +166,14 @@ class Instance(ABC): def has_user_cuts(self) -> bool: return False - def find_violated_user_cuts(self, model: Any) -> List[Hashable]: + def find_violated_user_cuts(self, model: Any) -> List[ConstraintName]: return [] def enforce_user_cut( self, solver: "InternalSolver", model: Any, - violation: Hashable, + violation: ConstraintName, ) -> Any: return None @@ -193,5 +192,7 @@ class Instance(ABC): def get_samples(self) -> List[Sample]: return self._samples - def push_sample(self, sample: Sample) -> None: + def create_sample(self) -> Sample: + sample = MemorySample() self._samples.append(sample) + return sample diff --git a/miplearn/instance/file.py b/miplearn/instance/file.py new file mode 100644 index 0000000..46e7609 --- /dev/null +++ b/miplearn/instance/file.py @@ -0,0 +1,131 @@ +# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. +# Released under the modified BSD license. See COPYING.md for more details. +import gc +import os +from typing import Any, Optional, List, Dict, TYPE_CHECKING +import pickle + +import numpy as np +from overrides import overrides + +from miplearn.features.sample import Hdf5Sample, Sample +from miplearn.instance.base import Instance +from miplearn.types import ConstraintName, ConstraintCategory + +if TYPE_CHECKING: + from miplearn.solvers.learning import InternalSolver + + +class FileInstance(Instance): + def __init__(self, filename: str) -> None: + super().__init__() + assert os.path.exists(filename), f"File not found: {filename}" + self.h5 = Hdf5Sample(filename) + self.instance: Optional[Instance] = None + + # Delegation + # ------------------------------------------------------------------------- + @overrides + def to_model(self) -> Any: + assert self.instance is not None + return self.instance.to_model() + + @overrides + def get_instance_features(self) -> np.ndarray: + assert self.instance is not None + return self.instance.get_instance_features() + + @overrides + def get_variable_features(self, names: np.ndarray) -> np.ndarray: + assert self.instance is not None + return self.instance.get_variable_features(names) + + @overrides + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: + assert self.instance is not None + return self.instance.get_variable_categories(names) + + @overrides + def get_constraint_features(self, names: np.ndarray) -> np.ndarray: + assert self.instance is not None + return self.instance.get_constraint_features(names) + + @overrides + def get_constraint_categories(self, names: np.ndarray) -> np.ndarray: + assert self.instance is not None + return self.instance.get_constraint_categories(names) + + @overrides + def has_dynamic_lazy_constraints(self) -> bool: + assert self.instance is not None + return self.instance.has_dynamic_lazy_constraints() + + @overrides + def are_constraints_lazy(self, names: np.ndarray) -> np.ndarray: + assert self.instance is not None + return self.instance.are_constraints_lazy(names) + + @overrides + def find_violated_lazy_constraints( + self, + solver: "InternalSolver", + model: Any, + ) -> List[ConstraintName]: + assert self.instance is not None + return self.instance.find_violated_lazy_constraints(solver, model) + + @overrides + def enforce_lazy_constraint( + self, + solver: "InternalSolver", + model: Any, + violation: ConstraintName, + ) -> None: + assert self.instance is not None + self.instance.enforce_lazy_constraint(solver, model, violation) + + @overrides + def 
find_violated_user_cuts(self, model: Any) -> List[ConstraintName]: + assert self.instance is not None + return self.instance.find_violated_user_cuts(model) + + @overrides + def enforce_user_cut( + self, + solver: "InternalSolver", + model: Any, + violation: ConstraintName, + ) -> None: + assert self.instance is not None + self.instance.enforce_user_cut(solver, model, violation) + + # Input & Output + # ------------------------------------------------------------------------- + @overrides + def free(self) -> None: + self.instance = None + gc.collect() + + @overrides + def load(self) -> None: + if self.instance is not None: + return + pkl = self.h5.get_bytes("pickled") + assert pkl is not None + self.instance = pickle.loads(pkl) + assert isinstance(self.instance, Instance) + + @classmethod + def save(cls, instance: Instance, filename: str) -> None: + h5 = Hdf5Sample(filename, mode="w") + instance_pkl = pickle.dumps(instance) + h5.put_bytes("pickled", instance_pkl) + + @overrides + def create_sample(self) -> Sample: + return self.h5 + + @overrides + def get_samples(self) -> List[Sample]: + return [self.h5] diff --git a/miplearn/instance/picklegz.py b/miplearn/instance/picklegz.py index 9cb4e2e..41cf9b2 100644 --- a/miplearn/instance/picklegz.py +++ b/miplearn/instance/picklegz.py @@ -6,12 +6,14 @@ import gc import gzip import os import pickle -from typing import Optional, Any, List, Hashable, cast, IO, TYPE_CHECKING, Dict +from typing import Optional, Any, List, cast, IO, TYPE_CHECKING, Dict +import numpy as np from overrides import overrides -from miplearn.features import Sample +from miplearn.features.sample import Sample from miplearn.instance.base import Instance +from miplearn.types import ConstraintName, ConstraintCategory if TYPE_CHECKING: from miplearn.solvers.learning import InternalSolver @@ -42,34 +44,29 @@ class PickleGzInstance(Instance): return self.instance.to_model() @overrides - def get_instance_features(self) -> List[float]: + def get_instance_features(self) -> np.ndarray: assert self.instance is not None return self.instance.get_instance_features() @overrides - def get_variable_features(self) -> Dict[str, List[float]]: + def get_variable_features(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_variable_features() + return self.instance.get_variable_features(names) @overrides - def get_variable_categories(self) -> Dict[str, Hashable]: + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_variable_categories() + return self.instance.get_variable_categories(names) @overrides - def get_constraint_features(self) -> Dict[str, List[float]]: + def get_constraint_features(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_constraint_features() + return self.instance.get_constraint_features(names) @overrides - def get_constraint_categories(self) -> Dict[str, Hashable]: + def get_constraint_categories(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_constraint_categories() - - @overrides - def has_static_lazy_constraints(self) -> bool: - assert self.instance is not None - return self.instance.has_static_lazy_constraints() + return self.instance.get_constraint_categories(names) @overrides def has_dynamic_lazy_constraints(self) -> bool: @@ -77,16 +74,16 @@ class PickleGzInstance(Instance): return self.instance.has_dynamic_lazy_constraints() @overrides - def 
is_constraint_lazy(self, cid: str) -> bool:
+    def are_constraints_lazy(self, names: np.ndarray) -> np.ndarray:
         assert self.instance is not None
-        return self.instance.is_constraint_lazy(cid)
+        return self.instance.are_constraints_lazy(names)
 
     @overrides
     def find_violated_lazy_constraints(
         self,
         solver: "InternalSolver",
         model: Any,
-    ) -> List[Hashable]:
+    ) -> List[ConstraintName]:
         assert self.instance is not None
         return self.instance.find_violated_lazy_constraints(solver, model)
 
@@ -95,13 +92,13 @@
         self,
         solver: "InternalSolver",
         model: Any,
-        violation: Hashable,
+        violation: ConstraintName,
     ) -> None:
         assert self.instance is not None
         self.instance.enforce_lazy_constraint(solver, model, violation)
 
     @overrides
-    def find_violated_user_cuts(self, model: Any) -> List[Hashable]:
+    def find_violated_user_cuts(self, model: Any) -> List[ConstraintName]:
         assert self.instance is not None
         return self.instance.find_violated_user_cuts(model)
 
@@ -110,7 +107,7 @@
         self,
         solver: "InternalSolver",
         model: Any,
-        violation: Hashable,
+        violation: ConstraintName,
     ) -> None:
         assert self.instance is not None
         self.instance.enforce_user_cut(solver, model, violation)
@@ -137,9 +134,9 @@
         return self.instance.get_samples()
 
     @overrides
-    def push_sample(self, sample: Sample) -> None:
+    def create_sample(self) -> Sample:
         assert self.instance is not None
-        self.instance.push_sample(sample)
+        return self.instance.create_sample()
 
 
 def write_pickle_gz(obj: Any, filename: str) -> None:
diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py
index 7dfddba..1dd06ef 100644
--- a/miplearn/problems/knapsack.py
+++ b/miplearn/problems/knapsack.py
@@ -1,7 +1,8 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
-from typing import List, Dict, Optional, Hashable, Any
+
+from typing import List, Dict, Optional
 
 import numpy as np
 import pyomo.environ as pe
@@ -10,7 +11,6 @@
 from scipy.stats import uniform, randint, rv_discrete
 from scipy.stats.distributions import rv_frozen
 
 from miplearn.instance.base import Instance
-from miplearn.types import VariableName, Category
 
 
 class ChallengeA:
@@ -94,15 +94,17 @@ class MultiKnapsackInstance(Instance):
         return model
 
     @overrides
-    def get_instance_features(self) -> List[float]:
-        return [float(np.mean(self.prices))] + list(self.capacities)
+    def get_instance_features(self) -> np.ndarray:
+        return np.array([float(np.mean(self.prices))] + list(self.capacities))
 
     @overrides
-    def get_variable_features(self) -> Dict[str, List[float]]:
-        return {
-            f"x[{i}]": [self.prices[i] + list(self.weights[:, i])]
-            for i in range(self.n)
-        }
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
+        features = []
+        for i in range(self.n):
+            f = [self.prices[i]]
+            f.extend(self.weights[:, i])
+            features.append(f)
+        return np.array(features)
 
 
 # noinspection PyPep8Naming
diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py
index 423aebc..a64fb3c 100644
--- a/miplearn/problems/stab.py
+++ b/miplearn/problems/stab.py
@@ -1,7 +1,7 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
-from typing import List, Dict, Hashable +from typing import List, Dict import networkx as nx import numpy as np @@ -12,7 +12,6 @@ from scipy.stats import uniform, randint from scipy.stats.distributions import rv_frozen from miplearn.instance.base import Instance -from miplearn.types import VariableName, Category class ChallengeA: @@ -67,9 +66,11 @@ class MaxWeightStableSetInstance(Instance): return model @overrides - def get_variable_features(self) -> Dict[str, List[float]]: - features = {} - for v1 in self.nodes: + def get_variable_features(self, names: np.ndarray) -> np.ndarray: + features = [] + assert len(names) == len(self.nodes) + for i, v1 in enumerate(self.nodes): + assert names[i] == f"x[{v1}]".encode() neighbor_weights = [0.0] * 15 neighbor_degrees = [100.0] * 15 for v2 in self.graph.neighbors(v1): @@ -81,12 +82,12 @@ class MaxWeightStableSetInstance(Instance): f += neighbor_weights[:5] f += neighbor_degrees[:5] f += [self.graph.degree(v1)] - features[f"x[{v1}]"] = f - return features + features.append(f) + return np.array(features) @overrides - def get_variable_categories(self) -> Dict[str, Hashable]: - return {f"x[{v}]": "default" for v in self.nodes} + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: + return np.array(["default" for _ in names], dtype="S") class MaxWeightStableSetGenerator: diff --git a/miplearn/problems/tsp.py b/miplearn/problems/tsp.py index 8fa2598..b277e3a 100644 --- a/miplearn/problems/tsp.py +++ b/miplearn/problems/tsp.py @@ -1,7 +1,7 @@ # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. -from typing import List, Tuple, FrozenSet, Any, Optional, Hashable, Dict +from typing import List, Tuple, FrozenSet, Any, Optional, Dict import networkx as nx import numpy as np @@ -11,10 +11,10 @@ from scipy.spatial.distance import pdist, squareform from scipy.stats import uniform, randint from scipy.stats.distributions import rv_frozen +from miplearn.instance.base import Instance from miplearn.solvers.learning import InternalSolver from miplearn.solvers.pyomo.base import BasePyomoSolver -from miplearn.instance.base import Instance -from miplearn.types import VariableName, Category +from miplearn.types import ConstraintName class ChallengeA: @@ -81,24 +81,19 @@ class TravelingSalesmanInstance(Instance): ) return model - @overrides - def get_variable_categories(self) -> Dict[str, Hashable]: - return {f"x[{e}]": f"x[{e}]" for e in self.edges} - @overrides def find_violated_lazy_constraints( self, solver: InternalSolver, model: Any, - ) -> List[FrozenSet]: + ) -> List[ConstraintName]: selected_edges = [e for e in self.edges if model.x[e].value > 0.5] graph = nx.Graph() graph.add_edges_from(selected_edges) - components = [frozenset(c) for c in list(nx.connected_components(graph))] violations = [] - for c in components: + for c in list(nx.connected_components(graph)): if len(c) < self.n_cities: - violations += [c] + violations.append(",".join(map(str, c)).encode()) return violations @overrides @@ -106,9 +101,10 @@ class TravelingSalesmanInstance(Instance): self, solver: InternalSolver, model: Any, - component: FrozenSet, + violation: ConstraintName, ) -> None: assert isinstance(solver, BasePyomoSolver) + component = [int(v) for v in violation.decode().split(",")] cut_edges = [ e for e in self.edges diff --git a/miplearn/solvers/__init__.py b/miplearn/solvers/__init__.py index 
e172895..060153c 100644 --- a/miplearn/solvers/__init__.py +++ b/miplearn/solvers/__init__.py @@ -4,7 +4,7 @@ import logging import sys -from typing import Any, List, TextIO, cast +from typing import Any, List, TextIO, cast, TypeVar, Optional, Sized logger = logging.getLogger(__name__) @@ -38,7 +38,10 @@ class _RedirectOutput: sys.stderr = self._original_stderr -def _none_if_empty(obj: Any) -> Any: +T = TypeVar("T", bound=Sized) + + +def _none_if_empty(obj: T) -> Optional[T]: if len(obj) == 0: return None else: diff --git a/miplearn/solvers/gurobi.py b/miplearn/solvers/gurobi.py index edfa182..751f66c 100644 --- a/miplearn/solvers/gurobi.py +++ b/miplearn/solvers/gurobi.py @@ -6,11 +6,12 @@ import re import sys from io import StringIO from random import randint -from typing import List, Any, Dict, Optional, Hashable, Tuple, TYPE_CHECKING +from typing import List, Any, Dict, Optional, TYPE_CHECKING +import numpy as np from overrides import overrides +from scipy.sparse import coo_matrix, lil_matrix -from miplearn.features import VariableFeatures, ConstraintFeatures from miplearn.instance.base import Instance from miplearn.solvers import _RedirectOutput from miplearn.solvers.internal import ( @@ -19,6 +20,8 @@ from miplearn.solvers.internal import ( IterationCallback, LazyCallback, MIPSolveStats, + Variables, + Constraints, ) from miplearn.solvers.pyomo.base import PyomoTestInstanceKnapsack from miplearn.types import ( @@ -71,16 +74,16 @@ class GurobiSolver(InternalSolver): self._has_lp_solution = False self._has_mip_solution = False - self._varname_to_var: Dict[str, "gurobipy.Var"] = {} + self._varname_to_var: Dict[bytes, "gurobipy.Var"] = {} self._cname_to_constr: Dict[str, "gurobipy.Constr"] = {} self._gp_vars: List["gurobipy.Var"] = [] self._gp_constrs: List["gurobipy.Constr"] = [] - self._var_names: List[str] = [] + self._var_names: np.ndarray = np.empty(0) self._constr_names: List[str] = [] - self._var_types: List[str] = [] - self._var_lbs: List[float] = [] - self._var_ubs: List[float] = [] - self._var_obj_coeffs: List[float] = [] + self._var_types: np.ndarray = np.empty(0) + self._var_lbs: np.ndarray = np.empty(0) + self._var_ubs: np.ndarray = np.empty(0) + self._var_obj_coeffs: np.ndarray = np.empty(0) if self.lazy_cb_frequency == 1: self.lazy_cb_where = [self.gp.GRB.Callback.MIPSOL] @@ -91,23 +94,27 @@ class GurobiSolver(InternalSolver): ] @overrides - def add_constraints(self, cf: ConstraintFeatures) -> None: + def add_constraints(self, cf: Constraints) -> None: assert cf.names is not None assert cf.senses is not None assert cf.lhs is not None assert cf.rhs is not None assert self.model is not None + lhs = cf.lhs.tocsr() for i in range(len(cf.names)): sense = cf.senses[i] - lhs = self.gp.quicksum( - self._varname_to_var[varname] * coeff for (varname, coeff) in cf.lhs[i] + row = lhs[i, :] + row_expr = self.gp.quicksum( + self._gp_vars[row.indices[j]] * row.data[j] for j in range(row.getnnz()) ) - if sense == "=": - self.model.addConstr(lhs == cf.rhs[i], name=cf.names[i]) - elif sense == "<": - self.model.addConstr(lhs <= cf.rhs[i], name=cf.names[i]) + if sense == b"=": + self.model.addConstr(row_expr == cf.rhs[i], name=cf.names[i]) + elif sense == b"<": + self.model.addConstr(row_expr <= cf.rhs[i], name=cf.names[i]) + elif sense == b">": + self.model.addConstr(row_expr >= cf.rhs[i], name=cf.names[i]) else: - self.model.addConstr(lhs >= cf.rhs[i], name=cf.names[i]) + raise Exception(f"Unknown sense: {sense}") self.model.update() self._dirty = True self._has_lp_solution = False @@ 
-120,7 +127,7 @@ class GurobiSolver(InternalSolver):
     @overrides
     def are_constraints_satisfied(
         self,
-        cf: ConstraintFeatures,
+        cf: Constraints,
         tol: float = 1e-5,
     ) -> List[bool]:
         assert cf.names is not None
@@ -129,18 +136,18 @@
         assert cf.rhs is not None
         assert self.model is not None
         result = []
+        x = np.array(self.model.getAttr("x", self.model.getVars()))
+        lhs = cf.lhs.tocsr() * x
         for i in range(len(cf.names)):
             sense = cf.senses[i]
-            lhs = sum(
-                self._varname_to_var[varname].x * coeff
-                for (varname, coeff) in cf.lhs[i]
-            )
-            if sense == "<":
-                result.append(lhs <= cf.rhs[i] + tol)
-            elif sense == ">":
-                result.append(lhs >= cf.rhs[i] - tol)
+            if sense == b"<":
+                result.append(lhs[i] <= cf.rhs[i] + tol)
+            elif sense == b">":
+                result.append(lhs[i] >= cf.rhs[i] - tol)
+            elif sense == b"=":
+                result.append(abs(cf.rhs[i] - lhs[i]) <= tol)
             else:
-                result.append(abs(cf.rhs[i] - lhs) <= tol)
+                raise Exception(f"unknown sense: {sense}")
         return result
 
     @overrides
@@ -196,7 +203,7 @@ class GurobiSolver(InternalSolver):
         with_static: bool = True,
         with_sa: bool = True,
         with_lhs: bool = True,
-    ) -> ConstraintFeatures:
+    ) -> Constraints:
         model = self.model
         assert model is not None
         assert model.numVars == len(self._gp_vars)
@@ -209,39 +216,40 @@
             raise Exception(f"unknown cbasis: {v}")
 
         gp_constrs = model.getConstrs()
-        constr_names = model.getAttr("constrName", gp_constrs)
-        lhs: Optional[List] = None
+        constr_names = np.array(model.getAttr("constrName", gp_constrs), dtype="S")
+        lhs: Optional[coo_matrix] = None
         rhs, senses, slacks, basis_status = None, None, None, None
         dual_value, basis_status, sa_rhs_up, sa_rhs_down = None, None, None, None
         if with_static:
-            rhs = model.getAttr("rhs", gp_constrs)
-            senses = model.getAttr("sense", gp_constrs)
+            rhs = np.array(model.getAttr("rhs", gp_constrs), dtype=float)
+            senses = np.array(model.getAttr("sense", gp_constrs), dtype="S")
             if with_lhs:
-                lhs = [None for _ in gp_constrs]
+                nrows = len(gp_constrs)
+                ncols = len(self._var_names)
+                tmp = lil_matrix((nrows, ncols), dtype=float)
                 for (i, gp_constr) in enumerate(gp_constrs):
                     expr = model.getRow(gp_constr)
-                    lhs[i] = [
-                        (self._var_names[expr.getVar(j).index], expr.getCoeff(j))
-                        for j in range(expr.size())
-                    ]
+                    for j in range(expr.size()):
+                        tmp[i, expr.getVar(j).index] = expr.getCoeff(j)
+                lhs = tmp.tocoo()
         if self._has_lp_solution:
-            dual_value = model.getAttr("pi", gp_constrs)
-            basis_status = list(
-                map(
-                    _parse_gurobi_cbasis,
-                    model.getAttr("cbasis", gp_constrs),
-                )
+            dual_value = np.array(model.getAttr("pi", gp_constrs), dtype=float)
+            basis_status = np.array(
+                [_parse_gurobi_cbasis(c) for c in model.getAttr("cbasis", gp_constrs)],
+                dtype="S",
             )
             if with_sa:
-                sa_rhs_up = model.getAttr("saRhsUp", gp_constrs)
-                sa_rhs_down = model.getAttr("saRhsLow", gp_constrs)
+                sa_rhs_up = np.array(model.getAttr("saRhsUp", gp_constrs), dtype=float)
+                sa_rhs_down = np.array(
+                    model.getAttr("saRhsLow", gp_constrs), dtype=float
+                )
         if self._has_lp_solution or self._has_mip_solution:
-            slacks = model.getAttr("slack", gp_constrs)
+            slacks = np.array(model.getAttr("slack", gp_constrs), dtype=float)
 
-        return ConstraintFeatures(
+        return Constraints(
             basis_status=basis_status,
             dual_values=dual_value,
             lhs=lhs,
@@ -259,11 +267,13 @@ class GurobiSolver(InternalSolver):
         if self.cb_where is not None:
             if self.cb_where == self.gp.GRB.Callback.MIPNODE:
                 return {
-                    v.varName: self.model.cbGetNodeRel(v) for v in self.model.getVars()
+                    v.varName.encode(): 
self.model.cbGetNodeRel(v) + for v in self.model.getVars() } elif self.cb_where == self.gp.GRB.Callback.MIPSOL: return { - v.varName: self.model.cbGetSolution(v) for v in self.model.getVars() + v.varName.encode(): self.model.cbGetSolution(v) + for v in self.model.getVars() } else: raise Exception( @@ -272,7 +282,7 @@ class GurobiSolver(InternalSolver): ) if self.model.solCount == 0: return None - return {v.varName: v.x for v in self.model.getVars()} + return {v.varName.encode(): v.x for v in self.model.getVars()} @overrides def get_variable_attrs(self) -> List[str]: @@ -300,7 +310,7 @@ class GurobiSolver(InternalSolver): self, with_static: bool = True, with_sa: bool = True, - ) -> VariableFeatures: + ) -> Variables: model = self.model assert model is not None @@ -316,8 +326,9 @@ class GurobiSolver(InternalSolver): else: raise Exception(f"unknown vbasis: {basis_status}") + basis_status: Optional[np.ndarray] = None upper_bounds, lower_bounds, types, values = None, None, None, None - obj_coeffs, reduced_costs, basis_status = None, None, None + obj_coeffs, reduced_costs = None, None sa_obj_up, sa_ub_up, sa_lb_up = None, None, None sa_obj_down, sa_ub_down, sa_lb_down = None, None, None @@ -328,26 +339,45 @@ class GurobiSolver(InternalSolver): obj_coeffs = self._var_obj_coeffs if self._has_lp_solution: - reduced_costs = model.getAttr("rc", self._gp_vars) - basis_status = list( - map( - _parse_gurobi_vbasis, - model.getAttr("vbasis", self._gp_vars), - ) + reduced_costs = np.array(model.getAttr("rc", self._gp_vars), dtype=float) + basis_status = np.array( + [ + _parse_gurobi_vbasis(b) + for b in model.getAttr("vbasis", self._gp_vars) + ], + dtype="S", ) if with_sa: - sa_obj_up = model.getAttr("saobjUp", self._gp_vars) - sa_obj_down = model.getAttr("saobjLow", self._gp_vars) - sa_ub_up = model.getAttr("saubUp", self._gp_vars) - sa_ub_down = model.getAttr("saubLow", self._gp_vars) - sa_lb_up = model.getAttr("salbUp", self._gp_vars) - sa_lb_down = model.getAttr("salbLow", self._gp_vars) + sa_obj_up = np.array( + model.getAttr("saobjUp", self._gp_vars), + dtype=float, + ) + sa_obj_down = np.array( + model.getAttr("saobjLow", self._gp_vars), + dtype=float, + ) + sa_ub_up = np.array( + model.getAttr("saubUp", self._gp_vars), + dtype=float, + ) + sa_ub_down = np.array( + model.getAttr("saubLow", self._gp_vars), + dtype=float, + ) + sa_lb_up = np.array( + model.getAttr("salbUp", self._gp_vars), + dtype=float, + ) + sa_lb_down = np.array( + model.getAttr("salbLow", self._gp_vars), + dtype=float, + ) if model.solCount > 0: - values = model.getAttr("x", self._gp_vars) + values = np.array(model.getAttr("x", self._gp_vars), dtype=float) - return VariableFeatures( + return Variables( names=self._var_names, upper_bounds=upper_bounds, lower_bounds=lower_bounds, @@ -489,7 +519,7 @@ class GurobiSolver(InternalSolver): self._apply_params(streams) assert self.model is not None for (i, var) in enumerate(self._gp_vars): - if self._var_types[i] == "B": + if self._var_types[i] == b"B": var.vtype = self.gp.GRB.CONTINUOUS var.lb = 0.0 var.ub = 1.0 @@ -497,7 +527,7 @@ class GurobiSolver(InternalSolver): self.model.optimize() self._dirty = False for (i, var) in enumerate(self._gp_vars): - if self._var_types[i] == "B": + if self._var_types[i] == b"B": var.vtype = self.gp.GRB.BINARY log = streams[0].getvalue() self._has_lp_solution = self.model.solCount > 0 @@ -562,32 +592,47 @@ class GurobiSolver(InternalSolver): assert self.model is not None gp_vars: List["gurobipy.Var"] = self.model.getVars() gp_constrs: 
List["gurobipy.Constr"] = self.model.getConstrs()
-        var_names: List[str] = self.model.getAttr("varName", gp_vars)
-        var_types: List[str] = self.model.getAttr("vtype", gp_vars)
-        var_ubs: List[float] = self.model.getAttr("ub", gp_vars)
-        var_lbs: List[float] = self.model.getAttr("lb", gp_vars)
-        var_obj_coeffs: List[float] = self.model.getAttr("obj", gp_vars)
+        var_names: np.ndarray = np.array(
+            self.model.getAttr("varName", gp_vars),
+            dtype="S",
+        )
+        var_types: np.ndarray = np.array(
+            self.model.getAttr("vtype", gp_vars),
+            dtype="S",
+        )
+        var_ubs: np.ndarray = np.array(
+            self.model.getAttr("ub", gp_vars),
+            dtype=float,
+        )
+        var_lbs: np.ndarray = np.array(
+            self.model.getAttr("lb", gp_vars),
+            dtype=float,
+        )
+        var_obj_coeffs: np.ndarray = np.array(
+            self.model.getAttr("obj", gp_vars),
+            dtype=float,
+        )
         constr_names: List[str] = self.model.getAttr("constrName", gp_constrs)
-        varname_to_var: Dict = {}
+        varname_to_var: Dict[bytes, "gurobipy.Var"] = {}
         cname_to_constr: Dict = {}
         for (i, gp_var) in enumerate(gp_vars):
             assert var_names[i] not in varname_to_var, (
                 f"Duplicated variable name detected: {var_names[i]}. "
                 f"Unique variable names are currently required."
             )
-            if var_types[i] == "I":
+            if var_types[i] == b"I":
                 assert var_ubs[i] == 1.0, (
                     "Only binary and continuous variables are currently supported. "
-                    "Integer variable {var.varName} has upper bound {var.ub}."
+                    f"Integer variable {var_names[i]} has upper bound {var_ubs[i]}."
                 )
                 assert var_lbs[i] == 0.0, (
                     "Only binary and continuous variables are currently supported. "
-                    "Integer variable {var.varName} has lower bound {var.ub}."
+                    f"Integer variable {var_names[i]} has lower bound {var_lbs[i]}."
                 )
-                var_types[i] = "B"
-            assert var_types[i] in ["B", "C"], (
+                var_types[i] = b"B"
+            assert var_types[i] in [b"B", b"C"], (
                 "Only binary and continuous variables are currently supported. "
-                "Variable {var.varName} has type {vtype}."
+                f"Variable {var_names[i]} has type {var_types[i]}."
) varname_to_var[var_names[i]] = gp_var for (i, gp_constr) in enumerate(gp_constrs): @@ -671,7 +716,7 @@ class GurobiTestInstanceKnapsack(PyomoTestInstanceKnapsack): self, solver: InternalSolver, model: Any, - violation: Hashable, + violation: str, ) -> None: x0 = model.getVarByName("x[0]") model.cbLazy(x0 <= 0) diff --git a/miplearn/solvers/internal.py b/miplearn/solvers/internal.py index f689e44..9688cb0 100644 --- a/miplearn/solvers/internal.py +++ b/miplearn/solvers/internal.py @@ -5,9 +5,11 @@ import logging from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, List, Optional, List +from typing import Any, Optional, List, TYPE_CHECKING + +import numpy as np +from scipy.sparse import coo_matrix -from miplearn.features import VariableFeatures, ConstraintFeatures from miplearn.instance.base import Instance from miplearn.types import ( IterationCallback, @@ -18,6 +20,9 @@ from miplearn.types import ( logger = logging.getLogger(__name__) +if TYPE_CHECKING: + from miplearn.features.sample import Sample + @dataclass class LPSolveStats: @@ -44,20 +49,87 @@ class MIPSolveStats: mip_warm_start_value: Optional[float] = None +@dataclass +class Variables: + names: Optional[np.ndarray] = None + basis_status: Optional[np.ndarray] = None + lower_bounds: Optional[np.ndarray] = None + obj_coeffs: Optional[np.ndarray] = None + reduced_costs: Optional[np.ndarray] = None + sa_lb_down: Optional[np.ndarray] = None + sa_lb_up: Optional[np.ndarray] = None + sa_obj_down: Optional[np.ndarray] = None + sa_obj_up: Optional[np.ndarray] = None + sa_ub_down: Optional[np.ndarray] = None + sa_ub_up: Optional[np.ndarray] = None + types: Optional[np.ndarray] = None + upper_bounds: Optional[np.ndarray] = None + values: Optional[np.ndarray] = None + + +@dataclass +class Constraints: + basis_status: Optional[np.ndarray] = None + dual_values: Optional[np.ndarray] = None + lazy: Optional[np.ndarray] = None + lhs: Optional[coo_matrix] = None + names: Optional[np.ndarray] = None + rhs: Optional[np.ndarray] = None + sa_rhs_down: Optional[np.ndarray] = None + sa_rhs_up: Optional[np.ndarray] = None + senses: Optional[np.ndarray] = None + slacks: Optional[np.ndarray] = None + + @staticmethod + def from_sample(sample: "Sample") -> "Constraints": + return Constraints( + basis_status=sample.get_array("lp_constr_basis_status"), + dual_values=sample.get_array("lp_constr_dual_values"), + lazy=sample.get_array("static_constr_lazy"), + # lhs=sample.get_vector("static_constr_lhs"), + names=sample.get_array("static_constr_names"), + rhs=sample.get_array("static_constr_rhs"), + sa_rhs_down=sample.get_array("lp_constr_sa_rhs_down"), + sa_rhs_up=sample.get_array("lp_constr_sa_rhs_up"), + senses=sample.get_array("static_constr_senses"), + slacks=sample.get_array("lp_constr_slacks"), + ) + + def __getitem__(self, selected: List[bool]) -> "Constraints": + return Constraints( + basis_status=( + None if self.basis_status is None else self.basis_status[selected] + ), + dual_values=( + None if self.dual_values is None else self.dual_values[selected] + ), + names=(None if self.names is None else self.names[selected]), + lazy=(None if self.lazy is None else self.lazy[selected]), + lhs=(None if self.lhs is None else self.lhs.tocsr()[selected].tocoo()), + rhs=(None if self.rhs is None else self.rhs[selected]), + sa_rhs_down=( + None if self.sa_rhs_down is None else self.sa_rhs_down[selected] + ), + sa_rhs_up=(None if self.sa_rhs_up is None else self.sa_rhs_up[selected]), + senses=(None if self.senses is None else 
self.senses[selected]), + slacks=(None if self.slacks is None else self.slacks[selected]), + ) + + class InternalSolver(ABC): """ Abstract class representing the MIP solver used internally by LearningSolver. """ @abstractmethod - def add_constraints(self, cf: ConstraintFeatures) -> None: + def add_constraints(self, cf: Constraints) -> None: """Adds the given constraints to the model.""" pass @abstractmethod def are_constraints_satisfied( self, - cf: ConstraintFeatures, + cf: Constraints, tol: float = 1e-5, ) -> List[bool]: """ @@ -133,7 +205,7 @@ class InternalSolver(ABC): with_static: bool = True, with_sa: bool = True, with_lhs: bool = True, - ) -> ConstraintFeatures: + ) -> Constraints: pass @abstractmethod @@ -149,7 +221,7 @@ class InternalSolver(ABC): self, with_static: bool = True, with_sa: bool = True, - ) -> VariableFeatures: + ) -> Variables: """ Returns a description of the decision variables in the problem. @@ -176,7 +248,7 @@ class InternalSolver(ABC): pass @abstractmethod - def remove_constraints(self, names: List[str]) -> None: + def remove_constraints(self, names: np.ndarray) -> None: """ Removes the given constraints from the model. """ diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py index e7e07ee..5e54bee 100644 --- a/miplearn/solvers/learning.py +++ b/miplearn/solvers/learning.py @@ -3,8 +3,8 @@ # Released under the modified BSD license. See COPYING.md for more details. import logging -import traceback import time +import traceback from typing import Optional, List, Any, cast, Dict, Tuple from p_tqdm import p_map @@ -14,7 +14,7 @@ from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent from miplearn.components.dynamic_user_cuts import UserCutsComponent from miplearn.components.objective import ObjectiveValueComponent from miplearn.components.primal import PrimalSolutionComponent -from miplearn.features import FeaturesExtractor, Sample +from miplearn.features.extractor import FeaturesExtractor from miplearn.instance.base import Instance from miplearn.instance.picklegz import PickleGzInstance from miplearn.solvers import _RedirectOutput @@ -149,8 +149,7 @@ class LearningSolver: # Initialize training sample # ------------------------------------------------------- - sample = Sample() - instance.push_sample(sample) + sample = instance.create_sample() # Initialize stats # ------------------------------------------------------- @@ -168,13 +167,13 @@ class LearningSolver: # ------------------------------------------------------- logger.info("Extracting features (after-load)...") initial_time = time.time() - features = self.extractor.extract(instance, self.internal_solver) + self.extractor.extract_after_load_features( + instance, self.internal_solver, sample + ) logger.info( "Features (after-load) extracted in %.2f seconds" % (time.time() - initial_time) ) - features.extra = {} - sample.after_load = features callback_args = ( self, @@ -208,18 +207,13 @@ class LearningSolver: # ------------------------------------------------------- logger.info("Extracting features (after-lp)...") initial_time = time.time() - features = self.extractor.extract( - instance, - self.internal_solver, - with_static=False, + self.extractor.extract_after_lp_features( + self.internal_solver, sample, lp_stats ) logger.info( "Features (after-lp) extracted in %.2f seconds" % (time.time() - initial_time) ) - features.extra = {} - features.lp_solve = lp_stats - sample.after_lp = features # Callback wrappers # ------------------------------------------------------- @@ 
-281,18 +275,13 @@ class LearningSolver: # ------------------------------------------------------- logger.info("Extracting features (after-mip)...") initial_time = time.time() - features = self.extractor.extract( - instance, - self.internal_solver, - with_static=False, - ) + for (k, v) in mip_stats.__dict__.items(): + sample.put_scalar(k, v) + self.extractor.extract_after_mip_features(self.internal_solver, sample) logger.info( "Features (after-mip) extracted in %.2f seconds" % (time.time() - initial_time) ) - features.mip_solve = mip_stats - features.extra = {} - sample.after_mip = features # After-solve callbacks # ------------------------------------------------------- diff --git a/miplearn/solvers/pyomo/base.py b/miplearn/solvers/pyomo/base.py index 5889976..5292eb0 100644 --- a/miplearn/solvers/pyomo/base.py +++ b/miplearn/solvers/pyomo/base.py @@ -6,7 +6,7 @@ import logging import re import sys from io import StringIO -from typing import Any, List, Dict, Optional, Tuple, Hashable +from typing import Any, List, Dict, Optional import numpy as np import pyomo @@ -18,8 +18,8 @@ from pyomo.core.base.constraint import ConstraintList from pyomo.core.expr.numeric_expr import SumExpression, MonomialTermExpression from pyomo.opt import TerminationCondition from pyomo.opt.base.solvers import SolverFactory +from scipy.sparse import coo_matrix -from miplearn.features import VariableFeatures, ConstraintFeatures from miplearn.instance.base import Instance from miplearn.solvers import _RedirectOutput, _none_if_empty from miplearn.solvers.internal import ( @@ -28,13 +28,13 @@ from miplearn.solvers.internal import ( IterationCallback, LazyCallback, MIPSolveStats, + Variables, + Constraints, ) from miplearn.types import ( SolverParams, UserCutCallback, Solution, - VariableName, - Category, ) logger = logging.getLogger(__name__) @@ -58,7 +58,8 @@ class BasePyomoSolver(InternalSolver): self._is_warm_start_available: bool = False self._pyomo_solver: SolverFactory = solver_factory self._obj_sense: str = "min" - self._varname_to_var: Dict[str, pe.Var] = {} + self._varname_to_var: Dict[bytes, pe.Var] = {} + self._varname_to_idx: Dict[str, int] = {} self._cname_to_constr: Dict[str, pe.Constraint] = {} self._termination_condition: str = "" self._has_lp_solution = False @@ -79,27 +80,30 @@ class BasePyomoSolver(InternalSolver): self._has_mip_solution = False @overrides - def add_constraints(self, cf: ConstraintFeatures) -> None: + def add_constraints(self, cf: Constraints) -> None: assert cf.names is not None assert cf.senses is not None assert cf.lhs is not None assert cf.rhs is not None assert self.model is not None - for (i, name) in enumerate(cf.names): - lhs = 0.0 - for (varname, coeff) in cf.lhs[i]: - var = self._varname_to_var[varname] - lhs += var * coeff - if cf.senses[i] == "=": - expr = lhs == cf.rhs[i] - elif cf.senses[i] == "<": - expr = lhs <= cf.rhs[i] + lhs = cf.lhs.tocsr() + for i in range(len(cf.names)): + row = lhs[i, :] + lhsi = 0.0 + for j in range(row.getnnz()): + lhsi += self._all_vars[row.indices[j]] * row.data[j] + if cf.senses[i] == b"=": + expr = lhsi == cf.rhs[i] + elif cf.senses[i] == b"<": + expr = lhsi <= cf.rhs[i] + elif cf.senses[i] == b">": + expr = lhsi >= cf.rhs[i] else: - expr = lhs >= cf.rhs[i] - cl = pe.Constraint(expr=expr, name=name) - self.model.add_component(name, cl) + raise Exception(f"Unknown sense: {cf.senses[i]}") + cl = pe.Constraint(expr=expr, name=cf.names[i]) + self.model.add_component(cf.names[i].decode(), cl) self._pyomo_solver.add_constraint(cl) - 
self._cname_to_constr[name] = cl
+            self._cname_to_constr[cf.names[i]] = cl
         self._termination_condition = ""
         self._has_lp_solution = False
         self._has_mip_solution = False
@@ -111,25 +115,25 @@ class BasePyomoSolver(InternalSolver):
     @overrides
     def are_constraints_satisfied(
         self,
-        cf: ConstraintFeatures,
+        cf: Constraints,
         tol: float = 1e-5,
     ) -> List[bool]:
         assert cf.names is not None
         assert cf.lhs is not None
         assert cf.rhs is not None
         assert cf.senses is not None
+        x = [v.value for v in self._all_vars]
+        lhs = cf.lhs.tocsr() * x
         result = []
-        for (i, name) in enumerate(cf.names):
-            lhs = 0.0
-            for (varname, coeff) in cf.lhs[i]:
-                var = self._varname_to_var[varname]
-                lhs += var.value * coeff
-            if cf.senses[i] == "<":
-                result.append(lhs <= cf.rhs[i] + tol)
-            elif cf.senses[i] == ">":
-                result.append(lhs >= cf.rhs[i] - tol)
+        for i in range(len(lhs)):
+            if cf.senses[i] == b"<":
+                result.append(lhs[i] <= cf.rhs[i] + tol)
+            elif cf.senses[i] == b">":
+                result.append(lhs[i] >= cf.rhs[i] - tol)
+            elif cf.senses[i] == b"=":
+                result.append(abs(cf.rhs[i] - lhs[i]) < tol)
             else:
-                result.append(abs(cf.rhs[i] - lhs) < tol)
+                raise Exception(f"unknown sense: {cf.senses[i]}")
         return result
 
     @overrides
@@ -159,18 +163,20 @@ class BasePyomoSolver(InternalSolver):
         with_static: bool = True,
         with_sa: bool = True,
         with_lhs: bool = True,
-    ) -> ConstraintFeatures:
+    ) -> Constraints:
         model = self.model
         assert model is not None
         names: List[str] = []
         rhs: List[float] = []
-        lhs: List[List[Tuple[str, float]]] = []
         senses: List[str] = []
         dual_values: List[float] = []
         slacks: List[float] = []
+        lhs_row: List[int] = []
+        lhs_col: List[int] = []
+        lhs_data: List[float] = []
+        lhs: Optional[coo_matrix] = None
 
-        def _parse_constraint(c: pe.Constraint) -> None:
+        def _parse_constraint(c: pe.Constraint, row: int) -> None:
             assert model is not None
             if with_static:
                 # Extract RHS and sense
@@ -191,30 +197,31 @@
                 if with_lhs:
                     # Extract LHS
-                    lhsc = []
                     expr = c.body
                     if isinstance(expr, SumExpression):
                         for term in expr._args_:
                             if isinstance(term, MonomialTermExpression):
-                                lhsc.append(
-                                    (
-                                        term._args_[1].name,
-                                        float(term._args_[0]),
-                                    )
+                                lhs_row.append(row)
+                                lhs_col.append(
+                                    self._varname_to_idx[term._args_[1].name]
                                 )
+                                lhs_data.append(float(term._args_[0]))
                             elif isinstance(term, _GeneralVarData):
-                                lhsc.append((term.name, 1.0))
+                                lhs_row.append(row)
+                                lhs_col.append(self._varname_to_idx[term.name])
+                                lhs_data.append(1.0)
                             else:
                                 raise Exception(
                                     f"Unknown term type: {term.__class__.__name__}"
                                 )
                     elif isinstance(expr, _GeneralVarData):
-                        lhsc.append((expr.name, 1.0))
+                        lhs_row.append(row)
+                        lhs_col.append(self._varname_to_idx[expr.name])
+                        lhs_data.append(1.0)
                     else:
                         raise Exception(
                             f"Unknown expression type: {expr.__class__.__name__}"
                         )
-                    lhs.append(lhsc)
 
             # Extract dual values
             if self._has_lp_solution:
@@ -224,22 +231,28 @@
             if self._has_mip_solution or self._has_lp_solution:
                 slacks.append(model.slack[c])
 
-        for constr in model.component_objects(pyomo.core.Constraint):
+        curr_row = 0
+        for (i, constr) in enumerate(model.component_objects(pyomo.core.Constraint)):
             if isinstance(constr, pe.ConstraintList):
                 for idx in constr:
-                    names.append(f"{constr.name}[{idx}]")
-                    _parse_constraint(constr[idx])
+                    names.append(constr[idx].name)
+                    _parse_constraint(constr[idx], curr_row)
+                    curr_row += 1
             else:
                 names.append(constr.name)
-                _parse_constraint(constr)
-
-        return ConstraintFeatures(
-            names=_none_if_empty(names),
-            rhs=_none_if_empty(rhs),
-            
senses=_none_if_empty(senses), - lhs=_none_if_empty(lhs), - slacks=_none_if_empty(slacks), - dual_values=_none_if_empty(dual_values), + _parse_constraint(constr, curr_row) + curr_row += 1 + + if len(lhs_data) > 0: + lhs = coo_matrix((lhs_data, (lhs_row, lhs_col))).tocoo() + + return Constraints( + names=_none_if_empty(np.array(names, dtype="S")), + rhs=_none_if_empty(np.array(rhs, dtype=float)), + senses=_none_if_empty(np.array(senses, dtype="S")), + lhs=lhs, + slacks=_none_if_empty(np.array(slacks, dtype=float)), + dual_values=_none_if_empty(np.array(dual_values, dtype=float)), ) @overrides @@ -263,7 +276,7 @@ class BasePyomoSolver(InternalSolver): for index in var: if var[index].fixed: continue - solution[f"{var}[{index}]"] = var[index].value + solution[var[index].name.encode()] = var[index].value return solution @overrides @@ -271,7 +284,7 @@ class BasePyomoSolver(InternalSolver): self, with_static: bool = True, with_sa: bool = True, - ) -> VariableFeatures: + ) -> Variables: assert self.model is not None names: List[str] = [] @@ -288,9 +301,9 @@ class BasePyomoSolver(InternalSolver): # Variable name if idx is None: - names.append(str(var)) + names.append(var.name) else: - names.append(f"{var}[{idx}]") + names.append(var[idx].name) if with_static: # Variable type @@ -326,14 +339,14 @@ class BasePyomoSolver(InternalSolver): if self._has_lp_solution or self._has_mip_solution: values.append(v.value) - return VariableFeatures( - names=_none_if_empty(names), - types=_none_if_empty(types), - upper_bounds=_none_if_empty(upper_bounds), - lower_bounds=_none_if_empty(lower_bounds), - obj_coeffs=_none_if_empty(obj_coeffs), - reduced_costs=_none_if_empty(reduced_costs), - values=_none_if_empty(values), + return Variables( + names=_none_if_empty(np.array(names, dtype="S")), + types=_none_if_empty(np.array(types, dtype="S")), + upper_bounds=_none_if_empty(np.array(upper_bounds, dtype=float)), + lower_bounds=_none_if_empty(np.array(lower_bounds, dtype=float)), + obj_coeffs=_none_if_empty(np.array(obj_coeffs, dtype=float)), + reduced_costs=_none_if_empty(np.array(reduced_costs, dtype=float)), + values=_none_if_empty(np.array(values, dtype=float)), ) @overrides @@ -555,12 +568,14 @@ class BasePyomoSolver(InternalSolver): self._all_vars = [] self._bin_vars = [] self._varname_to_var = {} + self._varname_to_idx = {} for var in self.model.component_objects(Var): for idx in var: - varname = f"{var.name}[{idx}]" - if idx is None: - varname = var.name - self._varname_to_var[varname] = var[idx] + varname = var.name + if idx is not None: + varname = var[idx].name + self._varname_to_var[varname.encode()] = var[idx] + self._varname_to_idx[varname] = len(self._all_vars) self._all_vars += [var[idx]] if var[idx].domain == pyomo.core.base.set_types.Binary: self._bin_vars += [var[idx]] @@ -574,7 +589,7 @@ class BasePyomoSolver(InternalSolver): for constr in self.model.component_objects(pyomo.core.Constraint): if isinstance(constr, pe.ConstraintList): for idx in constr: - self._cname_to_constr[f"{constr.name}[{idx}]"] = constr[idx] + self._cname_to_constr[constr[idx].name] = constr[idx] else: self._cname_to_constr[constr.name] = constr @@ -604,6 +619,7 @@ class PyomoTestInstanceKnapsack(Instance): self.weights = weights self.prices = prices self.capacity = capacity + self.n = len(weights) @overrides def to_model(self) -> pe.ConcreteModel: @@ -621,22 +637,26 @@ class PyomoTestInstanceKnapsack(Instance): return model @overrides - def get_instance_features(self) -> List[float]: - return [ - self.capacity, - 
np.average(self.weights), - ] + def get_instance_features(self) -> np.ndarray: + return np.array( + [ + self.capacity, + np.average(self.weights), + ] + ) @overrides - def get_variable_features(self) -> Dict[str, List[float]]: - return { - f"x[{i}]": [ - self.weights[i], - self.prices[i], + def get_variable_features(self, names: np.ndarray) -> np.ndarray: + return np.vstack( + [ + [[self.weights[i], self.prices[i]] for i in range(self.n)], + [0.0, 0.0], ] - for i in range(len(self.weights)) - } + ) @overrides - def get_variable_categories(self) -> Dict[str, Hashable]: - return {f"x[{i}]": "default" for i in range(len(self.weights))} + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: + return np.array( + ["default" if n.decode().startswith("x") else "" for n in names], + dtype="S", + ) diff --git a/miplearn/solvers/tests/__init__.py b/miplearn/solvers/tests/__init__.py index 75c79b4..3bc74d3 100644 --- a/miplearn/solvers/tests/__init__.py +++ b/miplearn/solvers/tests/__init__.py @@ -5,12 +5,13 @@ from typing import Any, List import numpy as np +from scipy.sparse import coo_matrix -from miplearn.features import VariableFeatures, ConstraintFeatures -from miplearn.solvers.internal import InternalSolver +from miplearn.solvers.internal import InternalSolver, Variables, Constraints inf = float("inf") + # NOTE: # This file is in the main source folder, so that it can be called from Julia. @@ -40,31 +41,23 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: # Fetch variables (after-load) assert_equals( solver.get_variables(), - VariableFeatures( - names=["x[0]", "x[1]", "x[2]", "x[3]", "z"], - lower_bounds=[0.0, 0.0, 0.0, 0.0, 0.0], - upper_bounds=[1.0, 1.0, 1.0, 1.0, 67.0], - types=["B", "B", "B", "B", "C"], - obj_coeffs=[505.0, 352.0, 458.0, 220.0, 0.0], + Variables( + names=np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"), + lower_bounds=np.array([0.0, 0.0, 0.0, 0.0, 0.0]), + upper_bounds=np.array([1.0, 1.0, 1.0, 1.0, 67.0]), + types=np.array(["B", "B", "B", "B", "C"], dtype="S"), + obj_coeffs=np.array([505.0, 352.0, 458.0, 220.0, 0.0]), ), ) # Fetch constraints (after-load) assert_equals( solver.get_constraints(), - ConstraintFeatures( - names=["eq_capacity"], - rhs=[0.0], - lhs=[ - [ - ("x[0]", 23.0), - ("x[1]", 26.0), - ("x[2]", 20.0), - ("x[3]", 18.0), - ("z", -1.0), - ], - ], - senses=["="], + Constraints( + names=np.array(["eq_capacity"], dtype="S"), + rhs=np.array([0.0]), + lhs=coo_matrix([[23.0, 26.0, 20.0, 18.0, -1.0]]), + senses=np.array(["="], dtype="S"), ), ) @@ -83,17 +76,21 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: solver.get_variables(with_static=False), _filter_attrs( solver.get_variable_attrs(), - VariableFeatures( - names=["x[0]", "x[1]", "x[2]", "x[3]", "z"], - basis_status=["U", "B", "U", "L", "U"], - reduced_costs=[193.615385, 0.0, 187.230769, -23.692308, 13.538462], - sa_lb_down=[-inf, -inf, -inf, -0.111111, -inf], - sa_lb_up=[1.0, 0.923077, 1.0, 1.0, 67.0], - sa_obj_down=[311.384615, 317.777778, 270.769231, -inf, -13.538462], - sa_obj_up=[inf, 570.869565, inf, 243.692308, inf], - sa_ub_down=[0.913043, 0.923077, 0.9, 0.0, 43.0], - sa_ub_up=[2.043478, inf, 2.2, inf, 69.0], - values=[1.0, 0.923077, 1.0, 0.0, 67.0], + Variables( + names=np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"), + basis_status=np.array(["U", "B", "U", "L", "U"], dtype="S"), + reduced_costs=np.array( + [193.615385, 0.0, 187.230769, -23.692308, 13.538462] + ), + sa_lb_down=np.array([-inf, -inf, -inf, -0.111111, -inf]), + 
sa_lb_up=np.array([1.0, 0.923077, 1.0, 1.0, 67.0]), + sa_obj_down=np.array( + [311.384615, 317.777778, 270.769231, -inf, -13.538462] + ), + sa_obj_up=np.array([inf, 570.869565, inf, 243.692308, inf]), + sa_ub_down=np.array([0.913043, 0.923077, 0.9, 0.0, 43.0]), + sa_ub_up=np.array([2.043478, inf, 2.2, inf, 69.0]), + values=np.array([1.0, 0.923077, 1.0, 0.0, 67.0]), ), ), ) @@ -103,13 +100,13 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: solver.get_constraints(with_static=False), _filter_attrs( solver.get_constraint_attrs(), - ConstraintFeatures( - basis_status=["N"], - dual_values=[13.538462], - names=["eq_capacity"], - sa_rhs_down=[-24.0], - sa_rhs_up=[2.0], - slacks=[0.0], + Constraints( + basis_status=np.array(["N"], dtype="S"), + dual_values=np.array([13.538462]), + names=np.array(["eq_capacity"], dtype="S"), + sa_rhs_down=np.array([-24.0]), + sa_rhs_up=np.array([2.0]), + slacks=np.array([0.0]), ), ), ) @@ -136,9 +133,9 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: solver.get_variables(with_static=False), _filter_attrs( solver.get_variable_attrs(), - VariableFeatures( - names=["x[0]", "x[1]", "x[2]", "x[3]", "z"], - values=[1.0, 0.0, 1.0, 1.0, 61.0], + Variables( + names=np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"), + values=np.array([1.0, 0.0, 1.0, 1.0, 61.0]), ), ), ) @@ -148,19 +145,19 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: solver.get_constraints(with_static=False), _filter_attrs( solver.get_constraint_attrs(), - ConstraintFeatures( - names=["eq_capacity"], - slacks=[0.0], + Constraints( + names=np.array(["eq_capacity"], dtype="S"), + slacks=np.array([0.0]), ), ), ) # Build new constraint and verify that it is violated - cf = ConstraintFeatures( - names=["cut"], - lhs=[[("x[0]", 1.0)]], - rhs=[0.0], - senses=["<"], + cf = Constraints( + names=np.array(["cut"], dtype="S"), + lhs=coo_matrix([[1.0, 0.0, 0.0, 0.0, 0.0]]), + rhs=np.array([0.0]), + senses=np.array(["<"], dtype="S"), ) assert_equals(solver.are_constraints_satisfied(cf), [False]) @@ -170,22 +167,16 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: solver.get_constraints(with_static=True), _filter_attrs( solver.get_constraint_attrs(), - ConstraintFeatures( - names=["eq_capacity", "cut"], - rhs=[0.0, 0.0], - lhs=[ - [ - ("x[0]", 23.0), - ("x[1]", 26.0), - ("x[2]", 20.0), - ("x[3]", 18.0), - ("z", -1.0), - ], + Constraints( + names=np.array(["eq_capacity", "cut"], dtype="S"), + rhs=np.array([0.0, 0.0]), + lhs=coo_matrix( [ - ("x[0]", 1.0), - ], - ], - senses=["=", "<"], + [23.0, 26.0, 20.0, 18.0, -1.0], + [1.0, 0.0, 0.0, 0.0, 0.0], + ] + ), + senses=np.array(["=", "<"], dtype="S"), ), ), ) @@ -194,7 +185,7 @@ def run_basic_usage_tests(solver: InternalSolver) -> None: assert_equals(solver.are_constraints_satisfied(cf), [True]) # Remove the new constraint - solver.remove_constraints(["cut"]) + solver.remove_constraints(np.array(["cut"], dtype="S")) # New constraint should no longer affect solution stats = solver.solve() @@ -205,16 +196,16 @@ def run_warm_start_tests(solver: InternalSolver) -> None: instance = solver.build_test_instance_knapsack() model = instance.to_model() solver.set_instance(instance, model) - solver.set_warm_start({"x[0]": 1.0, "x[1]": 0.0, "x[2]": 0.0, "x[3]": 1.0}) + solver.set_warm_start({b"x[0]": 1.0, b"x[1]": 0.0, b"x[2]": 0.0, b"x[3]": 1.0}) stats = solver.solve(tee=True) if stats.mip_warm_start_value is not None: assert_equals(stats.mip_warm_start_value, 725.0) - solver.set_warm_start({"x[0]": 1.0, "x[1]": 1.0, "x[2]": 
1.0, "x[3]": 1.0}) + solver.set_warm_start({b"x[0]": 1.0, b"x[1]": 1.0, b"x[2]": 1.0, b"x[3]": 1.0}) stats = solver.solve(tee=True) assert stats.mip_warm_start_value is None - solver.fix({"x[0]": 1.0, "x[1]": 0.0, "x[2]": 0.0, "x[3]": 1.0}) + solver.fix({b"x[0]": 1.0, b"x[1]": 0.0, b"x[2]": 0.0, b"x[3]": 1.0}) stats = solver.solve(tee=True) assert_equals(stats.mip_lower_bound, 725.0) assert_equals(stats.mip_upper_bound, 725.0) @@ -254,15 +245,15 @@ def run_lazy_cb_tests(solver: InternalSolver) -> None: def lazy_cb(cb_solver: InternalSolver, cb_model: Any) -> None: relsol = cb_solver.get_solution() assert relsol is not None - assert relsol["x[0]"] is not None - if relsol["x[0]"] > 0: - instance.enforce_lazy_constraint(cb_solver, cb_model, "cut") + assert relsol[b"x[0]"] is not None + if relsol[b"x[0]"] > 0: + instance.enforce_lazy_constraint(cb_solver, cb_model, b"cut") solver.set_instance(instance, model) solver.solve(lazy_cb=lazy_cb) solution = solver.get_solution() assert solution is not None - assert_equals(solution["x[0]"], 0.0) + assert_equals(solution[b"x[0]"], 0.0) def _equals_preprocess(obj: Any) -> Any: @@ -271,7 +262,9 @@ def _equals_preprocess(obj: Any) -> Any: return np.round(obj, decimals=6).tolist() else: return obj.tolist() - elif isinstance(obj, (int, str)): + elif isinstance(obj, coo_matrix): + return obj.todense().tolist() + elif isinstance(obj, (int, str, bool, np.bool_, np.bytes_, bytes, bytearray)): return obj elif isinstance(obj, float): return round(obj, 6) diff --git a/miplearn/types.py b/miplearn/types.py index ca1cfc4..2fd0345 100644 --- a/miplearn/types.py +++ b/miplearn/types.py @@ -2,7 +2,7 @@ # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. -from typing import Optional, Dict, Callable, Any, Union, TYPE_CHECKING, Hashable +from typing import Optional, Dict, Callable, Any, Union, TYPE_CHECKING from mypy_extensions import TypedDict @@ -10,13 +10,14 @@ if TYPE_CHECKING: # noinspection PyUnresolvedReferences from miplearn.solvers.learning import InternalSolver -Category = Hashable +Category = bytes +ConstraintName = bytes +ConstraintCategory = bytes IterationCallback = Callable[[], bool] LazyCallback = Callable[[Any, Any], None] SolverParams = Dict[str, Any] UserCutCallback = Callable[["InternalSolver", Any], None] -VariableName = str -Solution = Dict[VariableName, Optional[float]] +Solution = Dict[bytes, Optional[float]] LearningSolveStats = TypedDict( "LearningSolveStats", diff --git a/setup.py b/setup.py index c0fe2ef..c82b8ac 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ with open("README.md", "r") as fh: setup( name="miplearn", - version="0.2.0.dev10", + version="0.2.0.dev11", author="Alinson S. 
Xavier", author_email="axavier@anl.gov", description="Extensible framework for Learning-Enhanced Mixed-Integer Optimization", @@ -19,20 +19,21 @@ setup( packages=find_namespace_packages(), python_requires=">=3.7", install_requires=[ + "decorator>=4,<5", + "h5py>=3,<4", "matplotlib>=3,<4", + "mypy==0.790", "networkx>=2,<3", "numpy>=1,<1.21", + "overrides>=3,<4", "p_tqdm>=1,<2", "pandas>=1,<2", "pyomo>=5,<6", "pytest>=6,<7", "python-markdown-math>=0.8,<0.9", - "seaborn>=0.11,<0.12", "scikit-learn>=0.24,<0.25", + "seaborn>=0.11,<0.12", "tqdm>=4,<5", - "mypy==0.790", - "decorator>=4,<5", - "overrides>=3,<4", ], extras_require={ "dev": [ diff --git a/tests/components/test_dynamic_lazy.py b/tests/components/test_dynamic_lazy.py index bd43eaa..4fbdc0b 100644 --- a/tests/components/test_dynamic_lazy.py +++ b/tests/components/test_dynamic_lazy.py @@ -11,11 +11,7 @@ from miplearn.classifiers import Classifier from miplearn.classifiers.threshold import MinProbabilityThreshold from miplearn.components import classifier_evaluation_dict from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent -from miplearn.features import ( - Features, - InstanceFeatures, - Sample, -) +from miplearn.features.sample import MemorySample from miplearn.instance.base import Instance from miplearn.solvers.tests import assert_equals @@ -26,70 +22,78 @@ E = 0.1 def training_instances() -> List[Instance]: instances = [cast(Instance, Mock(spec=Instance)) for _ in range(2)] samples_0 = [ - Sample( - after_load=Features(instance=InstanceFeatures()), - after_mip=Features(extra={"lazy_enforced": {"c1", "c2"}}), + MemorySample( + { + "mip_constr_lazy_enforced": np.array(["c1", "c2"], dtype="S"), + "static_instance_features": np.array([5.0]), + }, ), - Sample( - after_load=Features(instance=InstanceFeatures()), - after_mip=Features(extra={"lazy_enforced": {"c2", "c3"}}), + MemorySample( + { + "mip_constr_lazy_enforced": np.array(["c2", "c3"], dtype="S"), + "static_instance_features": np.array([5.0]), + }, ), ] - samples_0[0].after_load.instance.to_list = Mock(return_value=[5.0]) # type: ignore - samples_0[1].after_load.instance.to_list = Mock(return_value=[5.0]) # type: ignore instances[0].get_samples = Mock(return_value=samples_0) # type: ignore instances[0].get_constraint_categories = Mock( # type: ignore - return_value={ - "c1": "type-a", - "c2": "type-a", - "c3": "type-b", - "c4": "type-b", - } + return_value=np.array(["type-a", "type-a", "type-b", "type-b"], dtype="S") ) instances[0].get_constraint_features = Mock( # type: ignore - return_value={ - "c1": [1.0, 2.0, 3.0], - "c2": [4.0, 5.0, 6.0], - "c3": [1.0, 2.0], - "c4": [3.0, 4.0], - } + return_value=np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + [1.0, 2.0, 0.0], + [3.0, 4.0, 0.0], + ] + ) + ) + instances[0].are_constraints_lazy = Mock( # type: ignore + return_value=np.zeros(4, dtype=bool) ) samples_1 = [ - Sample( - after_load=Features(instance=InstanceFeatures()), - after_mip=Features(extra={"lazy_enforced": {"c3", "c4"}}), + MemorySample( + { + "mip_constr_lazy_enforced": np.array(["c3", "c4"], dtype="S"), + "static_instance_features": np.array([8.0]), + }, ) ] - samples_1[0].after_load.instance.to_list = Mock(return_value=[8.0]) # type: ignore instances[1].get_samples = Mock(return_value=samples_1) # type: ignore instances[1].get_constraint_categories = Mock( # type: ignore - return_value={ - "c1": None, - "c2": "type-a", - "c3": "type-b", - "c4": "type-b", - } + return_value=np.array(["", "type-a", "type-b", "type-b"], dtype="S") ) 
instances[1].get_constraint_features = Mock( # type: ignore - return_value={ - "c2": [7.0, 8.0, 9.0], - "c3": [5.0, 6.0], - "c4": [7.0, 8.0], - } + return_value=np.array( + [ + [7.0, 8.0, 9.0], + [5.0, 6.0, 0.0], + [7.0, 8.0, 0.0], + ] + ) + ) + instances[1].are_constraints_lazy = Mock( # type: ignore + return_value=np.zeros(4, dtype=bool) ) return instances def test_sample_xy(training_instances: List[Instance]) -> None: comp = DynamicLazyConstraintsComponent() - comp.pre_fit([{"c1", "c2", "c3", "c4"}]) + comp.pre_fit( + [ + np.array(["c1", "c3", "c4"], dtype="S"), + np.array(["c1", "c2", "c4"], dtype="S"), + ] + ) x_expected = { - "type-a": [[5.0, 1.0, 2.0, 3.0], [5.0, 4.0, 5.0, 6.0]], - "type-b": [[5.0, 1.0, 2.0], [5.0, 3.0, 4.0]], + b"type-a": np.array([[5.0, 1.0, 2.0, 3.0], [5.0, 4.0, 5.0, 6.0]]), + b"type-b": np.array([[5.0, 1.0, 2.0, 0.0], [5.0, 3.0, 4.0, 0.0]]), } y_expected = { - "type-a": [[False, True], [False, True]], - "type-b": [[True, False], [True, False]], + b"type-a": np.array([[False, True], [False, True]]), + b"type-b": np.array([[True, False], [True, False]]), } x_actual, y_actual = comp.sample_xy( training_instances[0], @@ -99,95 +103,26 @@ def test_sample_xy(training_instances: List[Instance]) -> None: assert_equals(y_actual, y_expected) -# def test_fit(training_instances: List[Instance]) -> None: -# clf = Mock(spec=Classifier) -# clf.clone = Mock(side_effect=lambda: Mock(spec=Classifier)) -# comp = DynamicLazyConstraintsComponent(classifier=clf) -# comp.fit(training_instances) -# assert clf.clone.call_count == 2 -# -# assert "type-a" in comp.classifiers -# clf_a = comp.classifiers["type-a"] -# assert clf_a.fit.call_count == 1 # type: ignore -# assert_array_equal( -# clf_a.fit.call_args[0][0], # type: ignore -# np.array( -# [ -# [5.0, 1.0, 2.0, 3.0], -# [5.0, 4.0, 5.0, 6.0], -# [5.0, 1.0, 2.0, 3.0], -# [5.0, 4.0, 5.0, 6.0], -# [8.0, 7.0, 8.0, 9.0], -# ] -# ), -# ) -# assert_array_equal( -# clf_a.fit.call_args[0][1], # type: ignore -# np.array( -# [ -# [False, True], -# [False, True], -# [True, False], -# [False, True], -# [True, False], -# ] -# ), -# ) -# -# assert "type-b" in comp.classifiers -# clf_b = comp.classifiers["type-b"] -# assert clf_b.fit.call_count == 1 # type: ignore -# assert_array_equal( -# clf_b.fit.call_args[0][0], # type: ignore -# np.array( -# [ -# [5.0, 1.0, 2.0], -# [5.0, 3.0, 4.0], -# [5.0, 1.0, 2.0], -# [5.0, 3.0, 4.0], -# [8.0, 5.0, 6.0], -# [8.0, 7.0, 8.0], -# ] -# ), -# ) -# assert_array_equal( -# clf_b.fit.call_args[0][1], # type: ignore -# np.array( -# [ -# [True, False], -# [True, False], -# [False, True], -# [True, False], -# [False, True], -# [False, True], -# ] -# ), -# ) - - def test_sample_predict_evaluate(training_instances: List[Instance]) -> None: comp = DynamicLazyConstraintsComponent() - comp.known_cids.extend(["c1", "c2", "c3", "c4"]) - comp.thresholds["type-a"] = MinProbabilityThreshold([0.5, 0.5]) - comp.thresholds["type-b"] = MinProbabilityThreshold([0.5, 0.5]) - comp.classifiers["type-a"] = Mock(spec=Classifier) - comp.classifiers["type-b"] = Mock(spec=Classifier) - comp.classifiers["type-a"].predict_proba = Mock( # type: ignore + comp.known_cids.extend([b"c1", b"c2", b"c3", b"c4"]) + comp.thresholds[b"type-a"] = MinProbabilityThreshold([0.5, 0.5]) + comp.thresholds[b"type-b"] = MinProbabilityThreshold([0.5, 0.5]) + comp.classifiers[b"type-a"] = Mock(spec=Classifier) + comp.classifiers[b"type-b"] = Mock(spec=Classifier) + comp.classifiers[b"type-a"].predict_proba = Mock( # type: ignore side_effect=lambda _: np.array([[0.1, 
0.9], [0.8, 0.2]]) ) - comp.classifiers["type-b"].predict_proba = Mock( # type: ignore + comp.classifiers[b"type-b"].predict_proba = Mock( # type: ignore side_effect=lambda _: np.array([[0.9, 0.1], [0.1, 0.9]]) ) pred = comp.sample_predict( training_instances[0], training_instances[0].get_samples()[0], ) - assert pred == ["c1", "c4"] + assert pred == [b"c1", b"c4"] ev = comp.sample_evaluate( training_instances[0], training_instances[0].get_samples()[0], ) - assert ev == { - "type-a": classifier_evaluation_dict(tp=1, fp=0, tn=0, fn=1), - "type-b": classifier_evaluation_dict(tp=0, fp=1, tn=1, fn=0), - } + assert ev == classifier_evaluation_dict(tp=1, fp=1, tn=1, fn=1) diff --git a/tests/components/test_dynamic_user_cuts.py b/tests/components/test_dynamic_user_cuts.py index c46a955..2bae1a6 100644 --- a/tests/components/test_dynamic_user_cuts.py +++ b/tests/components/test_dynamic_user_cuts.py @@ -3,7 +3,7 @@ # Released under the modified BSD license. See COPYING.md for more details. import logging -from typing import Any, FrozenSet, Hashable, List +from typing import Any, FrozenSet, List import gurobipy as gp import networkx as nx @@ -17,6 +17,7 @@ from miplearn.components.dynamic_user_cuts import UserCutsComponent from miplearn.instance.base import Instance from miplearn.solvers.gurobi import GurobiSolver from miplearn.solvers.learning import LearningSolver +from miplearn.types import ConstraintName, ConstraintCategory logger = logging.getLogger(__name__) @@ -40,13 +41,13 @@ class GurobiStableSetProblem(Instance): return True @overrides - def find_violated_user_cuts(self, model: Any) -> List[FrozenSet]: + def find_violated_user_cuts(self, model: Any) -> List[ConstraintName]: assert isinstance(model, gp.Model) vals = model.cbGetNodeRel(model.getVars()) violations = [] for clique in nx.find_cliques(self.graph): if sum(vals[i] for i in clique) > 1: - violations += [frozenset(clique)] + violations.append(",".join([str(i) for i in clique]).encode()) return violations @overrides @@ -54,11 +55,11 @@ class GurobiStableSetProblem(Instance): self, solver: InternalSolver, model: Any, - cid: Hashable, + cid: ConstraintName, ) -> Any: - assert isinstance(cid, FrozenSet) + clique = [int(i) for i in cid.decode().split(",")] x = model.getVars() - model.addConstr(gp.quicksum([x[i] for i in cid]) <= 1) + model.addConstr(gp.quicksum([x[i] for i in clique]) <= 1) @pytest.fixture @@ -81,10 +82,9 @@ def test_usage( ) -> None: stats_before = solver.solve(stab_instance) sample = stab_instance.get_samples()[0] - assert sample.after_mip is not None - assert sample.after_mip.extra is not None - assert len(sample.after_mip.extra["user_cuts_enforced"]) > 0 - print(stats_before) + user_cuts_enforced = sample.get_array("mip_user_cuts_enforced") + assert user_cuts_enforced is not None + assert len(user_cuts_enforced) > 0 assert stats_before["UserCuts: Added ahead-of-time"] == 0 assert stats_before["UserCuts: Added in callback"] > 0 diff --git a/tests/components/test_objective.py b/tests/components/test_objective.py index bba86a7..fc45083 100644 --- a/tests/components/test_objective.py +++ b/tests/components/test_objective.py @@ -1,7 +1,7 @@ # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. 
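The user-cuts change above replaces `frozenset` identifiers with byte strings. A minimal round-trip of that encoding, using only constructs shown in this patch:

```python
# Encode a clique as a ConstraintName (bytes), as GurobiStableSetProblem
# now does in find_violated_user_cuts ...
clique = [3, 7, 11]
cid = ",".join(str(i) for i in clique).encode()  # b"3,7,11"

# ... and decode it back in enforce_lazy_constraint.
assert [int(i) for i in cid.decode().split(",")] == clique
```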
-from typing import Hashable, Dict +from typing import Dict from unittest.mock import Mock import numpy as np @@ -10,55 +10,46 @@ from numpy.testing import assert_array_equal from miplearn.classifiers import Regressor from miplearn.components.objective import ObjectiveValueComponent -from miplearn.features import InstanceFeatures, Features, Sample -from miplearn.solvers.internal import MIPSolveStats, LPSolveStats +from miplearn.features.sample import Sample, MemorySample from miplearn.solvers.learning import LearningSolver from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver +from miplearn.solvers.tests import assert_equals @pytest.fixture def sample() -> Sample: - sample = Sample( - after_load=Features( - instance=InstanceFeatures(), - ), - after_lp=Features( - lp_solve=LPSolveStats(), - ), - after_mip=Features( - mip_solve=MIPSolveStats( - mip_lower_bound=1.0, - mip_upper_bound=2.0, - ) - ), + sample = MemorySample( + { + "mip_lower_bound": 1.0, + "mip_upper_bound": 2.0, + "lp_instance_features": np.array([1.0, 2.0, 3.0]), + }, ) - sample.after_load.instance.to_list = Mock(return_value=[1.0, 2.0]) # type: ignore - sample.after_lp.lp_solve.to_list = Mock(return_value=[3.0]) # type: ignore return sample def test_sample_xy(sample: Sample) -> None: x_expected = { - "Lower bound": [[1.0, 2.0, 3.0]], - "Upper bound": [[1.0, 2.0, 3.0]], + "Lower bound": np.array([[1.0, 2.0, 3.0]]), + "Upper bound": np.array([[1.0, 2.0, 3.0]]), } y_expected = { - "Lower bound": [[1.0]], - "Upper bound": [[2.0]], + "Lower bound": np.array([[1.0]]), + "Upper bound": np.array([[2.0]]), } xy = ObjectiveValueComponent().sample_xy(None, sample) assert xy is not None x_actual, y_actual = xy - assert x_actual == x_expected - assert y_actual == y_expected + assert_equals(x_actual, x_expected) + assert_equals(y_actual, y_expected) def test_fit_xy() -> None: - x: Dict[Hashable, np.ndarray] = { + x: Dict[str, np.ndarray] = { "Lower bound": np.array([[0.0, 0.0], [1.0, 2.0]]), "Upper bound": np.array([[0.0, 0.0], [1.0, 2.0]]), } - y: Dict[Hashable, np.ndarray] = { + y: Dict[str, np.ndarray] = { "Lower bound": np.array([[100.0]]), "Upper bound": np.array([[200.0]]), } diff --git a/tests/components/test_primal.py b/tests/components/test_primal.py index 1f83bc7..a77cad0 100644 --- a/tests/components/test_primal.py +++ b/tests/components/test_primal.py @@ -12,12 +12,7 @@ from miplearn.classifiers import Classifier from miplearn.classifiers.threshold import Threshold from miplearn.components import classifier_evaluation_dict from miplearn.components.primal import PrimalSolutionComponent -from miplearn.features import ( - Features, - Sample, - InstanceFeatures, - VariableFeatures, -) +from miplearn.features.sample import Sample, MemorySample from miplearn.problems.tsp import TravelingSalesmanGenerator from miplearn.solvers.learning import LearningSolver from miplearn.solvers.tests import assert_equals @@ -25,54 +20,46 @@ from miplearn.solvers.tests import assert_equals @pytest.fixture def sample() -> Sample: - sample = Sample( - after_load=Features( - instance=InstanceFeatures(), - variables=VariableFeatures( - names=["x[0]", "x[1]", "x[2]", "x[3]"], - categories=["default", None, "default", "default"], + sample = MemorySample( + { + "static_var_names": np.array(["x[0]", "x[1]", "x[2]", "x[3]"], dtype="S"), + "static_var_categories": np.array( + ["default", "", "default", "default"], + dtype="S", ), - ), - after_lp=Features( - variables=VariableFeatures(), - ), - after_mip=Features( - variables=VariableFeatures( - 
names=["x[0]", "x[1]", "x[2]", "x[3]"], - values=[0.0, 1.0, 1.0, 0.0], - ) - ), - ) - sample.after_load.instance.to_list = Mock(return_value=[5.0]) # type: ignore - sample.after_load.variables.to_list = Mock( # type:ignore - side_effect=lambda i: [ - [0.0, 0.0], - None, - [1.0, 0.0], - [1.0, 1.0], - ][i] - ) - sample.after_lp.variables.to_list = Mock( # type:ignore - side_effect=lambda i: [ - [2.0, 2.0], - None, - [3.0, 2.0], - [3.0, 3.0], - ][i] + "mip_var_values": np.array([0.0, 1.0, 1.0, 0.0]), + "static_instance_features": np.array([5.0]), + "static_var_features": np.array( + [ + [0.0, 0.0], + [0.0, 0.0], + [1.0, 0.0], + [1.0, 1.0], + ] + ), + "lp_var_features": np.array( + [ + [0.0, 0.0, 2.0, 2.0], + [0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 3.0, 2.0], + [1.0, 1.0, 3.0, 3.0], + ] + ), + }, ) return sample def test_xy(sample: Sample) -> None: x_expected = { - "default": [ + b"default": [ [5.0, 0.0, 0.0, 2.0, 2.0], [5.0, 1.0, 0.0, 3.0, 2.0], [5.0, 1.0, 1.0, 3.0, 3.0], ] } y_expected = { - "default": [ + b"default": [ [True, False], [False, True], [True, False], @@ -92,15 +79,15 @@ def test_fit_xy() -> None: thr.clone = lambda: Mock(spec=Threshold) comp = PrimalSolutionComponent(classifier=clf, threshold=thr) x = { - "type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), - "type-b": np.array([[7.0, 8.0, 9.0]]), + b"type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), + b"type-b": np.array([[7.0, 8.0, 9.0]]), } y = { - "type-a": np.array([[True, False], [False, True]]), - "type-b": np.array([[True, False]]), + b"type-a": np.array([[True, False], [False, True]]), + b"type-b": np.array([[True, False]]), } comp.fit_xy(x, y) - for category in ["type-a", "type-b"]: + for category in [b"type-a", b"type-b"]: assert category in comp.classifiers assert category in comp.thresholds clf = comp.classifiers[category] # type: ignore @@ -132,17 +119,17 @@ def test_usage() -> None: def test_evaluate(sample: Sample) -> None: comp = PrimalSolutionComponent() comp.sample_predict = lambda _: { # type: ignore - "x[0]": 1.0, - "x[1]": 1.0, - "x[2]": 0.0, - "x[3]": None, + b"x[0]": 1.0, + b"x[1]": 1.0, + b"x[2]": 0.0, + b"x[3]": None, } ev = comp.sample_evaluate(None, sample) assert_equals( ev, { - 0: classifier_evaluation_dict(tp=0, fp=1, tn=1, fn=2), - 1: classifier_evaluation_dict(tp=1, fp=1, tn=1, fn=1), + "0": classifier_evaluation_dict(tp=0, fp=1, tn=1, fn=2), + "1": classifier_evaluation_dict(tp=1, fp=1, tn=1, fn=1), }, ) @@ -162,16 +149,16 @@ def test_predict(sample: Sample) -> None: thr.predict = Mock(return_value=[0.75, 0.75]) comp = PrimalSolutionComponent() x, _ = comp.sample_xy(None, sample) - comp.classifiers = {"default": clf} - comp.thresholds = {"default": thr} + comp.classifiers = {b"default": clf} + comp.thresholds = {b"default": thr} pred = comp.sample_predict(sample) clf.predict_proba.assert_called_once() thr.predict.assert_called_once() - assert_array_equal(x["default"], clf.predict_proba.call_args[0][0]) - assert_array_equal(x["default"], thr.predict.call_args[0][0]) + assert_array_equal(x[b"default"], clf.predict_proba.call_args[0][0]) + assert_array_equal(x[b"default"], thr.predict.call_args[0][0]) assert pred == { - "x[0]": 0.0, - "x[1]": None, - "x[2]": None, - "x[3]": 1.0, + b"x[0]": 0.0, + b"x[1]": None, + b"x[2]": None, + b"x[3]": 1.0, } diff --git a/tests/components/test_static_lazy.py b/tests/components/test_static_lazy.py index 67c9c5d..9455dcd 100644 --- a/tests/components/test_static_lazy.py +++ b/tests/components/test_static_lazy.py @@ -1,7 +1,7 @@ # MIPLearn: Extensible Framework 
for Learning-Enhanced Mixed-Integer Optimization # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. -from typing import Dict, cast, Hashable +from typing import Dict, cast from unittest.mock import Mock, call import numpy as np @@ -11,58 +11,43 @@ from numpy.testing import assert_array_equal from miplearn.classifiers import Classifier from miplearn.classifiers.threshold import Threshold, MinProbabilityThreshold from miplearn.components.static_lazy import StaticLazyConstraintsComponent -from miplearn.features import ( - InstanceFeatures, - Features, - Sample, - ConstraintFeatures, -) +from miplearn.features.sample import Sample, MemorySample from miplearn.instance.base import Instance -from miplearn.solvers.internal import InternalSolver +from miplearn.solvers.internal import InternalSolver, Constraints from miplearn.solvers.learning import LearningSolver from miplearn.types import ( LearningSolveStats, + ConstraintCategory, ) +from miplearn.solvers.tests import assert_equals @pytest.fixture def sample() -> Sample: - sample = Sample( - after_load=Features( - instance=InstanceFeatures( - lazy_constraint_count=4, - ), - constraints=ConstraintFeatures( - names=["c1", "c2", "c3", "c4", "c5"], - categories=[ - "type-a", - "type-a", - "type-a", - "type-b", - "type-b", - ], - lazy=[True, True, True, True, False], + sample = MemorySample( + { + "static_constr_categories": [ + b"type-a", + b"type-a", + b"type-a", + b"type-b", + b"type-b", + ], + "static_constr_lazy": np.array([True, True, True, True, False]), + "static_constr_names": np.array(["c1", "c2", "c3", "c4", "c5"], dtype="S"), + "static_instance_features": [5.0], + "mip_constr_lazy_enforced": np.array(["c1", "c2", "c4"], dtype="S"), + "lp_constr_features": np.array( + [ + [1.0, 1.0, 0.0], + [1.0, 2.0, 0.0], + [1.0, 3.0, 0.0], + [1.0, 4.0, 0.0], + [0.0, 0.0, 0.0], + ] ), - ), - after_lp=Features( - instance=InstanceFeatures(), - constraints=ConstraintFeatures(names=["c1", "c2", "c3", "c4", "c5"]), - ), - after_mip=Features( - extra={ - "lazy_enforced": {"c1", "c2", "c4"}, - } - ), - ) - sample.after_lp.instance.to_list = Mock(return_value=[5.0]) # type: ignore - sample.after_lp.constraints.to_list = Mock( # type: ignore - side_effect=lambda idx: { - 0: [1.0, 1.0], - 1: [1.0, 2.0], - 2: [1.0, 3.0], - 3: [1.0, 4.0, 0.0], - 4: None, - }[idx] + "static_constr_lazy_count": 4, + }, ) return sample @@ -87,13 +72,13 @@ def test_usage_with_solver(instance: Instance) -> None: ) component = StaticLazyConstraintsComponent(violation_tolerance=1.0) - component.thresholds["type-a"] = MinProbabilityThreshold([0.5, 0.5]) - component.thresholds["type-b"] = MinProbabilityThreshold([0.5, 0.5]) + component.thresholds[b"type-a"] = MinProbabilityThreshold([0.5, 0.5]) + component.thresholds[b"type-b"] = MinProbabilityThreshold([0.5, 0.5]) component.classifiers = { - "type-a": Mock(spec=Classifier), - "type-b": Mock(spec=Classifier), + b"type-a": Mock(spec=Classifier), + b"type-b": Mock(spec=Classifier), } - component.classifiers["type-a"].predict_proba = Mock( # type: ignore + component.classifiers[b"type-a"].predict_proba = Mock( # type: ignore return_value=np.array( [ [0.00, 1.00], # c1 @@ -102,7 +87,7 @@ def test_usage_with_solver(instance: Instance) -> None: ] ) ) - component.classifiers["type-b"].predict_proba = Mock( # type: ignore + component.classifiers[b"type-b"].predict_proba = Mock( # type: ignore return_value=np.array( [ [0.02, 0.98], # c4 @@ -112,10 +97,7 @@ 
def test_usage_with_solver(instance: Instance) -> None: stats: LearningSolveStats = {} sample = instance.get_samples()[0] - assert sample.after_load is not None - assert sample.after_mip is not None - assert sample.after_mip.extra is not None - del sample.after_mip.extra["lazy_enforced"] + assert sample.get_array("mip_constr_lazy_enforced") is not None # LearningSolver calls before_solve_mip component.before_solve_mip( @@ -127,12 +109,12 @@ def test_usage_with_solver(instance: Instance) -> None: ) # Should ask ML to predict whether each lazy constraint should be enforced - component.classifiers["type-a"].predict_proba.assert_called_once() - component.classifiers["type-b"].predict_proba.assert_called_once() + component.classifiers[b"type-a"].predict_proba.assert_called_once() + component.classifiers[b"type-b"].predict_proba.assert_called_once() # Should ask internal solver to remove some constraints assert internal.remove_constraints.call_count == 1 - internal.remove_constraints.assert_has_calls([call(["c3"])]) + internal.remove_constraints.assert_has_calls([call([b"c3"])]) # LearningSolver calls after_iteration (first time) should_repeat = component.iteration_cb(solver, instance, None) @@ -140,8 +122,7 @@ def test_usage_with_solver(instance: Instance) -> None: # Should ask internal solver to verify if constraints in the pool are # satisfied and add the ones that are not - assert sample.after_load.constraints is not None - c = sample.after_load.constraints[[False, False, True, False, False]] + c = Constraints.from_sample(sample)[[False, False, True, False, False]] internal.are_constraints_satisfied.assert_called_once_with(c, tol=1.0) internal.are_constraints_satisfied.reset_mock() internal.add_constraints.assert_called_once_with(c) @@ -165,8 +146,13 @@ def test_usage_with_solver(instance: Instance) -> None: ) # Should update training sample - assert sample.after_mip.extra["lazy_enforced"] == {"c1", "c2", "c3", "c4"} - # + mip_constr_lazy_enforced = sample.get_array("mip_constr_lazy_enforced") + assert mip_constr_lazy_enforced is not None + assert_equals( + sorted(mip_constr_lazy_enforced), + np.array(["c1", "c2", "c3", "c4"], dtype="S"), + ) + # Should update stats assert stats["LazyStatic: Removed"] == 1 assert stats["LazyStatic: Kept"] == 3 @@ -176,39 +162,39 @@ def test_usage_with_solver(instance: Instance) -> None: def test_sample_predict(sample: Sample) -> None: comp = StaticLazyConstraintsComponent() - comp.thresholds["type-a"] = MinProbabilityThreshold([0.5, 0.5]) - comp.thresholds["type-b"] = MinProbabilityThreshold([0.5, 0.5]) - comp.classifiers["type-a"] = Mock(spec=Classifier) - comp.classifiers["type-a"].predict_proba = lambda _: np.array( # type:ignore + comp.thresholds[b"type-a"] = MinProbabilityThreshold([0.5, 0.5]) + comp.thresholds[b"type-b"] = MinProbabilityThreshold([0.5, 0.5]) + comp.classifiers[b"type-a"] = Mock(spec=Classifier) + comp.classifiers[b"type-a"].predict_proba = lambda _: np.array( # type:ignore [ [0.0, 1.0], # c1 [0.0, 0.9], # c2 [0.9, 0.1], # c3 ] ) - comp.classifiers["type-b"] = Mock(spec=Classifier) - comp.classifiers["type-b"].predict_proba = lambda _: np.array( # type:ignore + comp.classifiers[b"type-b"] = Mock(spec=Classifier) + comp.classifiers[b"type-b"].predict_proba = lambda _: np.array( # type:ignore [ [0.0, 1.0], # c4 ] ) pred = comp.sample_predict(sample) - assert pred == ["c1", "c2", "c4"] + assert pred == [b"c1", b"c2", b"c4"] def test_fit_xy() -> None: x = cast( - Dict[Hashable, np.ndarray], + Dict[ConstraintCategory, np.ndarray], { - 
"type-a": np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]]), - "type-b": np.array([[1.0, 4.0, 0.0]]), + b"type-a": np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]]), + b"type-b": np.array([[1.0, 4.0, 0.0]]), }, ) y = cast( - Dict[Hashable, np.ndarray], + Dict[ConstraintCategory, np.ndarray], { - "type-a": np.array([[False, True], [False, True], [True, False]]), - "type-b": np.array([[False, True]]), + b"type-a": np.array([[False, True], [False, True], [True, False]]), + b"type-b": np.array([[False, True]]), }, ) clf: Classifier = Mock(spec=Classifier) @@ -221,15 +207,15 @@ def test_fit_xy() -> None: ) comp.fit_xy(x, y) assert clf.clone.call_count == 2 - clf_a = comp.classifiers["type-a"] - clf_b = comp.classifiers["type-b"] + clf_a = comp.classifiers[b"type-a"] + clf_b = comp.classifiers[b"type-b"] assert clf_a.fit.call_count == 1 # type: ignore assert clf_b.fit.call_count == 1 # type: ignore - assert_array_equal(clf_a.fit.call_args[0][0], x["type-a"]) # type: ignore - assert_array_equal(clf_b.fit.call_args[0][0], x["type-b"]) # type: ignore + assert_array_equal(clf_a.fit.call_args[0][0], x[b"type-a"]) # type: ignore + assert_array_equal(clf_b.fit.call_args[0][0], x[b"type-b"]) # type: ignore assert thr.clone.call_count == 2 - thr_a = comp.thresholds["type-a"] - thr_b = comp.thresholds["type-b"] + thr_a = comp.thresholds[b"type-a"] + thr_b = comp.thresholds[b"type-b"] assert thr_a.fit.call_count == 1 # type: ignore assert thr_b.fit.call_count == 1 # type: ignore assert thr_a.fit.call_args[0][0] == clf_a # type: ignore @@ -238,12 +224,12 @@ def test_fit_xy() -> None: def test_sample_xy(sample: Sample) -> None: x_expected = { - "type-a": [[5.0, 1.0, 1.0], [5.0, 1.0, 2.0], [5.0, 1.0, 3.0]], - "type-b": [[5.0, 1.0, 4.0, 0.0]], + b"type-a": [[5.0, 1.0, 1.0, 0.0], [5.0, 1.0, 2.0, 0.0], [5.0, 1.0, 3.0, 0.0]], + b"type-b": [[5.0, 1.0, 4.0, 0.0]], } y_expected = { - "type-a": [[False, True], [False, True], [True, False]], - "type-b": [[False, True]], + b"type-a": [[False, True], [False, True], [True, False]], + b"type-b": [[False, True]], } xy = StaticLazyConstraintsComponent().sample_xy(None, sample) assert xy is not None diff --git a/tests/features/__init__.py b/tests/features/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/features/test_extractor.py b/tests/features/test_extractor.py new file mode 100644 index 0000000..5468894 --- /dev/null +++ b/tests/features/test_extractor.py @@ -0,0 +1,456 @@ +# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. +# Released under the modified BSD license. See COPYING.md for more details. 
+ +import os +import sys +import time +from typing import Any + +import numpy as np +import gurobipy as gp +from scipy.sparse import coo_matrix + +from miplearn.features.extractor import FeaturesExtractor +from miplearn.features.sample import Hdf5Sample, MemorySample +from miplearn.instance.base import Instance +from miplearn.solvers.gurobi import GurobiSolver +from miplearn.solvers.internal import Variables, Constraints +from miplearn.solvers.tests import assert_equals +import cProfile + +inf = float("inf") + + +def test_knapsack() -> None: + solver = GurobiSolver() + instance = solver.build_test_instance_knapsack() + model = instance.to_model() + solver.set_instance(instance, model) + extractor = FeaturesExtractor() + sample = MemorySample() + + # after-load + # ------------------------------------------------------- + extractor.extract_after_load_features(instance, solver, sample) + assert_equals( + sample.get_array("static_instance_features"), + np.array([67.0, 21.75]), + ) + assert_equals( + sample.get_array("static_var_names"), + np.array(["x[0]", "x[1]", "x[2]", "x[3]", "z"], dtype="S"), + ) + assert_equals( + sample.get_array("static_var_lower_bounds"), + np.array([0.0, 0.0, 0.0, 0.0, 0.0]), + ) + assert_equals( + sample.get_array("static_var_obj_coeffs"), + np.array([505.0, 352.0, 458.0, 220.0, 0.0]), + ) + assert_equals( + sample.get_array("static_var_types"), + np.array(["B", "B", "B", "B", "C"], dtype="S"), + ) + assert_equals( + sample.get_array("static_var_upper_bounds"), + np.array([1.0, 1.0, 1.0, 1.0, 67.0]), + ) + assert_equals( + sample.get_array("static_var_categories"), + np.array(["default", "default", "default", "default", ""], dtype="S"), + ) + assert_equals( + sample.get_array("static_var_features"), + np.array( + [ + [ + 23.0, + 505.0, + 1.0, + 0.32899, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + [ + 26.0, + 352.0, + 1.0, + 0.229316, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + [ + 20.0, + 458.0, + 1.0, + 0.298371, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + [ + 18.0, + 220.0, + 1.0, + 0.143322, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ] + ), + ) + assert_equals( + sample.get_array("static_constr_names"), + np.array(["eq_capacity"], dtype="S"), + ) + assert_equals( + sample.get_sparse("static_constr_lhs"), + [[23.0, 26.0, 20.0, 18.0, -1.0]], + ) + assert_equals( + sample.get_array("static_constr_rhs"), + np.array([0.0]), + ) + assert_equals( + sample.get_array("static_constr_senses"), + np.array(["="], dtype="S"), + ) + assert_equals( + sample.get_array("static_constr_features"), + np.array([[0.0]]), + ) + assert_equals( + sample.get_array("static_constr_categories"), + np.array(["eq_capacity"], dtype="S"), + ) + assert_equals( + sample.get_array("static_constr_lazy"), + np.array([False]), + ) + assert_equals( + sample.get_array("static_instance_features"), + np.array([67.0, 21.75]), + ) + assert_equals(sample.get_scalar("static_constr_lazy_count"), 0) + + # after-lp + # ------------------------------------------------------- + lp_stats = solver.solve_lp() + extractor.extract_after_lp_features(solver, sample, lp_stats) + assert_equals( + sample.get_array("lp_var_basis_status"), + np.array(["U", "B", "U", "L", "U"], dtype="S"), + ) + assert_equals( + sample.get_array("lp_var_reduced_costs"), + [193.615385, 0.0, 187.230769, -23.692308, 13.538462], + ) + assert_equals( + 
sample.get_array("lp_var_sa_lb_down"), + [-inf, -inf, -inf, -0.111111, -inf], + ) + assert_equals( + sample.get_array("lp_var_sa_lb_up"), + [1.0, 0.923077, 1.0, 1.0, 67.0], + ) + assert_equals( + sample.get_array("lp_var_sa_obj_down"), + [311.384615, 317.777778, 270.769231, -inf, -13.538462], + ) + assert_equals( + sample.get_array("lp_var_sa_obj_up"), + [inf, 570.869565, inf, 243.692308, inf], + ) + assert_equals( + sample.get_array("lp_var_sa_ub_down"), + np.array([0.913043, 0.923077, 0.9, 0.0, 43.0]), + ) + assert_equals( + sample.get_array("lp_var_sa_ub_up"), + np.array([2.043478, inf, 2.2, inf, 69.0]), + ) + assert_equals( + sample.get_array("lp_var_values"), + np.array([1.0, 0.923077, 1.0, 0.0, 67.0]), + ) + assert_equals( + sample.get_array("lp_var_features"), + np.array( + [ + [ + 23.0, + 505.0, + 1.0, + 0.32899, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 1.0, + 5.265874, + 0.0, + 193.615385, + -0.111111, + 1.0, + 311.384615, + 570.869565, + 0.913043, + 2.043478, + 1.0, + ], + [ + 26.0, + 352.0, + 1.0, + 0.229316, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.076923, + 1.0, + 1.0, + 3.532875, + 0.0, + 0.0, + -0.111111, + 0.923077, + 317.777778, + 570.869565, + 0.923077, + 69.0, + 0.923077, + ], + [ + 20.0, + 458.0, + 1.0, + 0.298371, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 1.0, + 5.232342, + 0.0, + 187.230769, + -0.111111, + 1.0, + 270.769231, + 570.869565, + 0.9, + 2.2, + 1.0, + ], + [ + 18.0, + 220.0, + 1.0, + 0.143322, + 1e20, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + -1.0, + 5.265874, + 0.0, + -23.692308, + -0.111111, + 1.0, + -13.538462, + 243.692308, + 0.0, + 69.0, + 0.0, + ], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + -1.0, + 5.265874, + 0.0, + 13.538462, + -0.111111, + 67.0, + -13.538462, + 570.869565, + 43.0, + 69.0, + 67.0, + ], + ] + ), + ) + assert_equals( + sample.get_array("lp_constr_basis_status"), + np.array(["N"], dtype="S"), + ) + assert_equals( + sample.get_array("lp_constr_dual_values"), + np.array([13.538462]), + ) + assert_equals( + sample.get_array("lp_constr_sa_rhs_down"), + np.array([-24.0]), + ) + assert_equals( + sample.get_array("lp_constr_sa_rhs_up"), + np.array([2.0]), + ) + assert_equals( + sample.get_array("lp_constr_slacks"), + np.array([0.0]), + ) + assert_equals( + sample.get_array("lp_constr_features"), + np.array([[0.0, 13.538462, -24.0, 2.0, 0.0]]), + ) + + # after-mip + # ------------------------------------------------------- + solver.solve() + extractor.extract_after_mip_features(solver, sample) + assert_equals( + sample.get_array("mip_var_values"), np.array([1.0, 0.0, 1.0, 1.0, 61.0]) + ) + assert_equals(sample.get_array("mip_constr_slacks"), np.array([0.0])) + + +def test_constraint_getindex() -> None: + cf = Constraints( + names=np.array(["c1", "c2", "c3"], dtype="S"), + rhs=np.array([1.0, 2.0, 3.0]), + senses=np.array(["=", "<", ">"], dtype="S"), + lhs=coo_matrix( + [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + ] + ), + ) + assert_equals( + cf[[True, False, True]], + Constraints( + names=np.array(["c1", "c3"], dtype="S"), + rhs=np.array([1.0, 3.0]), + senses=np.array(["=", ">"], dtype="S"), + lhs=coo_matrix( + [ + [1, 2, 3], + [7, 8, 9], + ] + ), + ), + ) + + +def test_assert_equals() -> None: + assert_equals("hello", "hello") + assert_equals([1.0, 2.0], [1.0, 2.0]) + assert_equals(np.array([1.0, 2.0]), np.array([1.0, 2.0])) + assert_equals( + 
np.array([[1.0, 2.0], [3.0, 4.0]]), + np.array([[1.0, 2.0], [3.0, 4.0]]), + ) + assert_equals( + Variables(values=np.array([1.0, 2.0])), # type: ignore + Variables(values=np.array([1.0, 2.0])), # type: ignore + ) + assert_equals(np.array([True, True]), [True, True]) + assert_equals((1.0,), (1.0,)) + assert_equals({"x": 10}, {"x": 10}) + + +class MpsInstance(Instance): + def __init__(self, filename: str) -> None: + super().__init__() + self.filename = filename + + def to_model(self) -> Any: + return gp.read(self.filename) + + +def main() -> None: + solver = GurobiSolver() + instance = MpsInstance(sys.argv[1]) + solver.set_instance(instance) + extractor = FeaturesExtractor(with_lhs=False) + sample = Hdf5Sample("tmp/prof.h5", mode="w") + extractor.extract_after_load_features(instance, solver, sample) + lp_stats = solver.solve_lp(tee=True) + extractor.extract_after_lp_features(solver, sample, lp_stats) + + +if __name__ == "__main__": + cProfile.run("main()", filename="tmp/prof") + os.system("flameprof tmp/prof > tmp/prof.svg") diff --git a/tests/features/test_sample.py b/tests/features/test_sample.py new file mode 100644 index 0000000..92b795d --- /dev/null +++ b/tests/features/test_sample.py @@ -0,0 +1,71 @@ +# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. +# Released under the modified BSD license. See COPYING.md for more details. +from tempfile import NamedTemporaryFile +from typing import Any + +import numpy as np +from scipy.sparse import coo_matrix + +from miplearn.features.sample import MemorySample, Sample, Hdf5Sample + + +def test_memory_sample() -> None: + _test_sample(MemorySample()) + + +def test_hdf5_sample() -> None: + file = NamedTemporaryFile() + _test_sample(Hdf5Sample(file.name)) + + +def _test_sample(sample: Sample) -> None: + _assert_roundtrip_scalar(sample, "A") + _assert_roundtrip_scalar(sample, True) + _assert_roundtrip_scalar(sample, 1) + _assert_roundtrip_scalar(sample, 1.0) + assert sample.get_scalar("unknown-key") is None + + _assert_roundtrip_array(sample, np.array([True, False])) + _assert_roundtrip_array(sample, np.array([1, 2, 3])) + _assert_roundtrip_array(sample, np.array([1.0, 2.0, 3.0])) + _assert_roundtrip_array(sample, np.array(["A", "BB", "CCC"], dtype="S")) + assert sample.get_array("unknown-key") is None + + _assert_roundtrip_sparse( + sample, + coo_matrix( + [ + [1.0, 0.0, 0.0], + [0.0, 2.0, 3.0], + [0.0, 0.0, 4.0], + ], + ), + ) + assert sample.get_sparse("unknown-key") is None + + +def _assert_roundtrip_array(sample: Sample, original: np.ndarray) -> None: + sample.put_array("key", original) + recovered = sample.get_array("key") + assert recovered is not None + assert isinstance(recovered, np.ndarray) + assert (recovered == original).all() + + +def _assert_roundtrip_scalar(sample: Sample, original: Any) -> None: + sample.put_scalar("key", original) + recovered = sample.get_scalar("key") + assert recovered == original + assert recovered is not None + assert isinstance( + recovered, original.__class__ + ), f"Expected {original.__class__}, found {recovered.__class__} instead" + + +def _assert_roundtrip_sparse(sample: Sample, original: coo_matrix) -> None: + sample.put_sparse("key", original) + recovered = sample.get_sparse("key") + assert recovered is not None + assert isinstance(recovered, coo_matrix) + assert (original != recovered).sum() == 0 diff --git a/tests/instance/test_file.py b/tests/instance/test_file.py new file mode 100644 index 
0000000..bad2fc5 --- /dev/null +++ b/tests/instance/test_file.py @@ -0,0 +1,32 @@ +# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. +# Released under the modified BSD license. See COPYING.md for more details. + +import tempfile + +from miplearn.solvers.learning import LearningSolver +from miplearn.solvers.gurobi import GurobiSolver +from miplearn.features.sample import Hdf5Sample +from miplearn.instance.file import FileInstance + + +def test_usage() -> None: + # Create original instance + original = GurobiSolver().build_test_instance_knapsack() + + # Save instance to disk + filename = tempfile.mktemp() + FileInstance.save(original, filename) + sample = Hdf5Sample(filename) + assert len(sample.get_array("pickled")) > 0 + + # Solve instance from disk + solver = LearningSolver(solver=GurobiSolver()) + solver.solve(FileInstance(filename)) + + # Assert HDF5 contains training data + sample = FileInstance(filename).get_samples()[0] + assert sample.get_scalar("mip_lower_bound") == 1183.0 + assert sample.get_scalar("mip_upper_bound") == 1183.0 + assert len(sample.get_array("lp_var_values")) == 5 + assert len(sample.get_array("mip_var_values")) == 5 diff --git a/tests/problems/test_tsp.py b/tests/problems/test_tsp.py index 9b26ccd..8572635 100644 --- a/tests/problems/test_tsp.py +++ b/tests/problems/test_tsp.py @@ -9,6 +9,7 @@ from scipy.stats import uniform, randint from miplearn.problems.tsp import TravelingSalesmanGenerator, TravelingSalesmanInstance from miplearn.solvers.learning import LearningSolver +from miplearn.solvers.tests import assert_equals def test_generator() -> None: @@ -41,14 +42,9 @@ def test_instance() -> None: solver.solve(instance) assert len(instance.get_samples()) == 1 sample = instance.get_samples()[0] - assert sample.after_mip is not None - features = sample.after_mip - assert features is not None - assert features.variables is not None - assert features.variables.values == [1.0, 0.0, 1.0, 1.0, 0.0, 1.0] - assert features.mip_solve is not None - assert features.mip_solve.mip_lower_bound == 4.0 - assert features.mip_solve.mip_upper_bound == 4.0 + assert_equals(sample.get_array("mip_var_values"), [1.0, 0.0, 1.0, 1.0, 0.0, 1.0]) + assert sample.get_scalar("mip_lower_bound") == 4.0 + assert sample.get_scalar("mip_upper_bound") == 4.0 def test_subtour() -> None: @@ -67,32 +63,31 @@ def test_subtour() -> None: instance = TravelingSalesmanInstance(n_cities, distances) solver = LearningSolver() solver.solve(instance) - assert len(instance.get_samples()) == 1 - sample = instance.get_samples()[0] - assert sample.after_mip is not None - features = sample.after_mip - assert features.extra is not None - assert "lazy_enforced" in features.extra - lazy_enforced = features.extra["lazy_enforced"] + samples = instance.get_samples() + assert len(samples) == 1 + sample = samples[0] + lazy_enforced = sample.get_array("mip_constr_lazy_enforced") assert lazy_enforced is not None assert len(lazy_enforced) > 0 - assert features.variables is not None - assert features.variables.values == [ - 1.0, - 0.0, - 0.0, - 1.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 1.0, - 1.0, - ] + assert_equals( + sample.get_array("mip_var_values"), + [ + 1.0, + 0.0, + 0.0, + 1.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + 0.0, + 1.0, + 1.0, + ], + ) solver.fit([instance]) solver.solve(instance) diff --git a/tests/solvers/test_learning_solver.py 
b/tests/solvers/test_learning_solver.py index 19ad2b2..97fcf47 100644 --- a/tests/solvers/test_learning_solver.py +++ b/tests/solvers/test_learning_solver.py @@ -38,25 +38,22 @@ def test_learning_solver( assert len(instance.get_samples()) > 0 sample = instance.get_samples()[0] - after_mip = sample.after_mip - assert after_mip is not None - assert after_mip.variables is not None - assert after_mip.variables.values == [1.0, 0.0, 1.0, 1.0, 61.0] - assert after_mip.mip_solve is not None - assert after_mip.mip_solve.mip_lower_bound == 1183.0 - assert after_mip.mip_solve.mip_upper_bound == 1183.0 - assert after_mip.mip_solve.mip_log is not None - assert len(after_mip.mip_solve.mip_log) > 100 - - after_lp = sample.after_lp - assert after_lp is not None - assert after_lp.variables is not None - assert_equals(after_lp.variables.values, [1.0, 0.923077, 1.0, 0.0, 67.0]) - assert after_lp.lp_solve is not None - assert after_lp.lp_solve.lp_value is not None - assert round(after_lp.lp_solve.lp_value, 3) == 1287.923 - assert after_lp.lp_solve.lp_log is not None - assert len(after_lp.lp_solve.lp_log) > 100 + assert_equals( + sample.get_array("mip_var_values"), [1.0, 0.0, 1.0, 1.0, 61.0] + ) + assert sample.get_scalar("mip_lower_bound") == 1183.0 + assert sample.get_scalar("mip_upper_bound") == 1183.0 + mip_log = sample.get_scalar("mip_log") + assert mip_log is not None + assert len(mip_log) > 100 + + assert_equals( + sample.get_array("lp_var_values"), [1.0, 0.923077, 1.0, 0.0, 67.0] + ) + assert_equals(sample.get_scalar("lp_value"), 1287.923077) + lp_log = sample.get_scalar("lp_log") + assert lp_log is not None + assert len(lp_log) > 100 solver.fit([instance], n_jobs=4) solver.solve(instance) diff --git a/tests/test_features.py b/tests/test_features.py deleted file mode 100644 index f4fb485..0000000 --- a/tests/test_features.py +++ /dev/null @@ -1,158 +0,0 @@ -# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization -# Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved. -# Released under the modified BSD license. See COPYING.md for more details. 
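The learning-solver test above illustrates the new read-back pattern: training data is queried by string key instead of through nested dataclasses such as `sample.after_mip.mip_solve`. As a standalone sketch, with names and expected values taken from the tests in this patch:

```python
from miplearn.solvers.gurobi import GurobiSolver
from miplearn.solvers.learning import LearningSolver

solver = LearningSolver(solver=GurobiSolver())
instance = GurobiSolver().build_test_instance_knapsack()
solver.solve(instance)

# Query the collected training data by key.
sample = instance.get_samples()[0]
assert sample.get_scalar("mip_lower_bound") == 1183.0  # value from the test above
assert len(sample.get_array("mip_var_values")) == 5
assert round(sample.get_scalar("lp_value"), 3) == 1287.923
```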
- -import numpy as np - -from miplearn.features import ( - FeaturesExtractor, - InstanceFeatures, - VariableFeatures, - ConstraintFeatures, -) -from miplearn.solvers.gurobi import GurobiSolver -from miplearn.solvers.tests import assert_equals - -inf = float("inf") - - -def test_knapsack() -> None: - solver = GurobiSolver() - instance = solver.build_test_instance_knapsack() - model = instance.to_model() - solver.set_instance(instance, model) - solver.solve_lp() - - features = FeaturesExtractor().extract(instance, solver) - assert features.variables is not None - assert features.instance is not None - - assert_equals( - features.variables, - VariableFeatures( - names=["x[0]", "x[1]", "x[2]", "x[3]", "z"], - basis_status=["U", "B", "U", "L", "U"], - categories=["default", "default", "default", "default", None], - lower_bounds=[0.0, 0.0, 0.0, 0.0, 0.0], - obj_coeffs=[505.0, 352.0, 458.0, 220.0, 0.0], - reduced_costs=[193.615385, 0.0, 187.230769, -23.692308, 13.538462], - sa_lb_down=[-inf, -inf, -inf, -0.111111, -inf], - sa_lb_up=[1.0, 0.923077, 1.0, 1.0, 67.0], - sa_obj_down=[311.384615, 317.777778, 270.769231, -inf, -13.538462], - sa_obj_up=[inf, 570.869565, inf, 243.692308, inf], - sa_ub_down=[0.913043, 0.923077, 0.9, 0.0, 43.0], - sa_ub_up=[2.043478, inf, 2.2, inf, 69.0], - types=["B", "B", "B", "B", "C"], - upper_bounds=[1.0, 1.0, 1.0, 1.0, 67.0], - user_features=[ - [23.0, 505.0], - [26.0, 352.0], - [20.0, 458.0], - [18.0, 220.0], - None, - ], - values=[1.0, 0.923077, 1.0, 0.0, 67.0], - alvarez_2017=[ - [1.0, 0.32899, 0.0, 0.0, 1.0, 1.0, 5.265874, 46.051702], - [1.0, 0.229316, 0.0, 0.076923, 1.0, 1.0, 3.532875, 5.388476], - [1.0, 0.298371, 0.0, 0.0, 1.0, 1.0, 5.232342, 46.051702], - [1.0, 0.143322, 0.0, 0.0, 1.0, -1.0, 46.051702, 3.16515], - [0.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0], - ], - ), - ) - assert_equals( - features.constraints, - ConstraintFeatures( - basis_status=["N"], - categories=["eq_capacity"], - dual_values=[13.538462], - names=["eq_capacity"], - lazy=[False], - lhs=[ - [ - ("x[0]", 23.0), - ("x[1]", 26.0), - ("x[2]", 20.0), - ("x[3]", 18.0), - ("z", -1.0), - ], - ], - rhs=[0.0], - sa_rhs_down=[-24.0], - sa_rhs_up=[2.0], - senses=["="], - slacks=[0.0], - user_features=[None], - ), - ) - assert_equals( - features.instance, - InstanceFeatures( - user_features=[67.0, 21.75], - lazy_constraint_count=0, - ), - ) - - -def test_constraint_getindex() -> None: - cf = ConstraintFeatures( - names=["c1", "c2", "c3"], - rhs=[1.0, 2.0, 3.0], - senses=["=", "<", ">"], - lhs=[ - [ - ("x1", 1.0), - ("x2", 1.0), - ], - [ - ("x2", 2.0), - ("x3", 2.0), - ], - [ - ("x3", 3.0), - ("x4", 3.0), - ], - ], - ) - assert_equals( - cf[[True, False, True]], - ConstraintFeatures( - names=["c1", "c3"], - rhs=[1.0, 3.0], - senses=["=", ">"], - lhs=[ - [ - ("x1", 1.0), - ("x2", 1.0), - ], - [ - ("x3", 3.0), - ("x4", 3.0), - ], - ], - ), - ) - - -def test_assert_equals() -> None: - assert_equals("hello", "hello") - assert_equals([1.0, 2.0], [1.0, 2.0]) - assert_equals( - np.array([1.0, 2.0]), - np.array([1.0, 2.0]), - ) - assert_equals( - np.array([[1.0, 2.0], [3.0, 4.0]]), - np.array([[1.0, 2.0], [3.0, 4.0]]), - ) - assert_equals( - VariableFeatures(values=np.array([1.0, 2.0])), # type: ignore - VariableFeatures(values=np.array([1.0, 2.0])), # type: ignore - ) - assert_equals( - np.array([True, True]), - [True, True], - ) - assert_equals((1.0,), (1.0,)) - assert_equals({"x": 10}, {"x": 10})
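For reference, the deleted test above encodes each constraint's lhs as a list of `(variable name, coefficient)` pairs, while the new tests store a `coo_matrix` whose columns follow the variable order. A sketch of the correspondence, assuming columns are ordered as in `static_var_names`:

```python
import numpy as np
from scipy.sparse import coo_matrix

var_names = ["x[0]", "x[1]", "x[2]", "x[3]", "z"]
old_lhs = [
    [("x[0]", 23.0), ("x[1]", 26.0), ("x[2]", 20.0), ("x[3]", 18.0), ("z", -1.0)],
]

# Map each variable name to its column, then scatter the tuple entries
# into COO coordinate arrays.
col_of = {name: j for (j, name) in enumerate(var_names)}
data, rows, cols = [], [], []
for (i, terms) in enumerate(old_lhs):
    for (name, coeff) in terms:
        rows.append(i)
        cols.append(col_of[name])
        data.append(coeff)
lhs = coo_matrix((data, (rows, cols)), shape=(len(old_lhs), len(var_names)))

assert (lhs.todense() == np.array([[23.0, 26.0, 20.0, 18.0, -1.0]])).all()
```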