From 895cb962b6fce6bd97e400b430d0be72d9f2d39e Mon Sep 17 00:00:00 2001
From: "Alinson S. Xavier" <git@axavier.org>
Date: Mon, 9 Aug 2021 15:19:53 -0500
Subject: [PATCH] Make get_variable_{categories,features} return np.ndarray

---
 miplearn/components/component.py |   6 +-
 miplearn/components/primal.py    |  18 ++--
 miplearn/features/extractor.py   | 154 +++++++++++++++----------------
 miplearn/instance/base.py        |  12 +--
 miplearn/instance/file.py        |   8 +-
 miplearn/instance/picklegz.py    |   8 +-
 miplearn/problems/knapsack.py    |  12 ++-
 miplearn/problems/stab.py        |  16 ++--
 miplearn/problems/tsp.py         |   4 -
 miplearn/solvers/pyomo/base.py   |  21 +++--
 miplearn/types.py                |   2 +-
 tests/components/test_primal.py  |  57 +++++++-----
 tests/features/test_extractor.py |   4 +-
 13 files changed, 166 insertions(+), 156 deletions(-)

diff --git a/miplearn/components/component.py b/miplearn/components/component.py
index 3013d4e..d47be48 100644
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@@ -9,7 +9,7 @@ from p_tqdm import p_umap
 
 from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
-from miplearn.types import LearningSolveStats
+from miplearn.types import LearningSolveStats, Category
 
 if TYPE_CHECKING:
     from miplearn.solvers.learning import LearningSolver
@@ -101,8 +101,8 @@ class Component:
 
     def fit_xy(
         self,
-        x: Dict[str, np.ndarray],
-        y: Dict[str, np.ndarray],
+        x: Dict[Category, np.ndarray],
+        y: Dict[Category, np.ndarray],
     ) -> None:
         """
         Given two dictionaries x and y, mapping the name of the category to matrices
diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py
index 2e40623..769cbef 100644
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -47,8 +47,8 @@ class PrimalSolutionComponent(Component):
         assert isinstance(threshold, Threshold)
         assert mode in ["exact", "heuristic"]
         self.mode = mode
-        self.classifiers: Dict[str, Classifier] = {}
-        self.thresholds: Dict[str, Threshold] = {}
+        self.classifiers: Dict[Category, Classifier] = {}
+        self.thresholds: Dict[Category, Threshold] = {}
         self.threshold_prototype = threshold
         self.classifier_prototype = classifier
 
@@ -96,7 +96,7 @@ class PrimalSolutionComponent(Component):
 
     def sample_predict(self, sample: Sample) -> Solution:
         var_names = sample.get_array("static_var_names")
-        var_categories = sample.get_vector("static_var_categories")
+        var_categories = sample.get_array("static_var_categories")
         assert var_names is not None
         assert var_categories is not None
 
@@ -120,7 +120,7 @@ class PrimalSolutionComponent(Component):
 
         # Convert y_pred into solution
         solution: Solution = {v: None for v in var_names}
-        category_offset: Dict[str, int] = {cat: 0 for cat in x.keys()}
+        category_offset: Dict[Category, int] = {cat: 0 for cat in x.keys()}
         for (i, var_name) in enumerate(var_names):
             category = var_categories[i]
             if category not in category_offset:
@@ -146,7 +146,7 @@ class PrimalSolutionComponent(Component):
         mip_var_values = sample.get_array("mip_var_values")
         var_features = sample.get_vector_list("lp_var_features")
         var_names = sample.get_array("static_var_names")
-        var_categories = sample.get_vector("static_var_categories")
+        var_categories = sample.get_array("static_var_categories")
         if var_features is None:
             var_features = sample.get_vector_list("static_var_features")
         assert instance_features is not None
@@ -157,7 +157,7 @@ class PrimalSolutionComponent(Component):
         for (i, var_name) in enumerate(var_names):
             # Initialize categories
             category = var_categories[i]
-            if category is None:
+            if len(category) == 0:
                 continue
             if category not in x.keys():
                 x[category] = []
@@ -176,7 +176,7 @@ class PrimalSolutionComponent(Component):
                     f"Variable {var_name} has non-binary value {opt_value} in the "
                     "optimal solution. Predicting values of non-binary "
                     "variables is not currently supported. Please set its "
-                    "category to None."
+                    "category to ''."
                 )
                 y[category].append([opt_value < 0.5, opt_value >= 0.5])
         return x, y
@@ -230,8 +230,8 @@ class PrimalSolutionComponent(Component):
     @overrides
     def fit_xy(
         self,
-        x: Dict[str, np.ndarray],
-        y: Dict[str, np.ndarray],
+        x: Dict[Category, np.ndarray],
+        y: Dict[Category, np.ndarray],
     ) -> None:
         for category in x.keys():
             clf = self.classifier_prototype.clone()
diff --git a/miplearn/features/extractor.py b/miplearn/features/extractor.py
index ead7f8f..c82b950 100644
--- a/miplearn/features/extractor.py
+++ b/miplearn/features/extractor.py
@@ -46,20 +46,25 @@ class FeaturesExtractor:
         vars_features_user, var_categories = self._extract_user_features_vars(
             instance, sample
         )
-        sample.put_vector("static_var_categories", var_categories)
+        sample.put_array("static_var_categories", var_categories)
         self._extract_user_features_constrs(instance, sample)
         self._extract_user_features_instance(instance, sample)
         alw17 = self._extract_var_features_AlvLouWeh2017(sample)
-        sample.put_vector_list(
+
+        # Build static_var_features
+        assert variables.lower_bounds is not None
+        assert variables.obj_coeffs is not None
+        assert variables.upper_bounds is not None
+        sample.put_array(
             "static_var_features",
-            self._combine(
+            np.hstack(
                 [
-                    alw17,
                     vars_features_user,
-                    sample.get_array("static_var_lower_bounds"),
-                    sample.get_array("static_var_obj_coeffs"),
-                    sample.get_array("static_var_upper_bounds"),
-                ],
+                    alw17,
+                    variables.lower_bounds.reshape(-1, 1),
+                    variables.obj_coeffs.reshape(-1, 1),
+                    variables.upper_bounds.reshape(-1, 1),
+                ]
             ),
         )
 
@@ -88,23 +93,29 @@ class FeaturesExtractor:
         sample.put_array("lp_constr_sa_rhs_up", constraints.sa_rhs_up)
         sample.put_array("lp_constr_slacks", constraints.slacks)
         alw17 = self._extract_var_features_AlvLouWeh2017(sample)
-        sample.put_vector_list(
-            "lp_var_features",
-            self._combine(
-                [
-                    alw17,
-                    sample.get_array("lp_var_reduced_costs"),
-                    sample.get_array("lp_var_sa_lb_down"),
-                    sample.get_array("lp_var_sa_lb_up"),
-                    sample.get_array("lp_var_sa_obj_down"),
-                    sample.get_array("lp_var_sa_obj_up"),
-                    sample.get_array("lp_var_sa_ub_down"),
-                    sample.get_array("lp_var_sa_ub_up"),
-                    sample.get_array("lp_var_values"),
-                    sample.get_vector_list("static_var_features"),
-                ],
-            ),
-        )
+
+        # Build lp_var_features
+        lp_var_features_list = []
+        for f in [
+            sample.get_array("static_var_features"),
+            alw17,
+        ]:
+            if f is not None:
+                lp_var_features_list.append(f)
+        for f in [
+            variables.reduced_costs,
+            variables.sa_lb_down,
+            variables.sa_lb_up,
+            variables.sa_obj_down,
+            variables.sa_obj_up,
+            variables.sa_ub_down,
+            variables.sa_ub_up,
+            variables.values,
+        ]:
+            if f is not None:
+                lp_var_features_list.append(f.reshape(-1, 1))
+        sample.put_array("lp_var_features", np.hstack(lp_var_features_list))
+
         sample.put_vector_list(
             "lp_constr_features",
             self._combine(
@@ -148,60 +159,49 @@ class FeaturesExtractor:
         self,
         instance: "Instance",
         sample: Sample,
-    ) -> Tuple[List, List]:
+    ) -> Tuple[np.ndarray, np.ndarray]:
         # Query variable names
         var_names = sample.get_array("static_var_names")
         assert var_names is not None
 
-        # Query variable features and categories
-        var_features_dict = {
-            v.encode(): f for (v, f) in instance.get_variable_features().items()
-        }
-        var_categories_dict = {
-            v.encode(): f for (v, f) in instance.get_variable_categories().items()
-        }
-
-        # Assert that variables in user-provided dicts actually exist
-        var_names_set = set(var_names)
-        for keys in [var_features_dict.keys(), var_categories_dict.keys()]:
-            for vn in cast(KeysView, keys):
-                assert (
-                    vn in var_names_set
-                ), f"Variable {vn!r} not found in the problem; {var_names_set}"
-
-        # Assemble into compact lists
-        user_features: List[Optional[List[float]]] = []
-        categories: List[Optional[str]] = []
-        for (i, var_name) in enumerate(var_names):
-            if var_name not in var_categories_dict:
-                user_features.append(None)
-                categories.append(None)
-                continue
-            category: str = var_categories_dict[var_name]
-            assert isinstance(category, str), (
-                f"Variable category must be a string. "
-                f"Found {type(category).__name__} instead for var={var_name}."
-            )
-            categories.append(category)
-            user_features_i: Optional[List[float]] = None
-            if var_name in var_features_dict:
-                user_features_i = var_features_dict[var_name]
-                if isinstance(user_features_i, np.ndarray):
-                    user_features_i = user_features_i.tolist()
-                assert isinstance(user_features_i, list), (
-                    f"Variable features must be a list. "
-                    f"Found {type(user_features_i).__name__} instead for "
-                    f"var={var_name}."
-                )
-                for v in user_features_i:
-                    assert isinstance(v, numbers.Real), (
-                        f"Variable features must be a list of numbers. "
-                        f"Found {type(v).__name__} instead "
-                        f"for var={var_name}."
-                    )
-                user_features_i = list(user_features_i)
-            user_features.append(user_features_i)
-        return user_features, categories
+        # Query variable features
+        var_features = instance.get_variable_features(var_names)
+        assert isinstance(var_features, np.ndarray), (
+            f"Variable features must be a numpy array. "
+            f"Found {var_features.__class__} instead."
+        )
+        assert len(var_features.shape) == 2, (
+            f"Variable features must be 2-dimensional array. "
+            f"Found array with shape {var_features.shape} instead."
+        )
+        assert var_features.shape[0] == len(var_names), (
+            f"Variable features must have exactly {len(var_names)} rows. "
+            f"Found {var_features.shape[0]} rows instead."
+        )
+        assert var_features.dtype.kind in ["f"], (
+            f"Variable features must be floating point numbers. "
+            f"Found dtype: {var_features.dtype} instead."
+        )
+
+        # Query variable categories
+        var_categories = instance.get_variable_categories(var_names)
+        assert isinstance(var_categories, np.ndarray), (
+            f"Variable categories must be a numpy array. "
+            f"Found {var_categories.__class__} instead."
+        )
+        assert len(var_categories.shape) == 1, (
+            f"Variable categories must be a vector. "
+            f"Found array with shape {var_categories.shape} instead."
+        )
+        assert len(var_categories) == len(var_names), (
+            f"Variable categories must have exactly {len(var_names)} elements. "
+            f"Found {len(var_categories)} elements instead."
+        )
+        assert var_categories.dtype.kind == "S", (
+            f"Variable categories must be a numpy array with dtype='S'. "
+            f"Found {var_categories.dtype} instead."
+        )
+        return var_features, var_categories
 
     def _extract_user_features_constrs(
         self,
@@ -277,7 +277,7 @@ class FeaturesExtractor:
 
     # Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based
     # approximation of strong branching. INFORMS Journal on Computing, 29(1), 185-195.
-    def _extract_var_features_AlvLouWeh2017(self, sample: Sample) -> List:
+    def _extract_var_features_AlvLouWeh2017(self, sample: Sample) -> np.ndarray:
         obj_coeffs = sample.get_array("static_var_obj_coeffs")
         obj_sa_down = sample.get_array("lp_var_sa_obj_down")
         obj_sa_up = sample.get_array("lp_var_sa_obj_up")
@@ -351,7 +351,7 @@ class FeaturesExtractor:
                     f[i] = 0.0
 
             features.append(f)
-        return features
+        return np.array(features, dtype=float)
 
     def _combine(
         self,
diff --git a/miplearn/instance/base.py b/miplearn/instance/base.py
index 1c5ba8a..09e0397 100644
--- a/miplearn/instance/base.py
+++ b/miplearn/instance/base.py
@@ -63,7 +63,7 @@ class Instance(ABC):
         """
         return np.zeros(1)
 
-    def get_variable_features(self) -> Dict[str, List[float]]:
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
         """
         Returns dictionary mapping the name of each variable to a (1-dimensional) list
         of numerical features describing a particular decision variable.
@@ -81,11 +81,11 @@ class Instance(ABC):
         If features are not provided for a given variable, MIPLearn will use a
         default set of features.
 
-        By default, returns {}.
+        By default, returns [[0.0], ..., [0.0]].
         """
-        return {}
+        return np.zeros((len(names), 1))
 
-    def get_variable_categories(self) -> Dict[str, str]:
+    def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
         """
         Returns a dictionary mapping the name of each variable to its category.
 
@@ -93,9 +93,9 @@ class Instance(ABC):
         internal ML model to predict the values of both variables. If a variable is not
         listed in the dictionary, ML models will ignore the variable.
 
-        By default, returns {}.
+        By default, returns `names`.
         """
-        return {}
+        return names
 
     def get_constraint_features(self) -> Dict[str, List[float]]:
         return {}
diff --git a/miplearn/instance/file.py b/miplearn/instance/file.py
index daf1816..d7181f2 100644
--- a/miplearn/instance/file.py
+++ b/miplearn/instance/file.py
@@ -36,14 +36,14 @@ class FileInstance(Instance):
         return self.instance.get_instance_features()
 
     @overrides
-    def get_variable_features(self) -> Dict[str, List[float]]:
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
         assert self.instance is not None
-        return self.instance.get_variable_features()
+        return self.instance.get_variable_features(names)
 
     @overrides
-    def get_variable_categories(self) -> Dict[str, str]:
+    def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
         assert self.instance is not None
-        return self.instance.get_variable_categories()
+        return self.instance.get_variable_categories(names)
 
     @overrides
     def get_constraint_features(self) -> Dict[str, List[float]]:
diff --git a/miplearn/instance/picklegz.py b/miplearn/instance/picklegz.py
index b7b6b40..94b3968 100644
--- a/miplearn/instance/picklegz.py
+++ b/miplearn/instance/picklegz.py
@@ -48,14 +48,14 @@ class PickleGzInstance(Instance):
         return self.instance.get_instance_features()
 
     @overrides
-    def get_variable_features(self) -> Dict[str, List[float]]:
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
         assert self.instance is not None
-        return self.instance.get_variable_features()
+        return self.instance.get_variable_features(names)
 
     @overrides
-    def get_variable_categories(self) -> Dict[str, str]:
+    def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
         assert self.instance is not None
-        return self.instance.get_variable_categories()
+        return self.instance.get_variable_categories(names)
 
     @overrides
     def get_constraint_features(self) -> Dict[str, List[float]]:
diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py
index 2a922de..1dd06ef 100644
--- a/miplearn/problems/knapsack.py
+++ b/miplearn/problems/knapsack.py
@@ -98,11 +98,13 @@ class MultiKnapsackInstance(Instance):
         return np.array([float(np.mean(self.prices))] + list(self.capacities))
 
     @overrides
-    def get_variable_features(self) -> Dict[str, List[float]]:
-        return {
-            f"x[{i}]": [self.prices[i] + list(self.weights[:, i])]
-            for i in range(self.n)
-        }
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
+        features = []
+        for i in range(self.n):  # one row per item (a column of weights), not per row
+            f = [self.prices[i]]
+            f.extend(self.weights[:, i])
+            features.append(f)
+        return np.array(features, dtype=float)  # extractor asserts a float dtype
 
 
 # noinspection PyPep8Naming
diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py
index db5bff8..a64fb3c 100644
--- a/miplearn/problems/stab.py
+++ b/miplearn/problems/stab.py
@@ -66,9 +66,11 @@ class MaxWeightStableSetInstance(Instance):
         return model
 
     @overrides
-    def get_variable_features(self) -> Dict[str, List[float]]:
-        features = {}
-        for v1 in self.nodes:
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
+        features = []
+        assert len(names) == len(self.nodes)
+        for i, v1 in enumerate(self.nodes):
+            assert names[i] == f"x[{v1}]".encode()
             neighbor_weights = [0.0] * 15
             neighbor_degrees = [100.0] * 15
             for v2 in self.graph.neighbors(v1):
@@ -80,12 +82,12 @@ class MaxWeightStableSetInstance(Instance):
             f += neighbor_weights[:5]
             f += neighbor_degrees[:5]
             f += [self.graph.degree(v1)]
-            features[f"x[{v1}]"] = f
-        return features
+            features.append(f)
+        return np.array(features)
 
     @overrides
-    def get_variable_categories(self) -> Dict[str, str]:
-        return {f"x[{v}]": "default" for v in self.nodes}
+    def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
+        return np.array(["default" for _ in names], dtype="S")
 
 
 class MaxWeightStableSetGenerator:
diff --git a/miplearn/problems/tsp.py b/miplearn/problems/tsp.py
index 66cae5d..bdb053b 100644
--- a/miplearn/problems/tsp.py
+++ b/miplearn/problems/tsp.py
@@ -80,10 +80,6 @@ class TravelingSalesmanInstance(Instance):
             )
         return model
 
-    @overrides
-    def get_variable_categories(self) -> Dict[str, str]:
-        return {f"x[{e}]": f"x[{e}]" for e in self.edges}
-
     @overrides
     def find_violated_lazy_constraints(
         self,
diff --git a/miplearn/solvers/pyomo/base.py b/miplearn/solvers/pyomo/base.py
index acd0e37..3e306a5 100644
--- a/miplearn/solvers/pyomo/base.py
+++ b/miplearn/solvers/pyomo/base.py
@@ -605,6 +605,7 @@ class PyomoTestInstanceKnapsack(Instance):
         self.weights = weights
         self.prices = prices
         self.capacity = capacity
+        self.n = len(weights)
 
     @overrides
     def to_model(self) -> pe.ConcreteModel:
@@ -631,15 +632,17 @@ class PyomoTestInstanceKnapsack(Instance):
         )
 
     @overrides
-    def get_variable_features(self) -> Dict[str, List[float]]:
-        return {
-            f"x[{i}]": [
-                self.weights[i],
-                self.prices[i],
+    def get_variable_features(self, names: np.ndarray) -> np.ndarray:
+        return np.vstack(
+            [
+                [[self.weights[i], self.prices[i]] for i in range(self.n)],
+                [0.0, 0.0],
             ]
-            for i in range(len(self.weights))
-        }
+        )
 
     @overrides
-    def get_variable_categories(self) -> Dict[str, str]:
-        return {f"x[{i}]": "default" for i in range(len(self.weights))}
+    def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
+        return np.array(
+            ["default" if n.decode().startswith("x") else "" for n in names],
+            dtype="S",
+        )
diff --git a/miplearn/types.py b/miplearn/types.py
index 5d94163..74a194e 100644
--- a/miplearn/types.py
+++ b/miplearn/types.py
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
     # noinspection PyUnresolvedReferences
     from miplearn.solvers.learning import InternalSolver
 
-Category = str
+Category = bytes
 IterationCallback = Callable[[], bool]
 LazyCallback = Callable[[Any, Any], None]
 SolverParams = Dict[str, Any]
diff --git a/tests/components/test_primal.py b/tests/components/test_primal.py
index 0f56d7b..a77cad0 100644
--- a/tests/components/test_primal.py
+++ b/tests/components/test_primal.py
@@ -23,21 +23,28 @@ def sample() -> Sample:
     sample = MemorySample(
         {
             "static_var_names": np.array(["x[0]", "x[1]", "x[2]", "x[3]"], dtype="S"),
-            "static_var_categories": ["default", None, "default", "default"],
+            "static_var_categories": np.array(
+                ["default", "", "default", "default"],
+                dtype="S",
+            ),
             "mip_var_values": np.array([0.0, 1.0, 1.0, 0.0]),
-            "static_instance_features": [5.0],
-            "static_var_features": [
-                [0.0, 0.0],
-                None,
-                [1.0, 0.0],
-                [1.0, 1.0],
-            ],
-            "lp_var_features": [
-                [0.0, 0.0, 2.0, 2.0],
-                None,
-                [1.0, 0.0, 3.0, 2.0],
-                [1.0, 1.0, 3.0, 3.0],
-            ],
+            "static_instance_features": np.array([5.0]),
+            "static_var_features": np.array(
+                [
+                    [0.0, 0.0],
+                    [0.0, 0.0],
+                    [1.0, 0.0],
+                    [1.0, 1.0],
+                ]
+            ),
+            "lp_var_features": np.array(
+                [
+                    [0.0, 0.0, 2.0, 2.0],
+                    [0.0, 0.0, 0.0, 0.0],
+                    [1.0, 0.0, 3.0, 2.0],
+                    [1.0, 1.0, 3.0, 3.0],
+                ]
+            ),
         },
     )
     return sample
@@ -45,14 +52,14 @@ def sample() -> Sample:
 
 def test_xy(sample: Sample) -> None:
     x_expected = {
-        "default": [
+        b"default": [
             [5.0, 0.0, 0.0, 2.0, 2.0],
             [5.0, 1.0, 0.0, 3.0, 2.0],
             [5.0, 1.0, 1.0, 3.0, 3.0],
         ]
     }
     y_expected = {
-        "default": [
+        b"default": [
             [True, False],
             [False, True],
             [True, False],
@@ -72,15 +79,15 @@ def test_fit_xy() -> None:
     thr.clone = lambda: Mock(spec=Threshold)
     comp = PrimalSolutionComponent(classifier=clf, threshold=thr)
     x = {
-        "type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]),
-        "type-b": np.array([[7.0, 8.0, 9.0]]),
+        b"type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]),
+        b"type-b": np.array([[7.0, 8.0, 9.0]]),
     }
     y = {
-        "type-a": np.array([[True, False], [False, True]]),
-        "type-b": np.array([[True, False]]),
+        b"type-a": np.array([[True, False], [False, True]]),
+        b"type-b": np.array([[True, False]]),
     }
     comp.fit_xy(x, y)
-    for category in ["type-a", "type-b"]:
+    for category in [b"type-a", b"type-b"]:
         assert category in comp.classifiers
         assert category in comp.thresholds
         clf = comp.classifiers[category]  # type: ignore
@@ -142,13 +149,13 @@ def test_predict(sample: Sample) -> None:
     thr.predict = Mock(return_value=[0.75, 0.75])
     comp = PrimalSolutionComponent()
     x, _ = comp.sample_xy(None, sample)
-    comp.classifiers = {"default": clf}
-    comp.thresholds = {"default": thr}
+    comp.classifiers = {b"default": clf}
+    comp.thresholds = {b"default": thr}
     pred = comp.sample_predict(sample)
     clf.predict_proba.assert_called_once()
     thr.predict.assert_called_once()
-    assert_array_equal(x["default"], clf.predict_proba.call_args[0][0])
-    assert_array_equal(x["default"], thr.predict.call_args[0][0])
+    assert_array_equal(x[b"default"], clf.predict_proba.call_args[0][0])
+    assert_array_equal(x[b"default"], thr.predict.call_args[0][0])
     assert pred == {
         b"x[0]": 0.0,
         b"x[1]": None,
diff --git a/tests/features/test_extractor.py b/tests/features/test_extractor.py
index 6e60f75..9d0da22 100644
--- a/tests/features/test_extractor.py
+++ b/tests/features/test_extractor.py
@@ -49,8 +49,8 @@ def test_knapsack() -> None:
         sample.get_vector("static_var_upper_bounds"), [1.0, 1.0, 1.0, 1.0, 67.0]
     )
     assert_equals(
-        sample.get_vector("static_var_categories"),
-        ["default", "default", "default", "default", None],
+        sample.get_array("static_var_categories"),
+        np.array(["default", "default", "default", "default", ""], dtype="S"),
     )
     assert sample.get_vector_list("static_var_features") is not None
     assert_equals(