From 895cb962b6fce6bd97e400b430d0be72d9f2d39e Mon Sep 17 00:00:00 2001 From: "Alinson S. Xavier" Date: Mon, 9 Aug 2021 15:19:53 -0500 Subject: [PATCH] Make get_variable_{categories,features} return np.ndarray --- miplearn/components/component.py | 6 +- miplearn/components/primal.py | 18 ++-- miplearn/features/extractor.py | 154 +++++++++++++++---------------- miplearn/instance/base.py | 12 +-- miplearn/instance/file.py | 8 +- miplearn/instance/picklegz.py | 8 +- miplearn/problems/knapsack.py | 12 ++- miplearn/problems/stab.py | 16 ++-- miplearn/problems/tsp.py | 4 - miplearn/solvers/pyomo/base.py | 21 +++-- miplearn/types.py | 2 +- tests/components/test_primal.py | 57 +++++++----- tests/features/test_extractor.py | 4 +- 13 files changed, 166 insertions(+), 156 deletions(-) diff --git a/miplearn/components/component.py b/miplearn/components/component.py index 3013d4e..d47be48 100644 --- a/miplearn/components/component.py +++ b/miplearn/components/component.py @@ -9,7 +9,7 @@ from p_tqdm import p_umap from miplearn.features.sample import Sample from miplearn.instance.base import Instance -from miplearn.types import LearningSolveStats +from miplearn.types import LearningSolveStats, Category if TYPE_CHECKING: from miplearn.solvers.learning import LearningSolver @@ -101,8 +101,8 @@ class Component: def fit_xy( self, - x: Dict[str, np.ndarray], - y: Dict[str, np.ndarray], + x: Dict[Category, np.ndarray], + y: Dict[Category, np.ndarray], ) -> None: """ Given two dictionaries x and y, mapping the name of the category to matrices diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py index 2e40623..769cbef 100644 --- a/miplearn/components/primal.py +++ b/miplearn/components/primal.py @@ -47,8 +47,8 @@ class PrimalSolutionComponent(Component): assert isinstance(threshold, Threshold) assert mode in ["exact", "heuristic"] self.mode = mode - self.classifiers: Dict[str, Classifier] = {} - self.thresholds: Dict[str, Threshold] = {} + self.classifiers: 
Dict[Category, Classifier] = {} + self.thresholds: Dict[Category, Threshold] = {} self.threshold_prototype = threshold self.classifier_prototype = classifier @@ -96,7 +96,7 @@ class PrimalSolutionComponent(Component): def sample_predict(self, sample: Sample) -> Solution: var_names = sample.get_array("static_var_names") - var_categories = sample.get_vector("static_var_categories") + var_categories = sample.get_array("static_var_categories") assert var_names is not None assert var_categories is not None @@ -120,7 +120,7 @@ class PrimalSolutionComponent(Component): # Convert y_pred into solution solution: Solution = {v: None for v in var_names} - category_offset: Dict[str, int] = {cat: 0 for cat in x.keys()} + category_offset: Dict[Category, int] = {cat: 0 for cat in x.keys()} for (i, var_name) in enumerate(var_names): category = var_categories[i] if category not in category_offset: @@ -146,7 +146,7 @@ class PrimalSolutionComponent(Component): mip_var_values = sample.get_array("mip_var_values") var_features = sample.get_vector_list("lp_var_features") var_names = sample.get_array("static_var_names") - var_categories = sample.get_vector("static_var_categories") + var_categories = sample.get_array("static_var_categories") if var_features is None: var_features = sample.get_vector_list("static_var_features") assert instance_features is not None @@ -157,7 +157,7 @@ class PrimalSolutionComponent(Component): for (i, var_name) in enumerate(var_names): # Initialize categories category = var_categories[i] - if category is None: + if len(category) == 0: continue if category not in x.keys(): x[category] = [] @@ -176,7 +176,7 @@ class PrimalSolutionComponent(Component): f"Variable {var_name} has non-binary value {opt_value} in the " "optimal solution. Predicting values of non-binary " "variables is not currently supported. Please set its " - "category to None." + "category to ''." 
) y[category].append([opt_value < 0.5, opt_value >= 0.5]) return x, y @@ -230,8 +230,8 @@ class PrimalSolutionComponent(Component): @overrides def fit_xy( self, - x: Dict[str, np.ndarray], - y: Dict[str, np.ndarray], + x: Dict[Category, np.ndarray], + y: Dict[Category, np.ndarray], ) -> None: for category in x.keys(): clf = self.classifier_prototype.clone() diff --git a/miplearn/features/extractor.py b/miplearn/features/extractor.py index ead7f8f..c82b950 100644 --- a/miplearn/features/extractor.py +++ b/miplearn/features/extractor.py @@ -46,20 +46,25 @@ class FeaturesExtractor: vars_features_user, var_categories = self._extract_user_features_vars( instance, sample ) - sample.put_vector("static_var_categories", var_categories) + sample.put_array("static_var_categories", var_categories) self._extract_user_features_constrs(instance, sample) self._extract_user_features_instance(instance, sample) alw17 = self._extract_var_features_AlvLouWeh2017(sample) - sample.put_vector_list( + + # Build static_var_features + assert variables.lower_bounds is not None + assert variables.obj_coeffs is not None + assert variables.upper_bounds is not None + sample.put_array( "static_var_features", - self._combine( + np.hstack( [ - alw17, vars_features_user, - sample.get_array("static_var_lower_bounds"), - sample.get_array("static_var_obj_coeffs"), - sample.get_array("static_var_upper_bounds"), - ], + alw17, + variables.lower_bounds.reshape(-1, 1), + variables.obj_coeffs.reshape(-1, 1), + variables.upper_bounds.reshape(-1, 1), + ] ), ) @@ -88,23 +93,29 @@ class FeaturesExtractor: sample.put_array("lp_constr_sa_rhs_up", constraints.sa_rhs_up) sample.put_array("lp_constr_slacks", constraints.slacks) alw17 = self._extract_var_features_AlvLouWeh2017(sample) - sample.put_vector_list( - "lp_var_features", - self._combine( - [ - alw17, - sample.get_array("lp_var_reduced_costs"), - sample.get_array("lp_var_sa_lb_down"), - sample.get_array("lp_var_sa_lb_up"), - 
sample.get_array("lp_var_sa_obj_down"), - sample.get_array("lp_var_sa_obj_up"), - sample.get_array("lp_var_sa_ub_down"), - sample.get_array("lp_var_sa_ub_up"), - sample.get_array("lp_var_values"), - sample.get_vector_list("static_var_features"), - ], - ), - ) + + # Build lp_var_features + lp_var_features_list = [] + for f in [ + sample.get_array("static_var_features"), + alw17, + ]: + if f is not None: + lp_var_features_list.append(f) + for f in [ + variables.reduced_costs, + variables.sa_lb_down, + variables.sa_lb_up, + variables.sa_obj_down, + variables.sa_obj_up, + variables.sa_ub_down, + variables.sa_ub_up, + variables.values, + ]: + if f is not None: + lp_var_features_list.append(f.reshape(-1, 1)) + sample.put_array("lp_var_features", np.hstack(lp_var_features_list)) + sample.put_vector_list( "lp_constr_features", self._combine( @@ -148,60 +159,49 @@ class FeaturesExtractor: self, instance: "Instance", sample: Sample, - ) -> Tuple[List, List]: + ) -> Tuple[np.ndarray, np.ndarray]: # Query variable names var_names = sample.get_array("static_var_names") assert var_names is not None - # Query variable features and categories - var_features_dict = { - v.encode(): f for (v, f) in instance.get_variable_features().items() - } - var_categories_dict = { - v.encode(): f for (v, f) in instance.get_variable_categories().items() - } - - # Assert that variables in user-provided dicts actually exist - var_names_set = set(var_names) - for keys in [var_features_dict.keys(), var_categories_dict.keys()]: - for vn in cast(KeysView, keys): - assert ( - vn in var_names_set - ), f"Variable {vn!r} not found in the problem; {var_names_set}" - - # Assemble into compact lists - user_features: List[Optional[List[float]]] = [] - categories: List[Optional[str]] = [] - for (i, var_name) in enumerate(var_names): - if var_name not in var_categories_dict: - user_features.append(None) - categories.append(None) - continue - category: str = var_categories_dict[var_name] - assert 
isinstance(category, str), ( - f"Variable category must be a string. " - f"Found {type(category).__name__} instead for var={var_name}." - ) - categories.append(category) - user_features_i: Optional[List[float]] = None - if var_name in var_features_dict: - user_features_i = var_features_dict[var_name] - if isinstance(user_features_i, np.ndarray): - user_features_i = user_features_i.tolist() - assert isinstance(user_features_i, list), ( - f"Variable features must be a list. " - f"Found {type(user_features_i).__name__} instead for " - f"var={var_name}." - ) - for v in user_features_i: - assert isinstance(v, numbers.Real), ( - f"Variable features must be a list of numbers. " - f"Found {type(v).__name__} instead " - f"for var={var_name}." - ) - user_features_i = list(user_features_i) - user_features.append(user_features_i) - return user_features, categories + # Query variable features + var_features = instance.get_variable_features(var_names) + assert isinstance(var_features, np.ndarray), ( + f"Variable features must be a numpy array. " + f"Found {var_features.__class__} instead." + ) + assert len(var_features.shape) == 2, ( + f"Variable features must be 2-dimensional array. " + f"Found array with shape {var_features.shape} instead." + ) + assert var_features.shape[0] == len(var_names), ( + f"Variable features must have exactly {len(var_names)} rows. " + f"Found {var_features.shape[0]} rows instead." + ) + assert var_features.dtype.kind in ["f"], ( + f"Variable features must be floating point numbers. " + f"Found dtype: {var_features.dtype} instead." + ) + + # Query variable categories + var_categories = instance.get_variable_categories(var_names) + assert isinstance(var_categories, np.ndarray), ( + f"Variable categories must be a numpy array. " + f"Found {var_categories.__class__} instead." + ) + assert len(var_categories.shape) == 1, ( + f"Variable categories must be a vector. " + f"Found array with shape {var_categories.shape} instead." 
+ ) + assert len(var_categories) == len(var_names), ( + f"Variable categories must have exactly {len(var_names)} elements. " + f"Found {len(var_categories)} elements instead." + ) + assert var_categories.dtype.kind == "S", ( + f"Variable categories must be a numpy array with dtype='S'. " + f"Found {var_categories.dtype} instead." + ) + return var_features, var_categories def _extract_user_features_constrs( self, @@ -277,7 +277,7 @@ class FeaturesExtractor: # Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based # approximation of strong branching. INFORMS Journal on Computing, 29(1), 185-195. - def _extract_var_features_AlvLouWeh2017(self, sample: Sample) -> List: + def _extract_var_features_AlvLouWeh2017(self, sample: Sample) -> np.ndarray: obj_coeffs = sample.get_array("static_var_obj_coeffs") obj_sa_down = sample.get_array("lp_var_sa_obj_down") obj_sa_up = sample.get_array("lp_var_sa_obj_up") @@ -351,7 +351,7 @@ class FeaturesExtractor: f[i] = 0.0 features.append(f) - return features + return np.array(features, dtype=float) def _combine( self, diff --git a/miplearn/instance/base.py b/miplearn/instance/base.py index 1c5ba8a..09e0397 100644 --- a/miplearn/instance/base.py +++ b/miplearn/instance/base.py @@ -63,7 +63,7 @@ class Instance(ABC): """ return np.zeros(1) - def get_variable_features(self) -> Dict[str, List[float]]: + def get_variable_features(self, names: np.ndarray) -> np.ndarray: """ Returns dictionary mapping the name of each variable to a (1-dimensional) list of numerical features describing a particular decision variable. @@ -81,11 +81,11 @@ class Instance(ABC): If features are not provided for a given variable, MIPLearn will use a default set of features. - By default, returns {}. + By default, returns [[0.0], ..., [0.0]]. 
""" - return {} + return np.zeros((len(names), 1)) - def get_variable_categories(self) -> Dict[str, str]: + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: """ Returns a dictionary mapping the name of each variable to its category. @@ -93,9 +93,9 @@ class Instance(ABC): internal ML model to predict the values of both variables. If a variable is not listed in the dictionary, ML models will ignore the variable. - By default, returns {}. + By default, returns `names`. """ - return {} + return names def get_constraint_features(self) -> Dict[str, List[float]]: return {} diff --git a/miplearn/instance/file.py b/miplearn/instance/file.py index daf1816..d7181f2 100644 --- a/miplearn/instance/file.py +++ b/miplearn/instance/file.py @@ -36,14 +36,14 @@ class FileInstance(Instance): return self.instance.get_instance_features() @overrides - def get_variable_features(self) -> Dict[str, List[float]]: + def get_variable_features(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_variable_features() + return self.instance.get_variable_features(names) @overrides - def get_variable_categories(self) -> Dict[str, str]: + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_variable_categories() + return self.instance.get_variable_categories(names) @overrides def get_constraint_features(self) -> Dict[str, List[float]]: diff --git a/miplearn/instance/picklegz.py b/miplearn/instance/picklegz.py index b7b6b40..94b3968 100644 --- a/miplearn/instance/picklegz.py +++ b/miplearn/instance/picklegz.py @@ -48,14 +48,14 @@ class PickleGzInstance(Instance): return self.instance.get_instance_features() @overrides - def get_variable_features(self) -> Dict[str, List[float]]: + def get_variable_features(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_variable_features() + return 
self.instance.get_variable_features(names) @overrides - def get_variable_categories(self) -> Dict[str, str]: + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: assert self.instance is not None - return self.instance.get_variable_categories() + return self.instance.get_variable_categories(names) @overrides def get_constraint_features(self) -> Dict[str, List[float]]: diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py index 2a922de..1dd06ef 100644 --- a/miplearn/problems/knapsack.py +++ b/miplearn/problems/knapsack.py @@ -98,11 +98,13 @@ class MultiKnapsackInstance(Instance): return np.array([float(np.mean(self.prices))] + list(self.capacities)) @overrides - def get_variable_features(self) -> Dict[str, List[float]]: - return { - f"x[{i}]": [self.prices[i] + list(self.weights[:, i])] - for i in range(self.n) - } + def get_variable_features(self, names: np.ndarray) -> np.ndarray: + features = [] + for i in range(self.n): + f = [self.prices[i]] + f.extend(self.weights[:, i]) + features.append(f) + return np.array(features) # noinspection PyPep8Naming diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py index db5bff8..a64fb3c 100644 --- a/miplearn/problems/stab.py +++ b/miplearn/problems/stab.py @@ -66,9 +66,11 @@ class MaxWeightStableSetInstance(Instance): return model @overrides - def get_variable_features(self) -> Dict[str, List[float]]: - features = {} - for v1 in self.nodes: + def get_variable_features(self, names: np.ndarray) -> np.ndarray: + features = [] + assert len(names) == len(self.nodes) + for i, v1 in enumerate(self.nodes): + assert names[i] == f"x[{v1}]".encode() neighbor_weights = [0.0] * 15 neighbor_degrees = [100.0] * 15 for v2 in self.graph.neighbors(v1): @@ -80,12 +82,12 @@ class MaxWeightStableSetInstance(Instance): f += neighbor_weights[:5] f += neighbor_degrees[:5] f += [self.graph.degree(v1)] - features[f"x[{v1}]"] = f - return features + features.append(f) + return 
np.array(features) @overrides - def get_variable_categories(self) -> Dict[str, str]: - return {f"x[{v}]": "default" for v in self.nodes} + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: + return np.array(["default" for _ in names], dtype="S") class MaxWeightStableSetGenerator: diff --git a/miplearn/problems/tsp.py b/miplearn/problems/tsp.py index 66cae5d..bdb053b 100644 --- a/miplearn/problems/tsp.py +++ b/miplearn/problems/tsp.py @@ -80,10 +80,6 @@ class TravelingSalesmanInstance(Instance): ) return model - @overrides - def get_variable_categories(self) -> Dict[str, str]: - return {f"x[{e}]": f"x[{e}]" for e in self.edges} - @overrides def find_violated_lazy_constraints( self, diff --git a/miplearn/solvers/pyomo/base.py b/miplearn/solvers/pyomo/base.py index acd0e37..3e306a5 100644 --- a/miplearn/solvers/pyomo/base.py +++ b/miplearn/solvers/pyomo/base.py @@ -605,6 +605,7 @@ class PyomoTestInstanceKnapsack(Instance): self.weights = weights self.prices = prices self.capacity = capacity + self.n = len(weights) @overrides def to_model(self) -> pe.ConcreteModel: @@ -631,15 +632,17 @@ class PyomoTestInstanceKnapsack(Instance): ) @overrides - def get_variable_features(self) -> Dict[str, List[float]]: - return { - f"x[{i}]": [ - self.weights[i], - self.prices[i], + def get_variable_features(self, names: np.ndarray) -> np.ndarray: + return np.vstack( + [ + [[self.weights[i], self.prices[i]] for i in range(self.n)], + [0.0, 0.0], ] - for i in range(len(self.weights)) - } + ) @overrides - def get_variable_categories(self) -> Dict[str, str]: - return {f"x[{i}]": "default" for i in range(len(self.weights))} + def get_variable_categories(self, names: np.ndarray) -> np.ndarray: + return np.array( + ["default" if n.decode().startswith("x") else "" for n in names], + dtype="S", + ) diff --git a/miplearn/types.py b/miplearn/types.py index 5d94163..74a194e 100644 --- a/miplearn/types.py +++ b/miplearn/types.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: # noinspection 
PyUnresolvedReferences from miplearn.solvers.learning import InternalSolver -Category = str +Category = bytes IterationCallback = Callable[[], bool] LazyCallback = Callable[[Any, Any], None] SolverParams = Dict[str, Any] diff --git a/tests/components/test_primal.py b/tests/components/test_primal.py index 0f56d7b..a77cad0 100644 --- a/tests/components/test_primal.py +++ b/tests/components/test_primal.py @@ -23,21 +23,28 @@ def sample() -> Sample: sample = MemorySample( { "static_var_names": np.array(["x[0]", "x[1]", "x[2]", "x[3]"], dtype="S"), - "static_var_categories": ["default", None, "default", "default"], + "static_var_categories": np.array( + ["default", "", "default", "default"], + dtype="S", + ), "mip_var_values": np.array([0.0, 1.0, 1.0, 0.0]), - "static_instance_features": [5.0], - "static_var_features": [ - [0.0, 0.0], - None, - [1.0, 0.0], - [1.0, 1.0], - ], - "lp_var_features": [ - [0.0, 0.0, 2.0, 2.0], - None, - [1.0, 0.0, 3.0, 2.0], - [1.0, 1.0, 3.0, 3.0], - ], + "static_instance_features": np.array([5.0]), + "static_var_features": np.array( + [ + [0.0, 0.0], + [0.0, 0.0], + [1.0, 0.0], + [1.0, 1.0], + ] + ), + "lp_var_features": np.array( + [ + [0.0, 0.0, 2.0, 2.0], + [0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 3.0, 2.0], + [1.0, 1.0, 3.0, 3.0], + ] + ), }, ) return sample @@ -45,14 +52,14 @@ def sample() -> Sample: def test_xy(sample: Sample) -> None: x_expected = { - "default": [ + b"default": [ [5.0, 0.0, 0.0, 2.0, 2.0], [5.0, 1.0, 0.0, 3.0, 2.0], [5.0, 1.0, 1.0, 3.0, 3.0], ] } y_expected = { - "default": [ + b"default": [ [True, False], [False, True], [True, False], @@ -72,15 +79,15 @@ def test_fit_xy() -> None: thr.clone = lambda: Mock(spec=Threshold) comp = PrimalSolutionComponent(classifier=clf, threshold=thr) x = { - "type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), - "type-b": np.array([[7.0, 8.0, 9.0]]), + b"type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), + b"type-b": np.array([[7.0, 8.0, 9.0]]), } y = { - "type-a": np.array([[True, 
False], [False, True]]), - "type-b": np.array([[True, False]]), + b"type-a": np.array([[True, False], [False, True]]), + b"type-b": np.array([[True, False]]), } comp.fit_xy(x, y) - for category in ["type-a", "type-b"]: + for category in [b"type-a", b"type-b"]: assert category in comp.classifiers assert category in comp.thresholds clf = comp.classifiers[category] # type: ignore @@ -142,13 +149,13 @@ def test_predict(sample: Sample) -> None: thr.predict = Mock(return_value=[0.75, 0.75]) comp = PrimalSolutionComponent() x, _ = comp.sample_xy(None, sample) - comp.classifiers = {"default": clf} - comp.thresholds = {"default": thr} + comp.classifiers = {b"default": clf} + comp.thresholds = {b"default": thr} pred = comp.sample_predict(sample) clf.predict_proba.assert_called_once() thr.predict.assert_called_once() - assert_array_equal(x["default"], clf.predict_proba.call_args[0][0]) - assert_array_equal(x["default"], thr.predict.call_args[0][0]) + assert_array_equal(x[b"default"], clf.predict_proba.call_args[0][0]) + assert_array_equal(x[b"default"], thr.predict.call_args[0][0]) assert pred == { b"x[0]": 0.0, b"x[1]": None, diff --git a/tests/features/test_extractor.py b/tests/features/test_extractor.py index 6e60f75..9d0da22 100644 --- a/tests/features/test_extractor.py +++ b/tests/features/test_extractor.py @@ -49,8 +49,8 @@ def test_knapsack() -> None: sample.get_vector("static_var_upper_bounds"), [1.0, 1.0, 1.0, 1.0, 67.0] ) assert_equals( - sample.get_vector("static_var_categories"), - ["default", "default", "default", "default", None], + sample.get_array("static_var_categories"), + np.array(["default", "default", "default", "default", ""], dtype="S"), ) assert sample.get_vector_list("static_var_features") is not None assert_equals(