Request variable features/categories in bulk

2025-12-06 01:18:52 -06:00 · 2021-06-29 09:02:46 -05:00
parent 6969f2ffd2
commit 438859e493
9 changed files with 100 additions and 138 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # MIPLearn: Changelog

-## [Unreleased]
+## [0.2.0] - [Unreleased]

 ### Added

@@ -20,14 +20,7 @@

 ### Changed

- Variables are now referenced by their names, instead of tuples `(var_name, index)`. This change was required to improve the compatibility with modeling languages other than Pyomo, which do not follow this convention. The functions `get_variable_category` and `get_variable_features` now have the following signature:
-  ````python
-   def get_variable_features(self, var_name: str) -> List[float]:
-      pass
-  
-   def get_variable_category(self, var_name: str) -> Optional[Hashable]:
-      pass  
-   ````
+- Variables are now referenced by their names, instead of tuples `(var_name, index)`. This change was required to improve the compatibility with modeling languages other than Pyomo, which do not follow this convention. For performance reasons, the per-variable functions `get_variable_features` and `get_variable_category` have been replaced by `get_variable_features` and `get_variable_categories`, which take no arguments and each return a dictionary mapping variable names to the features (respectively, categories) of all relevant variables. Previously, MIPLearn had to perform two function calls per variable, which was too slow for very large models.
 - Internal solvers must now be specified as objects, instead of strings. For example,
  ```python
  solver = LearningSolver(
--- a/miplearn/features.py
+++ b/miplearn/features.py
@@ -189,17 +189,25 @@ class FeaturesExtractor:
    ) -> None:
        assert features.variables is not None
        assert features.variables.names is not None
-        categories: List[Hashable] = []
+        categories: List[Optional[Hashable]] = []
        user_features: List[Optional[List[float]]] = []
+        var_features_dict = instance.get_variable_features()
+        var_categories_dict = instance.get_variable_categories()
+
        for (i, var_name) in enumerate(features.variables.names):
-            category: Hashable = instance.get_variable_category(var_name)
+            if var_name not in var_categories_dict:
+                user_features.append(None)
+                categories.append(None)
+                continue
+            category: Hashable = var_categories_dict[var_name]
+            assert isinstance(category, collections.abc.Hashable), (
+                f"Variable category must be hashable. "
+                f"Found {type(category).__name__} instead for var={var_name}."
+            )
+            categories.append(category)
            user_features_i: Optional[List[float]] = None
-            if category is not None:
-                assert isinstance(category, collections.Hashable), (
-                    f"Variable category must be be hashable. "
-                    f"Found {type(category).__name__} instead for var={var_name}."
-                )
-                user_features_i = instance.get_variable_features(var_name)
+            if var_name in var_features_dict:
+                user_features_i = var_features_dict[var_name]
                if isinstance(user_features_i, np.ndarray):
                    user_features_i = user_features_i.tolist()
                assert isinstance(user_features_i, list), (
@@ -213,11 +221,8 @@ class FeaturesExtractor:
                        f"Found {type(v).__name__} instead "
                        f"for var={var_name}."
                    )
-            categories.append(category)
-            if user_features_i is None:
-                user_features.append(None)
-            else:
-                user_features.append(list(user_features_i))
+                user_features_i = list(user_features_i)
+            user_features.append(user_features_i)
        features.variables.categories = categories
        features.variables.user_features = user_features

--- a/miplearn/instance/base.py
+++ b/miplearn/instance/base.py
@@ -4,10 +4,11 @@

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional, Hashable, TYPE_CHECKING
+from typing import Any, List, Optional, Hashable, TYPE_CHECKING, Dict
+
+from overrides import EnforceOverrides

 from miplearn.features import Sample
-from miplearn.types import VariableName, Category

 logger = logging.getLogger(__name__)

@@ -16,7 +17,7 @@ if TYPE_CHECKING:


 # noinspection PyMethodMayBeStatic
-class Instance(ABC):
+class Instance(ABC, EnforceOverrides):
    """
    Abstract class holding all the data necessary to generate a concrete model of the
    proble.
@@ -62,10 +63,10 @@ class Instance(ABC):
        """
        return [0.0]

-    def get_variable_features(self, var_name: VariableName) -> List[float]:
+    def get_variable_features(self) -> Dict[str, List[float]]:
        """
-        Returns a (1-dimensional) list of numerical features describing a particular
-        decision variable.
+        Returns a dictionary mapping the name of each variable to a (1-dimensional)
+        list of numerical features describing that variable.

        In combination with instance features, variable features are used by
        LearningSolver to predict, among other things, the optimal value of each
@@ -77,22 +78,25 @@ class Instance(ABC):
        length for all variables within the same category, for all relevant instances
        of the problem.

-        By default, returns [0.0].
-        """
-        return [0.0]
+        If features are not provided for a given variable, MIPLearn will use a
+        default set of features.

-    def get_variable_category(self, var_name: VariableName) -> Optional[Category]:
+        By default, returns {}.
        """
-        Returns the category for each decision variable.
+        return {}
+
+    def get_variable_categories(self) -> Dict[str, Hashable]:
+        """
+        Returns a dictionary mapping the name of each variable to its category.

        If two variables have the same category, LearningSolver will use the same
-        internal ML model to predict the values of both variables. If the returned
-        category is None, ML models will ignore the variable.
+        internal ML model to predict the values of both variables. If a variable is not
+        listed in the dictionary, ML models will ignore the variable.

        A category can be any hashable type, such as strings, numbers or tuples.
-        By default, returns "default".
+        By default, returns {}.
        """
-        return "default"
+        return {}

    def get_constraint_features(self, cid: str) -> List[float]:
        return [0.0]
--- a/miplearn/instance/picklegz.py
+++ b/miplearn/instance/picklegz.py
@@ -6,12 +6,11 @@ import gc
 import gzip
 import os
 import pickle
-from typing import Optional, Any, List, Hashable, cast, IO, TYPE_CHECKING
+from typing import Optional, Any, List, Hashable, cast, IO, TYPE_CHECKING, Dict

 from overrides import overrides

-from miplearn.instance.base import logger, Instance
-from miplearn.types import VariableName, Category
+from miplearn.instance.base import Instance

 if TYPE_CHECKING:
    from miplearn.solvers.learning import InternalSolver
@@ -47,14 +46,14 @@ class PickleGzInstance(Instance):
        return self.instance.get_instance_features()

    @overrides
-    def get_variable_features(self, var_name: VariableName) -> List[float]:
+    def get_variable_features(self) -> Dict[str, List[float]]:
        assert self.instance is not None
-        return self.instance.get_variable_features(var_name)
+        return self.instance.get_variable_features()

    @overrides
-    def get_variable_category(self, var_name: VariableName) -> Optional[Category]:
+    def get_variable_categories(self) -> Dict[str, Hashable]:
        assert self.instance is not None
-        return self.instance.get_variable_category(var_name)
+        return self.instance.get_variable_categories()

    @overrides
    def get_constraint_features(self, cid: str) -> Optional[List[float]]:
--- a/miplearn/problems/knapsack.py
+++ b/miplearn/problems/knapsack.py
@@ -48,13 +48,14 @@ class ChallengeA:
 class MultiKnapsackInstance(Instance):
    """Representation of the Multidimensional 0-1 Knapsack Problem.

-    Given a set of n items and m knapsacks, the problem is to find a subset of items S maximizing
-    sum(prices[i] for i in S). If selected, each item i occupies weights[i,j] units of space in
-    each knapsack j. Furthermore, each knapsack j has limited storage space, given by capacities[j].
+    Given a set of n items and m knapsacks, the problem is to find a subset of items
+    S maximizing sum(prices[i] for i in S). If selected, each item i occupies
+    weights[i,j] units of space in each knapsack j. Furthermore, each knapsack j has
+    limited storage space, given by capacities[j].

-    This implementation assigns a different category for each decision variable, and therefore
-    trains one ML model per variable. It is only suitable when training and test instances have
-    same size and items don't shuffle around.
+    This implementation assigns a different category for each decision variable,
+    and therefore trains one ML model per variable. It is only suitable when training
+    and test instances have same size and items don't shuffle around.
    """

    def __init__(
@@ -74,7 +75,6 @@ class MultiKnapsackInstance(Instance):
        self.prices = prices
        self.capacities = capacities
        self.weights = weights
-        self.varname_to_index = {f"x[{i}]": i for i in range(self.n)}

    @overrides
    def to_model(self) -> pe.ConcreteModel:
@@ -98,9 +98,11 @@ class MultiKnapsackInstance(Instance):
        return [float(np.mean(self.prices))] + list(self.capacities)

    @overrides
-    def get_variable_features(self, var_name: VariableName) -> List[float]:
-        index = self.varname_to_index[var_name]
-        return [self.prices[index]] + list(self.weights[:, index])
+    def get_variable_features(self) -> Dict[str, List[float]]:
+        return {
+            f"x[{i}]": [self.prices[i]] + list(self.weights[:, i])
+            for i in range(self.n)
+        }


 # noinspection PyPep8Naming
@@ -110,7 +112,7 @@ class MultiKnapsackGenerator:
        n: rv_frozen = randint(low=100, high=101),
        m: rv_frozen = randint(low=30, high=31),
        w: rv_frozen = randint(low=0, high=1000),
-        K: rv_frozen = randint(low=500, high=500),
+        K: rv_frozen = randint(low=500, high=501),
        u: rv_frozen = uniform(loc=0.0, scale=1.0),
        alpha: rv_frozen = uniform(loc=0.25, scale=0.0),
        fix_w: bool = False,
@@ -241,51 +243,3 @@ class MultiKnapsackGenerator:
            return MultiKnapsackInstance(p, b, w)

        return [_sample() for _ in range(n_samples)]
-
-
-class KnapsackInstance(Instance):
-    """
-    Simpler (one-dimensional) Knapsack Problem, used for testing.
-    """
-
-    def __init__(
-        self,
-        weights: List[float],
-        prices: List[float],
-        capacity: float,
-    ) -> None:
-        super().__init__()
-        self.weights = weights
-        self.prices = prices
-        self.capacity = capacity
-        self.varname_to_item: Dict[VariableName, int] = {
-            f"x[{i}]": i for i in range(len(self.weights))
-        }
-
-    @overrides
-    def to_model(self) -> pe.ConcreteModel:
-        model = pe.ConcreteModel()
-        items = range(len(self.weights))
-        model.x = pe.Var(items, domain=pe.Binary)
-        model.OBJ = pe.Objective(
-            expr=sum(model.x[v] * self.prices[v] for v in items), sense=pe.maximize
-        )
-        model.eq_capacity = pe.Constraint(
-            expr=sum(model.x[v] * self.weights[v] for v in items) <= self.capacity
-        )
-        return model
-
-    @overrides
-    def get_instance_features(self) -> List[float]:
-        return [
-            self.capacity,
-            np.average(self.weights),
-        ]
-
-    @overrides
-    def get_variable_features(self, var_name: VariableName) -> List[Category]:
-        item = self.varname_to_item[var_name]
-        return [
-            self.weights[item],
-            self.prices[item],
-        ]
--- a/miplearn/problems/stab.py
+++ b/miplearn/problems/stab.py
@@ -1,7 +1,7 @@
 #  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 #  Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
-from typing import List
+from typing import List, Dict, Hashable

 import networkx as nx
 import numpy as np
@@ -52,7 +52,6 @@ class MaxWeightStableSetInstance(Instance):
        self.graph = graph
        self.weights = weights
        self.nodes = list(self.graph.nodes)
-        self.varname_to_node = {f"x[{v}]": v for v in self.nodes}

    @overrides
    def to_model(self) -> pe.ConcreteModel:
@@ -68,24 +67,26 @@ class MaxWeightStableSetInstance(Instance):
        return model

    @overrides
-    def get_variable_features(self, var_name: VariableName) -> List[float]:
-        v1 = self.varname_to_node[var_name]
-        neighbor_weights = [0.0] * 15
-        neighbor_degrees = [100.0] * 15
-        for v2 in self.graph.neighbors(v1):
-            neighbor_weights += [self.weights[v2] / self.weights[v1]]
-            neighbor_degrees += [self.graph.degree(v2) / self.graph.degree(v1)]
-        neighbor_weights.sort(reverse=True)
-        neighbor_degrees.sort()
-        features = []
-        features += neighbor_weights[:5]
-        features += neighbor_degrees[:5]
-        features += [self.graph.degree(v1)]
+    def get_variable_features(self) -> Dict[str, List[float]]:
+        features = {}
+        for v1 in self.nodes:
+            neighbor_weights = [0.0] * 15
+            neighbor_degrees = [100.0] * 15
+            for v2 in self.graph.neighbors(v1):
+                neighbor_weights += [self.weights[v2] / self.weights[v1]]
+                neighbor_degrees += [self.graph.degree(v2) / self.graph.degree(v1)]
+            neighbor_weights.sort(reverse=True)
+            neighbor_degrees.sort()
+            f = []
+            f += neighbor_weights[:5]
+            f += neighbor_degrees[:5]
+            f += [self.graph.degree(v1)]
+            features[f"x[{v1}]"] = f
        return features

    @overrides
-    def get_variable_category(self, var: VariableName) -> Category:
-        return "default"
+    def get_variable_categories(self) -> Dict[str, Hashable]:
+        return {f"x[{v}]": "default" for v in self.nodes}


 class MaxWeightStableSetGenerator:
--- a/miplearn/problems/tsp.py
+++ b/miplearn/problems/tsp.py
@@ -1,7 +1,7 @@
 #  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 #  Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
-from typing import List, Tuple, FrozenSet, Any, Optional, Hashable
+from typing import List, Tuple, FrozenSet, Any, Optional, Hashable, Dict

 import networkx as nx
 import numpy as np
@@ -59,7 +59,6 @@ class TravelingSalesmanInstance(Instance):
        self.edges = [
            (i, j) for i in range(self.n_cities) for j in range(i + 1, self.n_cities)
        ]
-        self.varname_to_index = {f"x[{e}]": e for e in self.edges}

    @overrides
    def to_model(self) -> pe.ConcreteModel:
@@ -83,8 +82,8 @@ class TravelingSalesmanInstance(Instance):
        return model

    @overrides
-    def get_variable_category(self, var_name: VariableName) -> Category:
-        return self.varname_to_index[var_name]
+    def get_variable_categories(self) -> Dict[str, Hashable]:
+        return {f"x[{e}]": f"x[{e}]" for e in self.edges}

    @overrides
    def find_violated_lazy_constraints(
--- a/miplearn/solvers/pyomo/base.py
+++ b/miplearn/solvers/pyomo/base.py
@@ -6,7 +6,7 @@ import logging
 import re
 import sys
 from io import StringIO
-from typing import Any, List, Dict, Optional, Tuple
+from typing import Any, List, Dict, Optional, Tuple, Hashable

 import numpy as np
 import pyomo
@@ -604,9 +604,6 @@ class PyomoTestInstanceKnapsack(Instance):
        self.weights = weights
        self.prices = prices
        self.capacity = capacity
-        self.varname_to_item: Dict[VariableName, int] = {
-            f"x[{i}]": i for i in range(len(self.weights))
-        }

    @overrides
    def to_model(self) -> pe.ConcreteModel:
@@ -631,15 +628,15 @@ class PyomoTestInstanceKnapsack(Instance):
        ]

    @overrides
-    def get_variable_features(self, var_name: VariableName) -> List[Category]:
-        item = self.varname_to_item[var_name]
-        return [
-            self.weights[item],
-            self.prices[item],
-        ]
+    def get_variable_features(self) -> Dict[str, List[float]]:
+        return {
+            f"x[{i}]": [
+                self.weights[i],
+                self.prices[i],
+            ]
+            for i in range(len(self.weights))
+        }

    @overrides
-    def get_variable_category(self, var_name: VariableName) -> Optional[Category]:
-        if var_name.startswith("x"):
-            return "default"
-        return None
+    def get_variable_categories(self) -> Dict[str, Hashable]:
+        return {f"x[{i}]": "default" for i in range(len(self.weights))}
--- a/tests/problems/test_knapsack.py
+++ b/tests/problems/test_knapsack.py
@@ -5,6 +5,7 @@
 import numpy as np
 from scipy.stats import uniform, randint

+from miplearn import LearningSolver
 from miplearn.problems.knapsack import MultiKnapsackGenerator


@@ -22,3 +23,12 @@ def test_knapsack_generator() -> None:
    b_sum = sum(instance.capacities for instance in instances) / len(instances)
    assert round(float(np.mean(w_sum)), -1) == 500.0
    assert round(float(np.mean(b_sum)), -3) == 25000.0
+
+
+def test_knapsack() -> None:
+    instance = MultiKnapsackGenerator(
+        n=randint(low=5, high=6),
+        m=randint(low=5, high=6),
+    ).generate(1)[0]
+    solver = LearningSolver()
+    solver.solve(instance)