Implement MultiKnapsackPerturber

2025-12-09 02:48:52 -06:00 · 2025-12-08 13:31:52 -06:00
parent 15cdb7e679
commit 14e2fe331d
3 changed files with 173 additions and 134 deletions
--- a/miplearn/problems/multiknapsack.py
+++ b/miplearn/problems/multiknapsack.py
@@ -38,36 +38,10 @@ class MultiKnapsackData:
 class MultiKnapsackGenerator:
    """Random instance generator for the multi-dimensional knapsack problem.

-    Instances have a random number of items (or variables) and a random number of
-    knapsacks (or constraints), as specified by the provided probability
-    distributions `n` and `m`, respectively. The weight of each item `i` on knapsack
-    `j` is sampled independently from the provided distribution `w`. The capacity of
-    knapsack `j` is set to ``alpha_j * sum(w[i,j] for i in range(n))``,
-    where `alpha_j`, the tightness ratio, is sampled from the provided probability
-    distribution `alpha`.
-
-    To make the instances more challenging, the costs of the items are linearly
-    correlated to their average weights. More specifically, the weight of each item
-    `i` is set to ``sum(w[i,j]/m for j in range(m)) + K * u_i``, where `K`,
-    the correlation coefficient, and `u_i`, the correlation multiplier, are sampled
-    from the provided probability distributions. Note that `K` is only sample once
-    for the entire instance.
-
-    If `fix_w=True`, then `weights[i,j]` are kept the same in all generated
-    instances. This also implies that n and m are kept fixed. Although the prices and
-    capacities are derived from `weights[i,j]`, as long as `u` and `K` are not
-    constants, the generated instances will still not be completely identical.
-
-    If a probability distribution `w_jitter` is provided, then item weights will be
-    set to ``w[i,j] * gamma[i,j]`` where `gamma[i,j]` is sampled from `w_jitter`.
-    When combined with `fix_w=True`, this argument may be used to generate instances
-    where the weight of each item is roughly the same, but not exactly identical,
-    across all instances. The prices of the items and the capacities of the knapsacks
-    will be calculated as above, but using these perturbed weights instead.
-
-    By default, all generated prices, weights and capacities are rounded to the
-    nearest integer number. If `round=False` is provided, this rounding will be
-    disabled.
+    Generates new instances by creating random items and knapsacks according to the
+    provided probability distributions. Each instance has a random number of items
+    (variables) and knapsacks (constraints). Weights are sampled independently;
+    prices and capacities are then derived from the sampled weights.

    Parameters
    ----------
@@ -83,11 +57,6 @@ class MultiKnapsackGenerator:
        Probability distribution for the profit multiplier.
    alpha: rv_continuous
        Probability distribution for the tightness ratio.
-    fix_w: boolean
-        If true, weights are kept the same (minus the noise from w_jitter) in all
-        instances.
-    w_jitter: rv_continuous
-        Probability distribution for random noise added to the weights.
    round: boolean
        If true, all prices, weights and capacities are rounded to the nearest
        integer.
@@ -101,9 +70,6 @@ class MultiKnapsackGenerator:
        K: rv_frozen = randint(low=500, high=501),
        u: rv_frozen = uniform(loc=0.0, scale=1.0),
        alpha: rv_frozen = uniform(loc=0.25, scale=0.0),
-        fix_w: bool = False,
-        w_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
-        p_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        round: bool = True,
    ):
        assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
@@ -114,10 +80,6 @@ class MultiKnapsackGenerator:
        assert isinstance(
            alpha, rv_frozen
        ), "alpha should be a SciPy probability distribution"
-        assert isinstance(fix_w, bool), "fix_w should be boolean"
-        assert isinstance(
-            w_jitter, rv_frozen
-        ), "w_jitter should be a SciPy probability distribution"

        self.n = n
        self.m = m
@@ -125,45 +87,17 @@ class MultiKnapsackGenerator:
        self.u = u
        self.K = K
        self.alpha = alpha
-        self.w_jitter = w_jitter
-        self.p_jitter = p_jitter
        self.round = round
-        self.fix_n: Optional[int] = None
-        self.fix_m: Optional[int] = None
-        self.fix_w: Optional[np.ndarray] = None
-        self.fix_u: Optional[np.ndarray] = None
-        self.fix_K: Optional[float] = None
-
-        if fix_w:
-            self.fix_n = self.n.rvs()
-            self.fix_m = self.m.rvs()
-            self.fix_w = np.array([self.w.rvs(self.fix_n) for _ in range(self.fix_m)])
-            self.fix_u = self.u.rvs(self.fix_n)
-            self.fix_K = self.K.rvs()

    def generate(self, n_samples: int) -> List[MultiKnapsackData]:
        def _sample() -> MultiKnapsackData:
-            if self.fix_w is not None:
-                assert self.fix_m is not None
-                assert self.fix_n is not None
-                assert self.fix_u is not None
-                assert self.fix_K is not None
-                n = self.fix_n
-                m = self.fix_m
-                w = self.fix_w
-                u = self.fix_u
-                K = self.fix_K
-            else:
-                n = self.n.rvs()
-                m = self.m.rvs()
-                w = np.array([self.w.rvs(n) for _ in range(m)])
-                u = self.u.rvs(n)
-                K = self.K.rvs()
-            w = w * np.array([self.w_jitter.rvs(n) for _ in range(m)])
+            n = self.n.rvs()
+            m = self.m.rvs()
+            w = np.array([self.w.rvs(n) for _ in range(m)])
+            u = self.u.rvs(n)
+            K = self.K.rvs()
            alpha = self.alpha.rvs(m)
-            p = np.array(
-                [w[:, j].sum() / m + K * u[j] for j in range(n)]
-            ) * self.p_jitter.rvs(n)
+            p = np.array([w[:, j].sum() / m + K * u[j] for j in range(n)])
            b = np.array([w[i, :].sum() * alpha[i] for i in range(m)])
            if self.round:
                p = p.round()
@@ -174,6 +108,72 @@ class MultiKnapsackGenerator:
        return [_sample() for _ in range(n_samples)]


+class MultiKnapsackPerturber:
+    """Perturbation generator for existing multi-dimensional knapsack instances.
+
+    Takes an existing MultiKnapsackData instance and generates new instances by
+    multiplying its weights, prices and capacities by random factors, while keeping
+    the structure (number of items and knapsacks) fixed.
+
+    Parameters
+    ----------
+    w_jitter: rv_frozen
+        Distribution of the multiplicative factors for weights (one per entry).
+    p_jitter: rv_frozen
+        Distribution of the multiplicative factors for prices (one per item).
+    alpha_jitter: rv_frozen
+        Distribution of the multiplicative factors for capacities (one per knapsack).
+    round: boolean
+        If true, round perturbed prices, weights and capacities to the nearest integer.
+    """
+
+    def __init__(
+        self,
+        w_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
+        p_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
+        alpha_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
+        round: bool = True,
+    ):
+        assert isinstance(
+            w_jitter, rv_frozen
+        ), "w_jitter should be a SciPy probability distribution"
+        assert isinstance(
+            p_jitter, rv_frozen
+        ), "p_jitter should be a SciPy probability distribution"
+        assert isinstance(
+            alpha_jitter, rv_frozen
+        ), "alpha_jitter should be a SciPy probability distribution"
+
+        self.w_jitter = w_jitter
+        self.p_jitter = p_jitter
+        self.alpha_jitter = alpha_jitter
+        self.round = round
+
+    def perturb(
+        self,
+        instance: MultiKnapsackData,
+        n_samples: int,
+    ) -> List[MultiKnapsackData]:
+        """Return `n_samples` independently perturbed copies of `instance`."""
+        def _sample() -> MultiKnapsackData:
+            m, n = instance.weights.shape
+            w_factors = np.array([self.w_jitter.rvs(n) for _ in range(m)])
+            p_factors = self.p_jitter.rvs(n)
+            alpha_factors = self.alpha_jitter.rvs(m)
+            # Element-wise scaling builds new arrays; `instance` is not modified.
+            w = instance.weights * w_factors
+            p = instance.prices * p_factors
+            b = instance.capacities * alpha_factors
+
+            if self.round:
+                p = p.round()
+                b = b.round()
+                w = w.round()
+            return MultiKnapsackData(p, b, w)
+
+        return [_sample() for _ in range(n_samples)]
+
+
 def build_multiknapsack_model_gurobipy(
    data: Union[str, MultiKnapsackData]
 ) -> GurobiModel: