Implement MultiKnapsackPerturber

2025-12-09 02:48:52 -06:00 · 2025-12-08 13:31:52 -06:00
parent 15cdb7e679
commit 14e2fe331d
3 changed files with 173 additions and 134 deletions
--- a/docs/guide/problems.ipynb
+++ b/docs/guide/problems.ipynb
@@ -276,10 +276,9 @@
    "where $K$, the correlation coefficient, and $u_j$, the correlation multiplier, are sampled\n",
    "from the provided probability distributions `K` and `u`.\n",
    "\n",
-    "If `fix_w=True` is provided, then $w_{ij}$ are kept the same in all generated instances. This also implies that $n$ and $m$ are kept fixed. Although the prices and capacities are derived from $w_{ij}$, as long as `u` and `K` are not constants, the generated instances will still not be completely identical.\n",
+    "To create multiple instances with the same structure but different values, you can use [MultiKnapsackPerturber][MultiKnapsackPerturber]. This class takes an existing MultiKnapsackData instance and generates new instances by applying randomization factors to the existing weights, prices, and capacities while keeping the number of items and knapsacks fixed.\n",
    "\n",
-    "\n",
-    "If a probability distribution `w_jitter` is provided, then item weights will be set to $w_{ij} \\gamma_{ij}$ where $\\gamma_{ij}$ is sampled from `w_jitter`. When combined with `fix_w=True`, this argument may be used to generate instances where the weight of each item is roughly the same, but not exactly identical, across all instances. The prices of the items and the capacities of the knapsacks will be calculated as above, but using these perturbed weights instead.\n",
+    "[MultiKnapsackPerturber]: ../../api/problems/#miplearn.problems.multiknapsack.MultiKnapsackPerturber\n",
    "\n",
    "By default, all generated prices, weights and capacities are rounded to the nearest integer number. If `round=False` is provided, this rounding will be disabled."
   ]
@@ -307,7 +306,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
   "id": "1ce5f8fb-2769-4fbd-a40c-fd62b897690a",
   "metadata": {
    "ExecuteTime": {
@@ -320,16 +319,16 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "prices\n",
-      " [350. 692. 454. 709. 605. 543. 321. 674. 571. 341.]\n",
-      "weights\n",
-      " [[392. 977. 764. 622. 158. 163.  56. 840. 574. 696.]\n",
-      " [ 20. 948. 860. 209. 178. 184. 293. 541. 414. 305.]\n",
-      " [629. 135. 278. 378. 466. 803. 205. 492. 584.  45.]\n",
-      " [630. 173.  64. 907. 947. 794. 312.  99. 711. 439.]\n",
-      " [117. 506.  35. 915. 266. 662. 312. 516. 521. 178.]]\n",
-      "capacities\n",
-      " [1310.  988. 1004. 1269. 1007.]\n",
+      "  prices\n",
+      " [405. 663. 473. 708. 545. 588. 330. 636. 635. 397.]\n",
+      "  weights\n",
+      " [[393. 977. 764. 623. 158. 163.  56. 840. 574. 696.]\n",
+      " [ 21. 948. 859. 209. 178. 184. 293. 541. 414. 305.]\n",
+      " [629. 135. 278. 378. 465. 803. 205. 492. 584.  44.]\n",
+      " [630. 173.  64. 908. 948. 794. 312.  99. 710. 439.]\n",
+      " [117. 506.  35. 915. 266. 663. 313. 516. 521. 178.]]\n",
+      "  capacities\n",
+      " [1245.  921. 1087. 1351. 1038.]\n",
      "\n",
      "Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - \"Ubuntu 22.04.5 LTS\")\n",
      "\n",
@@ -337,40 +336,43 @@
      "Thread count: 16 physical cores, 16 logical processors, using up to 16 threads\n",
      "\n",
      "Optimize a model with 5 rows, 10 columns and 50 nonzeros (Min)\n",
-      "Model fingerprint: 0xaf3ac15e\n",
+      "Model fingerprint: 0xdd25f04e\n",
      "Model has 10 linear objective coefficients\n",
      "Variable types: 0 continuous, 10 integer (10 binary)\n",
      "Coefficient statistics:\n",
      "  Matrix range     [2e+01, 1e+03]\n",
      "  Objective range  [3e+02, 7e+02]\n",
      "  Bounds range     [1e+00, 1e+00]\n",
-      "  RHS range        [1e+03, 1e+03]\n",
-      "Found heuristic solution: objective -804.0000000\n",
+      "  RHS range        [9e+02, 1e+03]\n",
+      "Found heuristic solution: objective -878.0000000\n",
      "Presolve removed 0 rows and 3 columns\n",
      "Presolve time: 0.00s\n",
-      "Presolved: 5 rows, 7 columns, 34 nonzeros\n",
+      "Presolved: 5 rows, 7 columns, 35 nonzeros\n",
      "Variable types: 0 continuous, 7 integer (7 binary)\n",
      "\n",
-      "Root relaxation: objective -1.428726e+03, 4 iterations, 0.00 seconds (0.00 work units)\n",
+      "Root relaxation: objective -1.319577e+03, 6 iterations, 0.00 seconds (0.00 work units)\n",
      "\n",
      "    Nodes    |    Current Node    |     Objective Bounds      |     Work\n",
      " Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time\n",
      "\n",
-      "     0     0 -1428.7265    0    4 -804.00000 -1428.7265  77.7%     -    0s\n",
-      "H    0     0                    -995.0000000 -1428.7265  43.6%     -    0s\n",
-      "H    0     0                    -1279.000000 -1428.7265  11.7%     -    0s\n",
-      "     0     0 -1428.7265    0    4 -1279.0000 -1428.7265  11.7%     -    0s\n",
+      "     0     0 -1319.5773    0    4 -878.00000 -1319.5773  50.3%     -    0s\n",
+      "H    0     0                    -1181.000000 -1319.5773  11.7%     -    0s\n",
+      "     0     0 -1181.0000    0    2 -1181.0000 -1181.0000  0.00%     -    0s\n",
      "\n",
-      "Explored 1 nodes (4 simplex iterations) in 0.01 seconds (0.00 work units)\n",
+      "Cutting planes:\n",
+      "  Cover: 1\n",
+      "  MIR: 1\n",
+      "\n",
+      "Explored 1 nodes (10 simplex iterations) in 0.00 seconds (0.00 work units)\n",
      "Thread count was 16 (of 16 available processors)\n",
      "\n",
-      "Solution count 3: -1279 -995 -804 \n",
-      "No other solutions better than -1279\n",
+      "Solution count 2: -1181 -878 \n",
+      "No other solutions better than -1181\n",
      "\n",
      "Optimal solution found (tolerance 1.00e-04)\n",
-      "Best objective -1.279000000000e+03, best bound -1.279000000000e+03, gap 0.0000%\n",
+      "Best objective -1.181000000000e+03, best bound -1.181000000000e+03, gap 0.0000%\n",
      "\n",
-      "User-callback calls 418, time in user-callback 0.00 sec\n"
+      "User-callback calls 391, time in user-callback 0.00 sec\n"
     ]
    }
   ],
@@ -379,30 +381,36 @@
    "from scipy.stats import uniform, randint\n",
    "from miplearn.problems.multiknapsack import (\n",
    "    MultiKnapsackGenerator,\n",
+    "    MultiKnapsackPerturber,\n",
    "    build_multiknapsack_model_gurobipy,\n",
    ")\n",
    "\n",
    "# Set random seed, to make example reproducible\n",
    "np.random.seed(42)\n",
    "\n",
-    "# Generate ten similar random instances of the multiknapsack problem with\n",
-    "# ten items, five resources and weights around [0, 1000].\n",
-    "data = MultiKnapsackGenerator(\n",
+    "# Generate a reference instance with 10 items and 5 knapsacks\n",
+    "generator = MultiKnapsackGenerator(\n",
    "    n=randint(low=10, high=11),\n",
    "    m=randint(low=5, high=6),\n",
    "    w=uniform(loc=0, scale=1000),\n",
    "    K=uniform(loc=100, scale=0),\n",
    "    u=uniform(loc=1, scale=0),\n",
    "    alpha=uniform(loc=0.25, scale=0),\n",
+    ")\n",
+    "reference_instance = generator.generate(1)[0]\n",
+    "\n",
+    "# Generate perturbed instances using the reference\n",
+    "perturber = MultiKnapsackPerturber(\n",
    "    w_jitter=uniform(loc=0.95, scale=0.1),\n",
-    "    p_jitter=uniform(loc=0.75, scale=0.5),\n",
-    "    fix_w=True,\n",
-    ").generate(10)\n",
+    "    p_jitter=uniform(loc=0.9, scale=0.2),\n",
+    "    alpha_jitter=uniform(loc=0.9, scale=0.2),\n",
+    ")\n",
+    "data = perturber.perturb(reference_instance, 10)\n",
    "\n",
    "# Print data for one of the instances\n",
-    "print(\"prices\\n\", data[0].prices)\n",
-    "print(\"weights\\n\", data[0].weights)\n",
-    "print(\"capacities\\n\", data[0].capacities)\n",
+    "print(\"  prices\\n\", data[0].prices)\n",
+    "print(\"  weights\\n\", data[0].weights)\n",
+    "print(\"  capacities\\n\", data[0].capacities)\n",
    "print()\n",
    "\n",
    "# Build model and optimize\n",
@@ -1666,7 +1674,16 @@
   "cell_type": "markdown",
   "id": "j49upfw2o8k",
   "metadata": {},
-   "source": "### Random instance generator\n\nThe class [MaxCutGenerator][MaxCutGenerator] can be used to generate random instances of this problem. For each instance, the generator creates a new random Erdős-Rényi graph $G_{n,p}$, where $n$ (number of vertices) and $p$ (edge probability) are sampled from user-provided probability distributions. Each edge is assigned a random weight drawn from the set $\\{-1, +1\\}$ with equal probability.\n\nTo create multiple instances with the same graph structure but different edge weight patterns, you can use [MaxCutPerturber][MaxCutPerturber]. This class takes an existing MaxCutData instance and generates new instances by randomly flipping the sign of each edge weight with a given probability while keeping the graph structure fixed.\n\n[MaxCutGenerator]: ../../api/problems/#miplearn.problems.maxcut.MaxCutGenerator\n[MaxCutPerturber]: ../../api/problems/#miplearn.problems.maxcut.MaxCutPerturber"
+   "source": [
+    "### Random instance generator\n",
+    "\n",
+    "The class [MaxCutGenerator][MaxCutGenerator] can be used to generate random instances of this problem. For each instance, the generator creates a new random Erdős-Rényi graph $G_{n,p}$, where $n$ (number of vertices) and $p$ (edge probability) are sampled from user-provided probability distributions. Each edge is assigned a random weight drawn from the set $\\{-1, +1\\}$ with equal probability.\n",
+    "\n",
+    "To create multiple instances with the same graph structure but different edge weight patterns, you can use [MaxCutPerturber][MaxCutPerturber]. This class takes an existing MaxCutData instance and generates new instances by randomly flipping the sign of each edge weight with a given probability while keeping the graph structure fixed.\n",
+    "\n",
+    "[MaxCutGenerator]: ../../api/problems/#miplearn.problems.maxcut.MaxCutGenerator\n",
+    "[MaxCutPerturber]: ../../api/problems/#miplearn.problems.maxcut.MaxCutPerturber"
+   ]
  },
  {
   "cell_type": "markdown",
@@ -1682,7 +1699,41 @@
   "id": "uge28hmv3a",
   "metadata": {},
   "outputs": [],
-   "source": "import random\nimport numpy as np\nfrom scipy.stats import uniform, randint\nfrom miplearn.problems.maxcut import (\n    MaxCutGenerator,\n    MaxCutPerturber,\n    build_maxcut_model_gurobipy,\n)\n\n# Set random seed to make example reproducible\nrandom.seed(42)\nnp.random.seed(42)\n\n# Generate a reference instance with a 10-node graph and 30% edge probability\ngenerator = MaxCutGenerator(\n    n=randint(low=10, high=11),\n    p=uniform(loc=0.3, scale=0.0),\n)\nreference_instance = generator.generate(1)[0]\n\n# Generate perturbed instances using the reference\nperturber = MaxCutPerturber(w_jitter=0.2)\ndata = perturber.perturb(reference_instance, 10)\n\n# Print the graph and weights for two instances\nprint(\"graph edges:\", list(data[0].graph.edges()))\nprint(\"weights[0]:\", data[0].weights)\nprint(\"weights[1]:\", data[1].weights)\nprint()\n\n# Build and optimize the first instance\nmodel = build_maxcut_model_gurobipy(data[0])\nmodel.optimize()"
+   "source": [
+    "import random\n",
+    "import numpy as np\n",
+    "from scipy.stats import uniform, randint\n",
+    "from miplearn.problems.maxcut import (\n",
+    "    MaxCutGenerator,\n",
+    "    MaxCutPerturber,\n",
+    "    build_maxcut_model_gurobipy,\n",
+    ")\n",
+    "\n",
+    "# Set random seed to make example reproducible\n",
+    "random.seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "# Generate a reference instance with a 10-node graph and 30% edge probability\n",
+    "generator = MaxCutGenerator(\n",
+    "    n=randint(low=10, high=11),\n",
+    "    p=uniform(loc=0.3, scale=0.0),\n",
+    ")\n",
+    "reference_instance = generator.generate(1)[0]\n",
+    "\n",
+    "# Generate perturbed instances using the reference\n",
+    "perturber = MaxCutPerturber(w_jitter=0.2)\n",
+    "data = perturber.perturb(reference_instance, 10)\n",
+    "\n",
+    "# Print the graph and weights for two instances\n",
+    "print(\"graph edges:\", list(data[0].graph.edges()))\n",
+    "print(\"weights[0]:\", data[0].weights)\n",
+    "print(\"weights[1]:\", data[1].weights)\n",
+    "print()\n",
+    "\n",
+    "# Build and optimize the first instance\n",
+    "model = build_maxcut_model_gurobipy(data[0])\n",
+    "model.optimize()"
+   ]
  }
 ],
 "metadata": {
--- a/miplearn/problems/multiknapsack.py
+++ b/miplearn/problems/multiknapsack.py
@@ -38,36 +38,10 @@ class MultiKnapsackData:
 class MultiKnapsackGenerator:
    """Random instance generator for the multi-dimensional knapsack problem.

-    Instances have a random number of items (or variables) and a random number of
-    knapsacks (or constraints), as specified by the provided probability
-    distributions `n` and `m`, respectively. The weight of each item `i` on knapsack
-    `j` is sampled independently from the provided distribution `w`. The capacity of
-    knapsack `j` is set to ``alpha_j * sum(w[i,j] for i in range(n))``,
-    where `alpha_j`, the tightness ratio, is sampled from the provided probability
-    distribution `alpha`.
-
-    To make the instances more challenging, the costs of the items are linearly
-    correlated to their average weights. More specifically, the weight of each item
-    `i` is set to ``sum(w[i,j]/m for j in range(m)) + K * u_i``, where `K`,
-    the correlation coefficient, and `u_i`, the correlation multiplier, are sampled
-    from the provided probability distributions. Note that `K` is only sample once
-    for the entire instance.
-
-    If `fix_w=True`, then `weights[i,j]` are kept the same in all generated
-    instances. This also implies that n and m are kept fixed. Although the prices and
-    capacities are derived from `weights[i,j]`, as long as `u` and `K` are not
-    constants, the generated instances will still not be completely identical.
-
-    If a probability distribution `w_jitter` is provided, then item weights will be
-    set to ``w[i,j] * gamma[i,j]`` where `gamma[i,j]` is sampled from `w_jitter`.
-    When combined with `fix_w=True`, this argument may be used to generate instances
-    where the weight of each item is roughly the same, but not exactly identical,
-    across all instances. The prices of the items and the capacities of the knapsacks
-    will be calculated as above, but using these perturbed weights instead.
-
-    By default, all generated prices, weights and capacities are rounded to the
-    nearest integer number. If `round=False` is provided, this rounding will be
-    disabled.
+    Generates new instances by creating random items and knapsacks according to the
+    provided probability distributions. Each instance has a random number of items
+    (variables) and knapsacks (constraints); weights are sampled independently,
+    while prices and capacities are derived from the sampled weights.

    Parameters
    ----------
@@ -83,11 +57,6 @@ class MultiKnapsackGenerator:
        Probability distribution for the profit multiplier.
    alpha: rv_continuous
        Probability distribution for the tightness ratio.
-    fix_w: boolean
-        If true, weights are kept the same (minus the noise from w_jitter) in all
-        instances.
-    w_jitter: rv_continuous
-        Probability distribution for random noise added to the weights.
    round: boolean
        If true, all prices, weights and capacities are rounded to the nearest
        integer.
@@ -101,9 +70,6 @@ class MultiKnapsackGenerator:
        K: rv_frozen = randint(low=500, high=501),
        u: rv_frozen = uniform(loc=0.0, scale=1.0),
        alpha: rv_frozen = uniform(loc=0.25, scale=0.0),
-        fix_w: bool = False,
-        w_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
-        p_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        round: bool = True,
    ):
        assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
@@ -114,10 +80,6 @@ class MultiKnapsackGenerator:
        assert isinstance(
            alpha, rv_frozen
        ), "alpha should be a SciPy probability distribution"
-        assert isinstance(fix_w, bool), "fix_w should be boolean"
-        assert isinstance(
-            w_jitter, rv_frozen
-        ), "w_jitter should be a SciPy probability distribution"

        self.n = n
        self.m = m
@@ -125,45 +87,17 @@ class MultiKnapsackGenerator:
        self.u = u
        self.K = K
        self.alpha = alpha
-        self.w_jitter = w_jitter
-        self.p_jitter = p_jitter
        self.round = round
-        self.fix_n: Optional[int] = None
-        self.fix_m: Optional[int] = None
-        self.fix_w: Optional[np.ndarray] = None
-        self.fix_u: Optional[np.ndarray] = None
-        self.fix_K: Optional[float] = None
-
-        if fix_w:
-            self.fix_n = self.n.rvs()
-            self.fix_m = self.m.rvs()
-            self.fix_w = np.array([self.w.rvs(self.fix_n) for _ in range(self.fix_m)])
-            self.fix_u = self.u.rvs(self.fix_n)
-            self.fix_K = self.K.rvs()

    def generate(self, n_samples: int) -> List[MultiKnapsackData]:
        def _sample() -> MultiKnapsackData:
-            if self.fix_w is not None:
-                assert self.fix_m is not None
-                assert self.fix_n is not None
-                assert self.fix_u is not None
-                assert self.fix_K is not None
-                n = self.fix_n
-                m = self.fix_m
-                w = self.fix_w
-                u = self.fix_u
-                K = self.fix_K
-            else:
-                n = self.n.rvs()
-                m = self.m.rvs()
-                w = np.array([self.w.rvs(n) for _ in range(m)])
-                u = self.u.rvs(n)
-                K = self.K.rvs()
-            w = w * np.array([self.w_jitter.rvs(n) for _ in range(m)])
+            n = self.n.rvs()
+            m = self.m.rvs()
+            w = np.array([self.w.rvs(n) for _ in range(m)])
+            u = self.u.rvs(n)
+            K = self.K.rvs()
            alpha = self.alpha.rvs(m)
-            p = np.array(
-                [w[:, j].sum() / m + K * u[j] for j in range(n)]
-            ) * self.p_jitter.rvs(n)
+            p = np.array([w[:, j].sum() / m + K * u[j] for j in range(n)])
            b = np.array([w[i, :].sum() * alpha[i] for i in range(m)])
            if self.round:
                p = p.round()
@@ -174,6 +108,72 @@ class MultiKnapsackGenerator:
        return [_sample() for _ in range(n_samples)]


+class MultiKnapsackPerturber:
+    """Perturbation generator for existing multi-dimensional knapsack instances.
+
+    Takes an existing MultiKnapsackData instance and generates new instances by
+    applying randomization factors to the existing weights, prices and capacities
+    while keeping the structure (number of items and knapsacks) fixed.
+
+    Parameters
+    ----------
+    w_jitter: rv_continuous
+        Probability distribution for randomization factors applied to item weights.
+    p_jitter: rv_continuous
+        Probability distribution for randomization factors applied to item prices.
+    alpha_jitter: rv_continuous
+        Probability distribution for randomization factors applied to knapsack capacities.
+    round: boolean
+        If true, all perturbed prices, weights and capacities are rounded to the
+        nearest integer.
+    """
+
+    def __init__(
+        self,
+        w_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
+        p_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
+        alpha_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
+        round: bool = True,
+    ):
+        assert isinstance(
+            w_jitter, rv_frozen
+        ), "w_jitter should be a SciPy probability distribution"
+        assert isinstance(
+            p_jitter, rv_frozen
+        ), "p_jitter should be a SciPy probability distribution"
+        assert isinstance(
+            alpha_jitter, rv_frozen
+        ), "alpha_jitter should be a SciPy probability distribution"
+
+        self.w_jitter = w_jitter
+        self.p_jitter = p_jitter
+        self.alpha_jitter = alpha_jitter
+        self.round = round
+
+    def perturb(
+        self,
+        instance: MultiKnapsackData,
+        n_samples: int,
+    ) -> List[MultiKnapsackData]:
+        def _sample() -> MultiKnapsackData:
+            m, n = instance.weights.shape
+            w_factors = np.array([self.w_jitter.rvs(n) for _ in range(m)])
+            p_factors = self.p_jitter.rvs(n)
+            alpha_factors = self.alpha_jitter.rvs(m)
+
+            w = instance.weights * w_factors
+            p = instance.prices * p_factors
+            b = instance.capacities * alpha_factors
+
+            if self.round:
+                p = p.round()
+                b = b.round()
+                w = w.round()
+            return MultiKnapsackData(p, b, w)
+
+        return [_sample() for _ in range(n_samples)]
+
+
 def build_multiknapsack_model_gurobipy(
    data: Union[str, MultiKnapsackData]
 ) -> GurobiModel:
--- a/tests/problems/test_multiknapsack.py
+++ b/tests/problems/test_multiknapsack.py
@@ -21,26 +21,14 @@ def test_knapsack_generator() -> None:
        K=randint(low=500, high=501),
        u=uniform(loc=0.0, scale=1.0),
        alpha=uniform(loc=0.25, scale=0.0),
-        fix_w=True,
-        w_jitter=uniform(loc=0.9, scale=0.2),
-        p_jitter=uniform(loc=0.9, scale=0.2),
-        round=True,
    )
-    data = gen.generate(2)
-    assert data[0].prices.tolist() == [433.0, 477.0, 802.0, 494.0, 458.0]
-    assert data[0].capacities.tolist() == [458.0, 357.0, 392.0]
+    data = gen.generate(1)
+    assert data[0].prices.tolist() == [380.0, 521.0, 729.0, 476.0, 466.0]
+    assert data[0].capacities.tolist() == [443.0, 382.0, 389.0]
    assert data[0].weights.tolist() == [
-        [111.0, 392.0, 945.0, 276.0, 108.0],
-        [64.0, 633.0, 20.0, 602.0, 110.0],
-        [510.0, 203.0, 303.0, 469.0, 85.0],
-    ]
-
-    assert data[1].prices.tolist() == [344.0, 527.0, 658.0, 519.0, 460.0]
-    assert data[1].capacities.tolist() == [449.0, 377.0, 380.0]
-    assert data[1].weights.tolist() == [
-        [92.0, 473.0, 871.0, 264.0, 96.0],
-        [67.0, 664.0, 21.0, 628.0, 129.0],
-        [436.0, 209.0, 309.0, 481.0, 86.0],
+        [102, 435, 860, 270, 106],
+        [71, 700, 20, 614, 121],
+        [466, 214, 330, 458, 87],
    ]