Implement PMedianPerturber

2025-12-08 18:38:51 -06:00 · 2025-12-08 13:36:49 -06:00
parent 14e2fe331d
commit 427bd1d806
3 changed files with 104 additions and 99 deletions
--- a/docs/guide/problems.ipynb
+++ b/docs/guide/problems.ipynb
@@ -473,9 +473,10 @@
    "\n",
    "The class [PMedianGenerator][PMedianGenerator] can be used to generate random instances of this problem. First, it decides the number of customers and the parameter $p$ by sampling the provided `n` and `p` distributions, respectively. Then, for each customer $i$, the class builds its geographical location $(x_i, y_i)$ by sampling the provided `x` and `y` distributions. For each $i$, the demand for customer $i$ and the capacity of facility $i$ are decided by sampling the provided distributions `demands` and `capacities`, respectively. Finally, the costs $w_{ij}$ are set to the Euclidean distance between the locations of customers $i$ and $j$.\n",
    "\n",
-    "If `fixed=True`, then the number of customers, their locations, the parameter $p$, the demands and the capacities are only sampled from their respective distributions exactly once, to build a reference instance which is then randomly perturbed. Specifically, in each perturbation, the distances, demands and capacities are multiplied by random scaling factors sampled from the distributions `distances_jitter`, `demands_jitter` and `capacities_jitter`, respectively. The result is a list of instances that have the same set of customers, but slightly different demands, capacities and distances.\n",
+    "To create multiple instances with the same customer locations but different values, you can use [PMedianPerturber][PMedianPerturber]. This class takes an existing PMedianData instance and generates new instances by applying randomization factors to the existing distances, demands, and capacities while keeping the graph structure and parameter p fixed. More specifically, the distances, demands and capacities are multiplied by random scaling factors sampled from the distributions `distances_jitter`, `demands_jitter` and `capacities_jitter`, respectively. The result is a list of instances that have the same set of customers, but slightly different demands, capacities and distances.\n",
    "\n",
-    "[PMedianGenerator]: ../../api/problems/#miplearn.problems.pmedian.PMedianGenerator"
+    "[PMedianGenerator]: ../../api/problems/#miplearn.problems.pmedian.PMedianGenerator\n",
    "[PMedianPerturber]: ../../api/problems/#miplearn.problems.pmedian.PMedianPerturber"
   ]
  },
  {
@@ -488,7 +489,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
   "id": "4e0e4223-b4e0-4962-a157-82a23a86e37d",
   "metadata": {
    "ExecuteTime": {
@@ -503,18 +504,18 @@
     "text": [
      "p = 5\n",
      "distances =\n",
-      " [[  0.    50.17  82.42  32.76  33.2   35.45  86.88  79.11  43.17  66.2 ]\n",
+      " [[  0.    54.88  80.58  34.54  35.61  32.75  78.74  75.78  45.82  65.11]\n",
-      " [ 50.17   0.    72.64  72.51  17.06  80.25  39.92  68.93  43.41  42.96]\n",
+      " [ 54.88   0.    69.32  68.14  17.48  83.67  41.01  64.26  46.85  40.57]\n",
-      " [ 82.42  72.64   0.    71.69  70.92  82.51  67.88   3.76  39.74  30.73]\n",
+      " [ 80.58  69.32   0.    64.6   68.52  86.81  65.36   4.08  38.29  28.39]\n",
-      " [ 32.76  72.51  71.69   0.    56.56  11.03 101.35  69.39  42.09  68.58]\n",
+      " [ 34.54  68.14  64.6    0.    51.62  11.17 101.52  73.67  38.81  66.35]\n",
-      " [ 33.2   17.06  70.92  56.56   0.    63.68  54.71  67.16  34.89  44.99]\n",
+      " [ 35.61  17.48  68.52  51.62   0.    67.13  59.17  68.95  32.99  48.98]\n",
-      " [ 35.45  80.25  82.51  11.03  63.68   0.   111.04  80.29  52.78  79.36]\n",
+      " [ 32.75  83.67  86.81  11.17  67.13   0.   105.47  86.25  52.01  76.55]\n",
-      " [ 86.88  39.92  67.88 101.35  54.71 111.04   0.    65.13  61.37  40.82]\n",
+      " [ 78.74  41.01  65.36 101.52  59.17 105.47   0.    69.09  65.27  40.97]\n",
-      " [ 79.11  68.93   3.76  69.39  67.16  80.29  65.13   0.    36.26  27.24]\n",
+      " [ 75.78  64.26   4.08  73.67  68.95  86.25  69.09   0.    38.88  28.35]\n",
-      " [ 43.17  43.41  39.74  42.09  34.89  52.78  61.37  36.26   0.    26.62]\n",
+      " [ 45.82  46.85  38.29  38.81  32.99  52.01  65.27  38.88   0.    25.89]\n",
-      " [ 66.2   42.96  30.73  68.58  44.99  79.36  40.82  27.24  26.62   0.  ]]\n",
+      " [ 65.11  40.57  28.39  66.35  48.98  76.55  40.97  28.35  25.89   0.  ]]\n",
-      "demands = [6.12 1.39 2.92 3.66 4.56 7.85 2.   5.14 5.92 0.46]\n",
+      "demands = [6.69 1.32 2.92 3.51 4.36 7.12 2.04 5.14 5.39 0.44]\n",
-      "capacities = [151.89  42.63  16.26 237.22 241.41 202.1   76.15  24.42 171.06 110.04]\n",
+      "capacities = [164.29  40.41  15.11 236.72 264.86 191.67  78.77  25.7  162.08 115.06]\n",
      "\n",
      "Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - \"Ubuntu 22.04.5 LTS\")\n",
      "\n",
@@ -522,15 +523,15 @@
      "Thread count: 16 physical cores, 16 logical processors, using up to 16 threads\n",
      "\n",
      "Optimize a model with 21 rows, 110 columns and 220 nonzeros (Min)\n",
-      "Model fingerprint: 0x8d8d9346\n",
+      "Model fingerprint: 0x6d581c67\n",
      "Model has 90 linear objective coefficients\n",
      "Variable types: 0 continuous, 110 integer (110 binary)\n",
      "Coefficient statistics:\n",
-      "  Matrix range     [5e-01, 2e+02]\n",
+      "  Matrix range     [4e-01, 3e+02]\n",
      "  Objective range  [4e+00, 1e+02]\n",
      "  Bounds range     [1e+00, 1e+00]\n",
      "  RHS range        [1e+00, 5e+00]\n",
-      "Found heuristic solution: objective 368.7900000\n",
+      "Found heuristic solution: objective 375.3900000\n",
      "Presolve time: 0.00s\n",
      "Presolved: 21 rows, 110 columns, 220 nonzeros\n",
      "Variable types: 0 continuous, 110 integer (110 binary)\n",
@@ -540,57 +541,64 @@
      "    Nodes    |    Current Node    |     Objective Bounds      |     Work\n",
      " Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time\n",
      "\n",
-      "     0     0    0.00000    0    6  368.79000    0.00000   100%     -    0s\n",
+      "     0     0    0.00000    0    6  375.39000    0.00000   100%     -    0s\n",
-      "H    0     0                     301.7200000    0.00000   100%     -    0s\n",
+      "H    0     0                     299.2900000    0.00000   100%     -    0s\n",
-      "H    0     0                     185.1900000    0.00000   100%     -    0s\n",
+      "H    0     0                     181.8200000    0.00000   100%     -    0s\n",
-      "H    0     0                     153.5000000    0.00000   100%     -    0s\n",
+      "H    0     0                     151.3600000    0.00000   100%     -    0s\n",
-      "H    0     0                     131.7700000    0.00000   100%     -    0s\n",
+      "H    0     0                     129.7800000    0.00000   100%     -    0s\n",
-      "     0     0   17.14595    0   10  131.77000   17.14595  87.0%     -    0s\n",
+      "     0     0   17.60837    0   10  129.78000   17.60837  86.4%     -    0s\n",
-      "H    0     0                     122.8100000   17.14595  86.0%     -    0s\n",
+      "H    0     0                     126.3600000   17.60837  86.1%     -    0s\n",
-      "H    0     0                      98.3900000   17.14595  82.6%     -    0s\n",
+      "H    0     0                     117.7900000   17.60837  85.1%     -    0s\n",
-      "H    0     0                      92.2900000   64.28872  30.3%     -    0s\n",
+      "H    0     0                      99.5900000   65.81544  33.9%     -    0s\n",
-      "H    0     0                      91.6700000   64.28872  29.9%     -    0s\n",
+      "     0     0   65.81544    0   15   99.59000   65.81544  33.9%     -    0s\n",
-      "     0     0   64.28872    0   15   91.67000   64.28872  29.9%     -    0s\n",
+      "H    0     0                      94.0700000   65.81544  30.0%     -    0s\n",
-      "H    0     0                      91.2300000   64.28872  29.5%     -    0s\n",
+      "H    0     0                      93.1600000   85.53733  8.18%     -    0s\n",
      "H    0     0                      91.3700000   85.53733  6.38%     -    0s\n",
      "\n",
      "Cutting planes:\n",
      "  Cover: 16\n",
      "  MIR: 2\n",
      "  StrongCG: 1\n",
      "\n",
      "Explored 1 nodes (42 simplex iterations) in 0.01 seconds (0.00 work units)\n",
      "Thread count was 16 (of 16 available processors)\n",
      "\n",
-      "Solution count 10: 91.23 91.67 92.29 ... 368.79\n",
+      "Solution count 10: 91.37 93.16 94.07 ... 299.29\n",
      "\n",
      "Optimal solution found (tolerance 1.00e-04)\n",
-      "Best objective 9.123000000000e+01, best bound 9.123000000000e+01, gap 0.0000%\n",
+      "Best objective 9.137000000000e+01, best bound 9.137000000000e+01, gap 0.0000%\n",
      "\n",
-      "User-callback calls 187, time in user-callback 0.00 sec\n"
+      "User-callback calls 250, time in user-callback 0.00 sec\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import uniform, randint\n",
-    "from miplearn.problems.pmedian import PMedianGenerator, build_pmedian_model_gurobipy\n",
+    "from miplearn.problems.pmedian import PMedianGenerator, PMedianPerturber, build_pmedian_model_gurobipy\n",
    "\n",
    "# Set random seed, to make example reproducible\n",
    "np.random.seed(42)\n",
    "\n",
-    "# Generate random instances with ten customers located in a\n",
+    "# Generate a reference instance with ten customers located in a\n",
    "# 100x100 square, with demands in [0,10], capacities in [0, 250].\n",
-    "data = PMedianGenerator(\n",
+    "generator = PMedianGenerator(\n",
    "    x=uniform(loc=0.0, scale=100.0),\n",
    "    y=uniform(loc=0.0, scale=100.0),\n",
    "    n=randint(low=10, high=11),\n",
    "    p=randint(low=5, high=6),\n",
    "    demands=uniform(loc=0, scale=10),\n",
    "    capacities=uniform(loc=0, scale=250),\n",
    ")\n",
    "reference_instance = generator.generate(1)[0]\n",
    "\n",
    "# Generate perturbed instances using the reference\n",
    "perturber = PMedianPerturber(\n",
    "    distances_jitter=uniform(loc=0.9, scale=0.2),\n",
    "    demands_jitter=uniform(loc=0.9, scale=0.2),\n",
    "    capacities_jitter=uniform(loc=0.9, scale=0.2),\n",
-    "    fixed=True,\n",
+    ")\n",
-    ").generate(10)\n",
+    "data = perturber.perturb(reference_instance, 10)\n",
    "\n",
    "# Print data for one of the instances\n",
    "print(\"p =\", data[0].p)\n",
--- a/miplearn/problems/pmedian.py
+++ b/miplearn/problems/pmedian.py
@@ -49,15 +49,6 @@ class PMedianGenerator:
    `demands` and `capacities`, respectively. Finally, the costs `w[i,j]` are set to
    the Euclidean distance between the locations of customers `i` and `j`.
    If `fixed=True`, then the number of customers, their locations, the parameter
    `p`, the demands and the capacities are only sampled from their respective
    distributions exactly once, to build a reference instance which is then
    perturbed. Specifically, for each perturbation, the distances, demands and
    capacities are multiplied by factors sampled from the distributions
    `distances_jitter`, `demands_jitter` and `capacities_jitter`, respectively. The
    result is a list of instances that have the same set of customers, but slightly
    different demands, capacities and distances.
    Parameters
    ----------
    x
@@ -72,14 +63,6 @@ class PMedianGenerator:
        Probability distribution for the customer demands.
    capacities
        Probability distribution for the facility capacities.
    distances_jitter
        Probability distribution for the random scaling factor applied to distances.
    demands_jitter
        Probability distribution for the random scaling factor applied to demands.
    capacities_jitter
        Probability distribution for the random scaling factor applied to capacities.
    fixed
        If `True`, then customer are kept the same across instances.
    """
    def __init__(
@@ -90,10 +73,6 @@ class PMedianGenerator:
        p: rv_frozen = randint(low=10, high=11),
        demands: rv_frozen = uniform(loc=0, scale=20),
        capacities: rv_frozen = uniform(loc=0, scale=100),
        distances_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        demands_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        capacities_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        fixed: bool = True,
    ):
        self.x = x
        self.y = y
@@ -101,30 +80,15 @@ class PMedianGenerator:
        self.p = p
        self.demands = demands
        self.capacities = capacities
        self.distances_jitter = distances_jitter
        self.demands_jitter = demands_jitter
        self.capacities_jitter = capacities_jitter
        self.fixed = fixed
        self.ref_data: Optional[PMedianData] = None
    def generate(self, n_samples: int) -> List[PMedianData]:
        def _sample() -> PMedianData:
-            if self.ref_data is None:
+            n = self.n.rvs()
-                n = self.n.rvs()
+            p = self.p.rvs()
-                p = self.p.rvs()
+            loc = np.array([(self.x.rvs(), self.y.rvs()) for _ in range(n)])
-                loc = np.array([(self.x.rvs(), self.y.rvs()) for _ in range(n)])
+            distances = squareform(pdist(loc))
-                distances = squareform(pdist(loc))
+            demands = self.demands.rvs(n)
-                demands = self.demands.rvs(n)
+            capacities = self.capacities.rvs(n)
                capacities = self.capacities.rvs(n)
            else:
                n = self.ref_data.demands.shape[0]
                distances = self.ref_data.distances * self.distances_jitter.rvs(
                    size=(n, n)
                )
                distances = np.tril(distances) + np.triu(distances.T, 1)
                demands = self.ref_data.demands * self.demands_jitter.rvs(n)
                capacities = self.ref_data.capacities * self.capacities_jitter.rvs(n)
                p = self.ref_data.p
            data = PMedianData(
                distances=distances.round(2),
@@ -133,14 +97,62 @@ class PMedianGenerator:
                capacities=capacities.round(2),
            )
            if self.fixed and self.ref_data is None:
                self.ref_data = data
            return data
        return [_sample() for _ in range(n_samples)]
 class PMedianPerturber:
    """Perturbation generator for existing p-median instances.
    Takes an existing PMedianData instance and generates new instances by applying
    randomization factors to the existing distances, demands, and capacities while
    keeping the graph structure and parameter p fixed.
    """
    def __init__(
        self,
        distances_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        demands_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
        capacities_jitter: rv_frozen = uniform(loc=1.0, scale=0.0),
    ):
        """Initialize the perturbation generator.
        Parameters
        ----------
        distances_jitter
            Probability distribution for randomization factors applied to distances.
        demands_jitter
            Probability distribution for randomization factors applied to demands.
        capacities_jitter
            Probability distribution for randomization factors applied to capacities.
        """
        self.distances_jitter = distances_jitter
        self.demands_jitter = demands_jitter
        self.capacities_jitter = capacities_jitter
    def perturb(
        self,
        instance: PMedianData,
        n_samples: int,
    ) -> List[PMedianData]:
        def _sample() -> PMedianData:
            n = instance.demands.shape[0]
            distances = instance.distances * self.distances_jitter.rvs(size=(n, n))
            distances = np.tril(distances) + np.triu(distances.T, 1)
            demands = instance.demands * self.demands_jitter.rvs(n)
            capacities = instance.capacities * self.capacities_jitter.rvs(n)
            return PMedianData(
                distances=distances.round(2),
                demands=demands.round(2),
                p=instance.p,
                capacities=capacities.round(2),
            )
        return [_sample() for _ in range(n_samples)]
 def build_pmedian_model_gurobipy(data: Union[str, PMedianData]) -> GurobiModel:
    """Converts capacitated p-median data into a concrete Gurobipy model."""
    if isinstance(data, str):
--- a/tests/problems/test_pmedian.py
+++ b/tests/problems/test_pmedian.py
@@ -17,12 +17,8 @@ def test_pmedian() -> None:
        p=randint(low=2, high=3),
        demands=uniform(loc=0, scale=20),
        capacities=uniform(loc=0, scale=100),
        distances_jitter=uniform(loc=0.95, scale=0.1),
        demands_jitter=uniform(loc=0.95, scale=0.1),
        capacities_jitter=uniform(loc=0.95, scale=0.1),
        fixed=True,
    )
-    data = gen.generate(2)
+    data = gen.generate(1)
    assert data[0].p == 2
    assert data[0].demands.tolist() == [0.41, 19.4, 16.65, 4.25, 3.64]
@@ -35,17 +31,6 @@ def test_pmedian() -> None:
        [33.2, 17.06, 70.92, 56.56, 0.0],
    ]
    assert data[1].p == 2
    assert data[1].demands.tolist() == [0.42, 19.03, 16.68, 4.27, 3.53]
    assert data[1].capacities.tolist() == [19.2, 31.26, 54.79, 44.9, 29.41]
    assert data[1].distances.tolist() == [
        [0.0, 51.6, 83.31, 33.77, 31.95],
        [51.6, 0.0, 70.25, 71.09, 17.05],
        [83.31, 70.25, 0.0, 68.81, 67.62],
        [33.77, 71.09, 68.81, 0.0, 58.88],
        [31.95, 17.05, 67.62, 58.88, 0.0],
    ]
    model = build_pmedian_model_gurobipy(data[0])
    assert model.inner.numVars == 30
    assert model.inner.numConstrs == 11