From 9f0fa0e500d2dae8e80c92954ad9421d30f934dc Mon Sep 17 00:00:00 2001
From: "Alinson S. Xavier" <git@axavier.org>
Date: Mon, 8 Dec 2025 16:08:05 -0600
Subject: [PATCH] problems: Allow correlated arguments in random problem
 generators

---
 miplearn/problems/multiknapsack.py   | 19 +++++++++++++------
 miplearn/problems/pmedian.py         | 25 +++++++++++++++++++++----
 miplearn/problems/setcover.py        | 20 ++++++++++++--------
 miplearn/problems/setpack.py         | 15 ++++++++-------
 miplearn/problems/uc.py              | 20 +++++++++++++++-----
 tests/problems/test_multiknapsack.py | 15 +++++++++++++++
 tests/problems/test_pmedian.py       | 15 +++++++++++++++
 tests/problems/test_setcover.py      | 16 ++++++++++++++++
 tests/problems/test_setpack.py       | 18 ++++++++++++++++++
 9 files changed, 133 insertions(+), 30 deletions(-)

diff --git a/miplearn/problems/multiknapsack.py b/miplearn/problems/multiknapsack.py
index e3e0a16..25b8964 100644
--- a/miplearn/problems/multiknapsack.py
+++ b/miplearn/problems/multiknapsack.py
@@ -3,7 +3,7 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 
 from dataclasses import dataclass
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Callable
 
 import gurobipy as gp
 import numpy as np
@@ -47,8 +47,10 @@ class MultiKnapsackGenerator:
     ----------
     n: rv_discrete
         Probability distribution for the number of items (or variables).
-    m: rv_discrete
-        Probability distribution for the number of knapsacks (or constraints).
+    m: rv_discrete or callable
+        Probability distribution for the number of knapsacks (or constraints), or a
+        callable that takes the number of items and returns the number of knapsacks
+        (e.g., lambda n: n//3).
     w: rv_continuous
         Probability distribution for the item weights.
     K: rv_continuous
@@ -65,7 +67,7 @@ class MultiKnapsackGenerator:
     def __init__(
         self,
         n: rv_frozen = randint(low=100, high=101),
-        m: rv_frozen = randint(low=30, high=31),
+        m: Union[rv_frozen, Callable] = randint(low=30, high=31),
         w: rv_frozen = randint(low=0, high=1000),
         K: rv_frozen = randint(low=500, high=501),
         u: rv_frozen = uniform(loc=0.0, scale=1.0),
@@ -73,7 +75,9 @@ class MultiKnapsackGenerator:
         round: bool = True,
     ):
         assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
-        assert isinstance(m, rv_frozen), "m should be a SciPy probability distribution"
+        assert isinstance(m, rv_frozen) or callable(
+            m
+        ), "m should be a SciPy probability distribution or callable"
         assert isinstance(w, rv_frozen), "w should be a SciPy probability distribution"
         assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution"
         assert isinstance(u, rv_frozen), "u should be a SciPy probability distribution"
@@ -92,7 +96,10 @@ class MultiKnapsackGenerator:
     def generate(self, n_samples: int) -> List[MultiKnapsackData]:
         def _sample() -> MultiKnapsackData:
             n = self.n.rvs()
-            m = self.m.rvs()
+            if callable(self.m):
+                m = self.m(n)
+            else:
+                m = self.m.rvs()
             w = np.array([self.w.rvs(n) for _ in range(m)])
             u = self.u.rvs(n)
             K = self.K.rvs()
diff --git a/miplearn/problems/pmedian.py b/miplearn/problems/pmedian.py
index c6193af..067a0ef 100644
--- a/miplearn/problems/pmedian.py
+++ b/miplearn/problems/pmedian.py
@@ -3,7 +3,7 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 
 from dataclasses import dataclass
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Callable
 
 import gurobipy as gp
 import numpy as np
@@ -58,7 +58,8 @@ class PMedianGenerator:
     n
         Probability distribution for the number of customer.
     p
-        Probability distribution for the number of medians.
+        Probability distribution for the number of medians, or a callable that takes
+        the number of customers and returns the number of medians (e.g., lambda n: n//10).
     demands
         Probability distribution for the customer demands.
     capacities
@@ -70,10 +71,23 @@ class PMedianGenerator:
         x: rv_frozen = uniform(loc=0.0, scale=100.0),
         y: rv_frozen = uniform(loc=0.0, scale=100.0),
         n: rv_frozen = randint(low=100, high=101),
-        p: rv_frozen = randint(low=10, high=11),
+        p: Union[rv_frozen, Callable] = randint(low=10, high=11),
         demands: rv_frozen = uniform(loc=0, scale=20),
         capacities: rv_frozen = uniform(loc=0, scale=100),
     ):
+        assert isinstance(x, rv_frozen), "x should be a SciPy probability distribution"
+        assert isinstance(y, rv_frozen), "y should be a SciPy probability distribution"
+        assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
+        assert isinstance(p, rv_frozen) or callable(
+            p
+        ), "p should be a SciPy probability distribution or callable"
+        assert isinstance(
+            demands, rv_frozen
+        ), "demands should be a SciPy probability distribution"
+        assert isinstance(
+            capacities, rv_frozen
+        ), "capacities should be a SciPy probability distribution"
+
         self.x = x
         self.y = y
         self.n = n
@@ -84,7 +98,10 @@ class PMedianGenerator:
     def generate(self, n_samples: int) -> List[PMedianData]:
         def _sample() -> PMedianData:
             n = self.n.rvs()
-            p = self.p.rvs()
+            if callable(self.p):
+                p = self.p(n)
+            else:
+                p = self.p.rvs()
             loc = np.array([(self.x.rvs(), self.y.rvs()) for _ in range(n)])
             distances = squareform(pdist(loc))
             demands = self.demands.rvs(n)
diff --git a/miplearn/problems/setcover.py b/miplearn/problems/setcover.py
index 7ed9ef0..52eadb4 100644
--- a/miplearn/problems/setcover.py
+++ b/miplearn/problems/setcover.py
@@ -3,7 +3,7 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 
 from dataclasses import dataclass
-from typing import List, Union
+from typing import List, Union, Callable
 
 import gurobipy as gp
 import numpy as np
@@ -34,7 +34,7 @@ class SetCoverGenerator:
     def __init__(
         self,
         n_elements: rv_frozen = randint(low=50, high=51),
-        n_sets: rv_frozen = randint(low=100, high=101),
+        n_sets: Union[rv_frozen, Callable] = randint(low=100, high=101),
         costs: rv_frozen = uniform(loc=0.0, scale=100.0),
         K: rv_frozen = uniform(loc=25.0, scale=0.0),
         density: rv_frozen = uniform(loc=0.02, scale=0.00),
@@ -45,8 +45,9 @@ class SetCoverGenerator:
         ----------
         n_elements: rv_discrete
             Probability distribution for number of elements.
-        n_sets: rv_discrete
-            Probability distribution for number of sets.
+        n_sets: rv_discrete or callable
+            Probability distribution for number of sets, or a callable that takes
+            the number of elements and returns the number of sets.
         costs: rv_continuous
             Probability distribution for base set costs.
         K: rv_continuous
@@ -57,9 +58,9 @@ class SetCoverGenerator:
         assert isinstance(
             n_elements, rv_frozen
         ), "n_elements should be a SciPy probability distribution"
-        assert isinstance(
-            n_sets, rv_frozen
-        ), "n_sets should be a SciPy probability distribution"
+        assert isinstance(n_sets, rv_frozen) or callable(
+            n_sets
+        ), "n_sets should be a SciPy probability distribution or callable"
         assert isinstance(
             costs, rv_frozen
         ), "costs should be a SciPy probability distribution"
@@ -75,8 +76,11 @@ class SetCoverGenerator:
 
     def generate(self, n_samples: int) -> List[SetCoverData]:
         def _sample() -> SetCoverData:
-            n_sets = self.n_sets.rvs()
             n_elements = self.n_elements.rvs()
+            if callable(self.n_sets):
+                n_sets = self.n_sets(n_elements)
+            else:
+                n_sets = self.n_sets.rvs()
             density = self.density.rvs()
 
             incidence_matrix = np.random.rand(n_elements, n_sets) < density
diff --git a/miplearn/problems/setpack.py b/miplearn/problems/setpack.py
index 0a8c29f..d23d46f 100644
--- a/miplearn/problems/setpack.py
+++ b/miplearn/problems/setpack.py
@@ -3,7 +3,7 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 
 from dataclasses import dataclass
-from typing import List, Union
+from typing import List, Union, Callable
 
 import gurobipy as gp
 import numpy as np
@@ -33,7 +33,7 @@ class SetPackGenerator:
     def __init__(
         self,
         n_elements: rv_frozen = randint(low=50, high=51),
-        n_sets: rv_frozen = randint(low=100, high=101),
+        n_sets: Union[rv_frozen, Callable] = randint(low=100, high=101),
         costs: rv_frozen = uniform(loc=0.0, scale=100.0),
         K: rv_frozen = uniform(loc=25.0, scale=0.0),
         density: rv_frozen = uniform(loc=0.02, scale=0.00),
@@ -44,8 +44,9 @@ class SetPackGenerator:
         ----------
         n_elements: rv_discrete
             Probability distribution for number of elements.
-        n_sets: rv_discrete
-            Probability distribution for number of sets.
+        n_sets: rv_discrete or callable
+            Probability distribution for number of sets, or a callable that takes
+            the number of elements and returns the number of sets.
         costs: rv_continuous
             Probability distribution for base set costs.
         K: rv_continuous
@@ -56,9 +57,9 @@ class SetPackGenerator:
         assert isinstance(
             n_elements, rv_frozen
         ), "n_elements should be a SciPy probability distribution"
-        assert isinstance(
-            n_sets, rv_frozen
-        ), "n_sets should be a SciPy probability distribution"
+        assert isinstance(n_sets, rv_frozen) or callable(
+            n_sets
+        ), "n_sets should be a SciPy probability distribution or callable"
         assert isinstance(
             costs, rv_frozen
         ), "costs should be a SciPy probability distribution"
diff --git a/miplearn/problems/uc.py b/miplearn/problems/uc.py
index 262b9d2..4728003 100644
--- a/miplearn/problems/uc.py
+++ b/miplearn/problems/uc.py
@@ -4,7 +4,7 @@
 
 from dataclasses import dataclass
 from math import pi
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Callable
 
 import gurobipy as gp
 import numpy as np
@@ -39,7 +39,7 @@ class UnitCommitmentGenerator:
     def __init__(
         self,
         n_units: rv_frozen = randint(low=1_000, high=1_001),
-        n_periods: rv_frozen = randint(low=72, high=73),
+        n_periods: Union[rv_frozen, Callable] = randint(low=72, high=73),
         max_power: rv_frozen = uniform(loc=50, scale=450),
         min_power: rv_frozen = uniform(loc=0.5, scale=0.25),
         cost_startup: rv_frozen = uniform(loc=0, scale=10_000),
@@ -55,8 +55,9 @@ class UnitCommitmentGenerator:
         ----------
         n_units: rv_frozen
             Probability distribution for number of units.
-        n_periods: rv_frozen
-            Probability distribution for number of periods.
+        n_periods: rv_frozen or callable
+            Probability distribution for number of periods, or a callable that takes
+            the number of units and returns the number of periods.
         max_power: rv_frozen
             Probability distribution for maximum power output.
         min_power: rv_frozen
@@ -74,6 +75,12 @@ class UnitCommitmentGenerator:
         min_downtime: rv_frozen
             Probability distribution for minimum downtime.
         """
+        assert isinstance(
+            n_units, rv_frozen
+        ), "n_units should be a SciPy probability distribution"
+        assert isinstance(n_periods, rv_frozen) or callable(
+            n_periods
+        ), "n_periods should be a SciPy probability distribution or callable"
         self.n_units = n_units
         self.n_periods = n_periods
         self.max_power = max_power
@@ -87,8 +94,11 @@ class UnitCommitmentGenerator:
 
     def generate(self, n_samples: int) -> List[UnitCommitmentData]:
         def _sample() -> UnitCommitmentData:
-            T = self.n_periods.rvs()
             G = self.n_units.rvs()
+            if callable(self.n_periods):
+                T = self.n_periods(G)
+            else:
+                T = self.n_periods.rvs()
 
             # Generate unit parameteres
             max_power = self.max_power.rvs(G)
diff --git a/tests/problems/test_multiknapsack.py b/tests/problems/test_multiknapsack.py
index 9dd528b..febba08 100644
--- a/tests/problems/test_multiknapsack.py
+++ b/tests/problems/test_multiknapsack.py
@@ -32,6 +32,21 @@ def test_knapsack_generator() -> None:
     ]
 
 
+def test_knapsack_generator_callable() -> None:
+    np.random.seed(42)
+    gen = MultiKnapsackGenerator(
+        n=randint(low=10, high=11),
+        m=lambda n: n // 3,
+        w=randint(low=0, high=1000),
+        K=randint(low=500, high=501),
+        u=uniform(loc=0.0, scale=1.0),
+        alpha=uniform(loc=0.25, scale=0.0),
+    )
+    data = gen.generate(1)[0]
+    assert data.weights.shape[1] == 10
+    assert data.weights.shape[0] == 3
+
+
 def test_knapsack_model() -> None:
     data = MultiKnapsackData(
         prices=np.array([344.0, 527.0, 658.0, 519.0, 460.0]),
diff --git a/tests/problems/test_pmedian.py b/tests/problems/test_pmedian.py
index 62db9e1..d31c266 100644
--- a/tests/problems/test_pmedian.py
+++ b/tests/problems/test_pmedian.py
@@ -36,3 +36,18 @@ def test_pmedian() -> None:
     assert model.inner.numConstrs == 11
     model.optimize()
     assert round(model.inner.objVal) == 107
+
+
+def test_pmedian_generator_callable() -> None:
+    np.random.seed(42)
+    gen = PMedianGenerator(
+        x=uniform(loc=0.0, scale=100.0),
+        y=uniform(loc=0.0, scale=100.0),
+        n=randint(low=10, high=11),
+        p=lambda n: n // 5,
+        demands=uniform(loc=0, scale=20),
+        capacities=uniform(loc=0, scale=100),
+    )
+    data = gen.generate(1)
+    assert data[0].p == 2
+    assert len(data[0].demands) == 10
diff --git a/tests/problems/test_setcover.py b/tests/problems/test_setcover.py
index f3a9c5f..5c7349f 100644
--- a/tests/problems/test_setcover.py
+++ b/tests/problems/test_setcover.py
@@ -35,6 +35,22 @@ def test_set_cover_generator() -> None:
     ]
 
 
+def test_set_cover_generator_callable() -> None:
+    np.random.seed(42)
+    gen = SetCoverGenerator(
+        n_elements=randint(low=4, high=5),
+        n_sets=lambda n: n * 2,
+        costs=uniform(loc=10.0, scale=0.0),
+        density=uniform(loc=0.5, scale=0),
+        K=uniform(loc=0, scale=0),
+    )
+    data = gen.generate(1)
+    n_elements, n_sets = data[0].incidence_matrix.shape
+    assert n_elements == 4
+    assert n_sets == 8
+    assert len(data[0].costs) == 8
+
+
 def test_set_cover() -> None:
     data = SetCoverData(
         costs=np.array([5, 10, 12, 6, 8]),
diff --git a/tests/problems/test_setpack.py b/tests/problems/test_setpack.py
index 8f41810..57163c4 100644
--- a/tests/problems/test_setpack.py
+++ b/tests/problems/test_setpack.py
@@ -3,9 +3,11 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 
 import numpy as np
+from scipy.stats import randint, uniform
 
 from miplearn.problems.setpack import (
     SetPackData,
+    SetPackGenerator,
     build_setpack_model_gurobipy,
 )
 
@@ -24,3 +26,19 @@ def test_setpack() -> None:
     model = build_setpack_model_gurobipy(data)
     model.optimize()
     assert model.inner.objval == -22.0
+
+
+def test_set_pack_generator_callable() -> None:
+    np.random.seed(42)
+    gen = SetPackGenerator(
+        n_elements=randint(low=4, high=5),
+        n_sets=lambda n: n * 2,
+        costs=uniform(loc=10.0, scale=0.0),
+        density=uniform(loc=0.5, scale=0),
+        K=uniform(loc=0, scale=0),
+    )
+    data = gen.generate(1)
+    n_elements, n_sets = data[0].incidence_matrix.shape
+    assert n_elements == 4
+    assert n_sets == 8
+    assert len(data[0].costs) == 8