Implement SetPackPerturber and SetCoverPerturber

This commit is contained in:
2025-12-08 13:47:33 -06:00
parent 427bd1d806
commit 4137378bb8
4 changed files with 213 additions and 106 deletions

View File

@@ -665,9 +665,10 @@
"\n",
"Finally, the weight of set $j$ is set to $w_j + K | S_j |$, where $w_j$ and $K$ are sampled from `costs` and `K`, respectively, and where $|S_j|$ denotes the size of set $S_j$. The parameter $K$ is used to introduce some correlation between the size of the set and its weight, making the instance more challenging. Note that `K` is only sampled once for the entire instance.\n",
"\n",
"If `fix_sets=True`, then all generated instances have exactly the same sets and elements. The costs of the sets, however, are multiplied by random scaling factors sampled from the provided probability distribution `costs_jitter`.\n",
"To create multiple instances with the same incidence matrix but different costs, you can use [SetCoverPerturber][SetCoverPerturber]. This class takes an existing SetCoverData instance and generates new instances by applying randomization factors to the existing costs while keeping the incidence matrix fixed.\n",
"\n",
"[SetCoverGenerator]: ../../api/problems/#miplearn.problems.setcover.SetCoverGenerator"
"[SetCoverGenerator]: ../../api/problems/#miplearn.problems.setcover.SetCoverGenerator\n",
"[SetCoverPerturber]: ../../api/problems/#miplearn.problems.setcover.SetCoverPerturber"
]
},
{
@@ -680,7 +681,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"id": "3224845b-9afd-463e-abf4-e0e93d304859",
"metadata": {
"ExecuteTime": {
@@ -699,8 +700,8 @@
" [0 1 1 1 1 0 1 0 0 1]\n",
" [0 1 1 0 0 0 1 1 0 1]\n",
" [1 1 1 0 1 0 1 0 0 1]]\n",
"costs [1044.58 850.13 1014.5 944.83 697.9 971.87 213.49 220.98 70.23\n",
" 425.33]\n",
"costs [1021.32 811.25 1081.2 917.76 667.32 980.17 198.16 234.34 64.25\n",
" 466.75]\n",
"\n",
"Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - \"Ubuntu 22.04.5 LTS\")\n",
"\n",
@@ -708,15 +709,15 @@
"Thread count: 16 physical cores, 16 logical processors, using up to 16 threads\n",
"\n",
"Optimize a model with 5 rows, 10 columns and 28 nonzeros (Min)\n",
"Model fingerprint: 0xe5c2d4fa\n",
"Model fingerprint: 0x57e5c4ba\n",
"Model has 10 linear objective coefficients\n",
"Variable types: 0 continuous, 10 integer (10 binary)\n",
"Coefficient statistics:\n",
" Matrix range [1e+00, 1e+00]\n",
" Objective range [7e+01, 1e+03]\n",
" Objective range [6e+01, 1e+03]\n",
" Bounds range [1e+00, 1e+00]\n",
" RHS range [1e+00, 1e+00]\n",
"Found heuristic solution: objective 213.4900000\n",
"Found heuristic solution: objective 198.1600000\n",
"Presolve removed 5 rows and 10 columns\n",
"Presolve time: 0.00s\n",
"Presolve: All rows and columns removed\n",
@@ -724,10 +725,10 @@
"Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)\n",
"Thread count was 1 (of 16 available processors)\n",
"\n",
"Solution count 1: 213.49 \n",
"Solution count 1: 198.16 \n",
"\n",
"Optimal solution found (tolerance 1.00e-04)\n",
"Best objective 2.134900000000e+02, best bound 2.134900000000e+02, gap 0.0000%\n",
"Best objective 1.981600000000e+02, best bound 1.981600000000e+02, gap 0.0000%\n",
"\n",
"User-callback calls 181, time in user-callback 0.00 sec\n"
]
@@ -736,23 +737,26 @@
"source": [
"import numpy as np\n",
"from scipy.stats import uniform, randint\n",
"from miplearn.problems.setcover import SetCoverGenerator, build_setcover_model_gurobipy\n",
"from miplearn.problems.setcover import SetCoverGenerator, SetCoverPerturber, build_setcover_model_gurobipy\n",
"\n",
"# Set random seed, to make example reproducible\n",
"np.random.seed(42)\n",
"\n",
"# Build random instances with five elements, ten sets and costs\n",
"# in the [0, 1000] interval, with a correlation factor of 25 and\n",
"# an incidence matrix with 25% density.\n",
"data = SetCoverGenerator(\n",
"# Generate a reference instance with five elements and ten sets\n",
"generator = SetCoverGenerator(\n",
" n_elements=randint(low=5, high=6),\n",
" n_sets=randint(low=10, high=11),\n",
" costs=uniform(loc=0.0, scale=1000.0),\n",
" costs_jitter=uniform(loc=0.90, scale=0.20),\n",
" density=uniform(loc=0.5, scale=0.00),\n",
" K=uniform(loc=25.0, scale=0.0),\n",
" fix_sets=True,\n",
").generate(10)\n",
")\n",
"reference_instance = generator.generate(1)[0]\n",
"\n",
"# Generate perturbed instances using the reference\n",
"perturber = SetCoverPerturber(\n",
" costs_jitter=uniform(loc=0.9, scale=0.2),\n",
")\n",
"data = perturber.perturb(reference_instance, 10)\n",
"\n",
"# Print problem data for one instance\n",
"print(\"matrix\\n\", data[0].incidence_matrix)\n",
@@ -810,15 +814,18 @@
"\n",
"The class [SetPackGenerator][SetPackGenerator] can generate random instances of this problem. It accepts exactly the same arguments, and generates instance data in exactly the same way as [SetCoverGenerator][SetCoverGenerator]. For more details, please see the documentation for that class.\n",
"\n",
"To create multiple instances with the same incidence matrix but different costs, you can use [SetPackPerturber][SetPackPerturber]. This class takes an existing SetPackData instance and generates new instances by applying randomization factors to the existing costs while keeping the incidence matrix fixed.\n",
"\n",
"[SetPackGenerator]: ../../api/problems/#miplearn.problems.setpack.SetPackGenerator\n",
"[SetCoverGenerator]: ../../api/problems/#miplearn.problems.setcover.SetCoverGenerator\n",
"[SetPackPerturber]: ../../api/problems/#miplearn.problems.setpack.SetPackPerturber\n",
"\n",
"### Example"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"id": "cc797da7",
"metadata": {
"ExecuteTime": {
@@ -841,8 +848,8 @@
" [0 1 1 1 1 0 1 0 0 1]\n",
" [0 1 1 0 0 0 1 1 0 1]\n",
" [1 1 1 0 1 0 1 0 0 1]]\n",
"costs [1044.58 850.13 1014.5 944.83 697.9 971.87 213.49 220.98 70.23\n",
" 425.33]\n",
"costs [1021.32 811.25 1081.2 917.76 667.32 980.17 198.16 234.34 64.25\n",
" 466.75]\n",
"\n",
"Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - \"Ubuntu 22.04.5 LTS\")\n",
"\n",
@@ -850,15 +857,15 @@
"Thread count: 16 physical cores, 16 logical processors, using up to 16 threads\n",
"\n",
"Optimize a model with 5 rows, 10 columns and 28 nonzeros (Min)\n",
"Model fingerprint: 0x4ee91388\n",
"Model fingerprint: 0x75cd8328\n",
"Model has 10 linear objective coefficients\n",
"Variable types: 0 continuous, 10 integer (10 binary)\n",
"Coefficient statistics:\n",
" Matrix range [1e+00, 1e+00]\n",
" Objective range [7e+01, 1e+03]\n",
" Objective range [6e+01, 1e+03]\n",
" Bounds range [1e+00, 1e+00]\n",
" RHS range [1e+00, 1e+00]\n",
"Found heuristic solution: objective -1265.560000\n",
"Found heuristic solution: objective -1255.660000\n",
"Presolve removed 5 rows and 10 columns\n",
"Presolve time: 0.00s\n",
"Presolve: All rows and columns removed\n",
@@ -866,11 +873,11 @@
"Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)\n",
"Thread count was 1 (of 16 available processors)\n",
"\n",
"Solution count 2: -1986.37 -1265.56 \n",
"No other solutions better than -1986.37\n",
"Solution count 2: -2061.37 -1255.66 \n",
"No other solutions better than -2061.37\n",
"\n",
"Optimal solution found (tolerance 1.00e-04)\n",
"Best objective -1.986370000000e+03, best bound -1.986370000000e+03, gap 0.0000%\n",
"Best objective -2.061370000000e+03, best bound -2.061370000000e+03, gap 0.0000%\n",
"\n",
"User-callback calls 242, time in user-callback 0.00 sec\n"
]
@@ -879,23 +886,26 @@
"source": [
"import numpy as np\n",
"from scipy.stats import uniform, randint\n",
"from miplearn.problems.setpack import SetPackGenerator, build_setpack_model_gurobipy\n",
"from miplearn.problems.setpack import SetPackGenerator, SetPackPerturber, build_setpack_model_gurobipy\n",
"\n",
"# Set random seed, to make example reproducible\n",
"np.random.seed(42)\n",
"\n",
"# Build random instances with five elements, ten sets and costs\n",
"# in the [0, 1000] interval, with a correlation factor of 25 and\n",
"# an incidence matrix with 25% density.\n",
"data = SetPackGenerator(\n",
"# Generate a reference instance with five elements and ten sets\n",
"generator = SetPackGenerator(\n",
" n_elements=randint(low=5, high=6),\n",
" n_sets=randint(low=10, high=11),\n",
" costs=uniform(loc=0.0, scale=1000.0),\n",
" costs_jitter=uniform(loc=0.90, scale=0.20),\n",
" density=uniform(loc=0.5, scale=0.00),\n",
" K=uniform(loc=25.0, scale=0.0),\n",
" fix_sets=True,\n",
").generate(10)\n",
")\n",
"reference_instance = generator.generate(1)[0]\n",
"\n",
"# Generate perturbed instances using the reference\n",
"perturber = SetPackPerturber(\n",
" costs_jitter=uniform(loc=0.9, scale=0.2),\n",
")\n",
"data = perturber.perturb(reference_instance, 10)\n",
"\n",
"# Print problem data for one instance\n",
"print(\"matrix\\n\", data[0].incidence_matrix)\n",

View File

@@ -24,29 +24,57 @@ class SetCoverData:
class SetCoverGenerator:
"""Random instance generator for the Set Cover Problem.
Generates instances by creating a new random incidence matrix for each
instance, where the number of elements, sets, density, and costs are sampled
from user-provided probability distributions.
"""
def __init__(
self,
n_elements: rv_frozen = randint(low=50, high=51),
n_sets: rv_frozen = randint(low=100, high=101),
costs: rv_frozen = uniform(loc=0.0, scale=100.0),
costs_jitter: rv_frozen = uniform(loc=-5.0, scale=10.0),
K: rv_frozen = uniform(loc=25.0, scale=0.0),
density: rv_frozen = uniform(loc=0.02, scale=0.00),
fix_sets: bool = True,
):
"""Initialize the problem generator.
Parameters
----------
n_elements: rv_discrete
Probability distribution for number of elements.
n_sets: rv_discrete
Probability distribution for number of sets.
costs: rv_continuous
Probability distribution for base set costs.
K: rv_continuous
Probability distribution for cost scaling factor based on set size.
density: rv_continuous
Probability distribution for incidence matrix density.
"""
assert isinstance(
n_elements, rv_frozen
), "n_elements should be a SciPy probability distribution"
assert isinstance(
n_sets, rv_frozen
), "n_sets should be a SciPy probability distribution"
assert isinstance(
costs, rv_frozen
), "costs should be a SciPy probability distribution"
assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution"
assert isinstance(
density, rv_frozen
), "density should be a SciPy probability distribution"
self.n_elements = n_elements
self.n_sets = n_sets
self.costs = costs
self.costs_jitter = costs_jitter
self.density = density
self.K = K
self.fix_sets = fix_sets
self.fixed_costs = None
self.fixed_matrix = None
def generate(self, n_samples: int) -> List[SetCoverData]:
def _sample() -> SetCoverData:
if self.fixed_matrix is None:
n_sets = self.n_sets.rvs()
n_elements = self.n_elements.rvs()
density = self.density.rvs()
@@ -64,16 +92,7 @@ class SetCoverGenerator:
if incidence_matrix[:, i].sum() == 0:
incidence_matrix[randint(low=0, high=n_elements).rvs(), i] = 1
costs = self.costs.rvs(n_sets) + self.K.rvs() * incidence_matrix.sum(
axis=0
)
if self.fix_sets:
self.fixed_matrix = incidence_matrix
self.fixed_costs = costs
else:
incidence_matrix = self.fixed_matrix
(_, n_sets) = incidence_matrix.shape
costs = self.fixed_costs * self.costs_jitter.rvs(n_sets)
costs = self.costs.rvs(n_sets) + self.K.rvs() * incidence_matrix.sum(axis=0)
return SetCoverData(
costs=costs.round(2),
incidence_matrix=incidence_matrix,
@@ -82,6 +101,47 @@ class SetCoverGenerator:
return [_sample() for _ in range(n_samples)]
class SetCoverPerturber:
    """Perturbation generator for existing Set Cover instances.

    Takes an existing SetCoverData instance and generates new instances
    by applying randomization factors to the existing costs while keeping the
    incidence matrix fixed.
    """

    def __init__(
        self,
        costs_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
    ):
        """Initialize the perturbation generator.

        Parameters
        ----------
        costs_jitter: rv_continuous
            Probability distribution for randomization factors applied to set
            costs. Each sampled factor multiplies the corresponding reference
            cost, so the default yields factors between 0.9 and 1.1.
        """
        assert isinstance(
            costs_jitter, rv_frozen
        ), "costs_jitter should be a SciPy probability distribution"
        self.costs_jitter = costs_jitter

    def perturb(
        self,
        instance: SetCoverData,
        n_samples: int,
    ) -> List[SetCoverData]:
        """Generate perturbed copies of a reference instance.

        Parameters
        ----------
        instance: SetCoverData
            Reference instance. It is only read, never modified.
        n_samples: int
            Number of perturbed instances to generate.

        Returns
        -------
        List[SetCoverData]
            List with `n_samples` instances. Each one has the same incidence
            matrix values as the reference, and costs equal to the reference
            costs multiplied elementwise by freshly sampled jitter factors,
            rounded to two decimal places.
        """
        # One jitter factor is sampled per set (i.e. per matrix column).
        (_, n_sets) = instance.incidence_matrix.shape

        def _sample() -> SetCoverData:
            jitter_factors = self.costs_jitter.rvs(n_sets)
            costs = np.round(instance.costs * jitter_factors, 2)
            return SetCoverData(
                costs=costs,
                # Give every instance its own matrix, so that modifying one
                # in place cannot silently alter the reference instance or
                # the other perturbed instances.
                incidence_matrix=instance.incidence_matrix.copy(),
            )

        return [_sample() for _ in range(n_samples)]
def build_setcover_model_gurobipy(data: Union[str, SetCoverData]) -> GurobiModel:
data = _read_setcover_data(data)
(n_elements, n_sets) = data.incidence_matrix.shape

View File

@@ -11,7 +11,7 @@ from gurobipy import GRB
from scipy.stats import uniform, randint
from scipy.stats.distributions import rv_frozen
from .setcover import SetCoverGenerator
from .setcover import SetCoverGenerator, SetCoverPerturber
from miplearn.solvers.gurobi import GurobiModel
from ..io import read_pkl_gz
@@ -23,24 +23,55 @@ class SetPackData:
class SetPackGenerator:
"""Random instance generator for the Set Packing Problem.
Generates instances by creating a new random incidence matrix for each
instance, where the number of elements, sets, density, and costs are sampled
from user-provided probability distributions.
"""
def __init__(
self,
n_elements: rv_frozen = randint(low=50, high=51),
n_sets: rv_frozen = randint(low=100, high=101),
costs: rv_frozen = uniform(loc=0.0, scale=100.0),
costs_jitter: rv_frozen = uniform(loc=-5.0, scale=10.0),
K: rv_frozen = uniform(loc=25.0, scale=0.0),
density: rv_frozen = uniform(loc=0.02, scale=0.00),
fix_sets: bool = True,
) -> None:
"""Initialize the problem generator.
Parameters
----------
n_elements: rv_discrete
Probability distribution for number of elements.
n_sets: rv_discrete
Probability distribution for number of sets.
costs: rv_continuous
Probability distribution for base set costs.
K: rv_continuous
Probability distribution for cost scaling factor based on set size.
density: rv_continuous
Probability distribution for incidence matrix density.
"""
assert isinstance(
n_elements, rv_frozen
), "n_elements should be a SciPy probability distribution"
assert isinstance(
n_sets, rv_frozen
), "n_sets should be a SciPy probability distribution"
assert isinstance(
costs, rv_frozen
), "costs should be a SciPy probability distribution"
assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution"
assert isinstance(
density, rv_frozen
), "density should be a SciPy probability distribution"
self.gen = SetCoverGenerator(
n_elements=n_elements,
n_sets=n_sets,
costs=costs,
costs_jitter=costs_jitter,
K=K,
density=density,
fix_sets=fix_sets,
)
def generate(self, n_samples: int) -> List[SetPackData]:
@@ -53,6 +84,47 @@ class SetPackGenerator:
]
class SetPackPerturber:
    """Perturbation generator for existing Set Packing instances.

    Takes an existing SetPackData instance and generates new instances
    by applying randomization factors to the existing costs while keeping the
    incidence matrix fixed.
    """

    def __init__(
        self,
        costs_jitter: rv_frozen = uniform(loc=0.9, scale=0.2),
    ):
        """Initialize the perturbation generator.

        Parameters
        ----------
        costs_jitter: rv_continuous
            Probability distribution for randomization factors applied to set
            costs. Each sampled factor multiplies the corresponding reference
            cost, so the default yields factors between 0.9 and 1.1.
        """
        assert isinstance(
            costs_jitter, rv_frozen
        ), "costs_jitter should be a SciPy probability distribution"
        self.costs_jitter = costs_jitter

    def perturb(
        self,
        instance: SetPackData,
        n_samples: int,
    ) -> List[SetPackData]:
        """Generate perturbed copies of a reference instance.

        Parameters
        ----------
        instance: SetPackData
            Reference instance. It is only read, never modified.
        n_samples: int
            Number of perturbed instances to generate.

        Returns
        -------
        List[SetPackData]
            List with `n_samples` instances. Each one has the same incidence
            matrix values as the reference, and costs equal to the reference
            costs multiplied elementwise by freshly sampled jitter factors,
            rounded to two decimal places.
        """
        # One jitter factor is sampled per set (i.e. per matrix column).
        (_, n_sets) = instance.incidence_matrix.shape

        def _sample() -> SetPackData:
            jitter_factors = self.costs_jitter.rvs(n_sets)
            costs = np.round(instance.costs * jitter_factors, 2)
            return SetPackData(
                costs=costs,
                # Give every instance its own matrix, so that modifying one
                # in place cannot silently alter the reference instance or
                # the other perturbed instances.
                incidence_matrix=instance.incidence_matrix.copy(),
            )

        return [_sample() for _ in range(n_samples)]
def build_setpack_model_gurobipy(data: Union[str, SetPackData]) -> GurobiModel:
if isinstance(data, str):
data = read_pkl_gz(data)

View File

@@ -23,51 +23,16 @@ def test_set_cover_generator() -> None:
n_elements=randint(low=3, high=4),
n_sets=randint(low=5, high=6),
costs=uniform(loc=0.0, scale=100.0),
costs_jitter=uniform(loc=0.95, scale=0.10),
density=uniform(loc=0.5, scale=0),
K=uniform(loc=25, scale=0),
fix_sets=False,
)
data = gen.generate(2)
data = gen.generate(1)
assert data[0].costs.round(1).tolist() == [136.8, 86.2, 25.7, 27.3, 102.5]
assert data[0].incidence_matrix.tolist() == [
[1, 0, 1, 0, 1],
[1, 1, 0, 0, 0],
[1, 0, 0, 1, 1],
]
assert data[1].costs.round(1).tolist() == [63.5, 76.6, 48.1, 74.1, 93.3]
assert data[1].incidence_matrix.tolist() == [
[1, 1, 0, 1, 1],
[0, 1, 0, 1, 0],
[0, 1, 1, 0, 0],
]
def test_set_cover_generator_with_fixed_sets() -> None:
np.random.seed(42)
gen = SetCoverGenerator(
n_elements=randint(low=3, high=4),
n_sets=randint(low=5, high=6),
costs=uniform(loc=0.0, scale=100.0),
costs_jitter=uniform(loc=0.95, scale=0.10),
density=uniform(loc=0.5, scale=0.00),
fix_sets=True,
)
data = gen.generate(3)
assert data[0].costs.tolist() == [136.75, 86.17, 25.71, 27.31, 102.48]
assert data[1].costs.tolist() == [135.38, 82.26, 26.92, 26.58, 98.28]
assert data[2].costs.tolist() == [138.37, 85.15, 26.95, 27.22, 106.17]
print(data[0].incidence_matrix)
for i in range(3):
assert data[i].incidence_matrix.tolist() == [
[1, 0, 1, 0, 1],
[1, 1, 0, 0, 0],
[1, 0, 0, 1, 1],
]
def test_set_cover() -> None: