diff --git a/docs/guide/problems.ipynb b/docs/guide/problems.ipynb index e840bb7..ff9935f 100644 --- a/docs/guide/problems.ipynb +++ b/docs/guide/problems.ipynb @@ -665,9 +665,10 @@ "\n", "Finally, the weight of set $j$ is set to $w_j + K | S_j |$, where $w_j$ and $k$ are sampled from `costs` and `K`, respectively, and where $|S_j|$ denotes the size of set $S_j$. The parameter $K$ is used to introduce some correlation between the size of the set and its weight, making the instance more challenging. Note that `K` is only sampled once for the entire instance.\n", "\n", - "If `fix_sets=True`, then all generated instances have exactly the same sets and elements. The costs of the sets, however, are multiplied by random scaling factors sampled from the provided probability distribution `costs_jitter`.\n", + "To create multiple instances with the same incidence matrix but different costs, you can use [SetCoverPerturber][SetCoverPerturber]. This class takes an existing SetCoverData instance and generates new instances by applying randomization factors to the existing costs while keeping the incidence matrix fixed.\n", "\n", - "[SetCoverGenerator]: ../../api/problems/#miplearn.problems.setcover.SetCoverGenerator" + "[SetCoverGenerator]: ../../api/problems/#miplearn.problems.setcover.SetCoverGenerator\n", + "[SetCoverPerturber]: ../../api/problems/#miplearn.problems.setcover.SetCoverPerturber" ] }, { @@ -680,7 +681,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "id": "3224845b-9afd-463e-abf4-e0e93d304859", "metadata": { "ExecuteTime": { @@ -699,8 +700,8 @@ " [0 1 1 1 1 0 1 0 0 1]\n", " [0 1 1 0 0 0 1 1 0 1]\n", " [1 1 1 0 1 0 1 0 0 1]]\n", - "costs [1044.58 850.13 1014.5 944.83 697.9 971.87 213.49 220.98 70.23\n", - " 425.33]\n", + "costs [1021.32 811.25 1081.2 917.76 667.32 980.17 198.16 234.34 64.25\n", + " 466.75]\n", "\n", "Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - \"Ubuntu 22.04.5 LTS\")\n", "\n", @@ -708,15 +709,15 @@ "Thread count: 16 physical cores, 16 logical processors, using up to 16 threads\n", "\n", "Optimize a model with 5 rows, 10 columns and 28 nonzeros (Min)\n", - "Model fingerprint: 0xe5c2d4fa\n", + "Model fingerprint: 0x57e5c4ba\n", "Model has 10 linear objective coefficients\n", "Variable types: 0 continuous, 10 integer (10 binary)\n", "Coefficient statistics:\n", " Matrix range [1e+00, 1e+00]\n", - " Objective range [7e+01, 1e+03]\n", + " Objective range [6e+01, 1e+03]\n", " Bounds range [1e+00, 1e+00]\n", " RHS range [1e+00, 1e+00]\n", - "Found heuristic solution: objective 213.4900000\n", + "Found heuristic solution: objective 198.1600000\n", "Presolve removed 5 rows and 10 columns\n", "Presolve time: 0.00s\n", "Presolve: All rows and columns removed\n", @@ -724,10 +725,10 @@ "Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)\n", "Thread count was 1 (of 16 available processors)\n", "\n", - "Solution count 1: 213.49 \n", + "Solution count 1: 198.16 \n", "\n", "Optimal solution found (tolerance 1.00e-04)\n", - "Best objective 2.134900000000e+02, best bound 2.134900000000e+02, gap 0.0000%\n", + "Best objective 1.981600000000e+02, best bound 1.981600000000e+02, gap 0.0000%\n", "\n", "User-callback calls 181, time in user-callback 0.00 sec\n" ] @@ -736,23 +737,26 @@ "source": [ "import numpy as np\n", "from scipy.stats import uniform, randint\n", - "from miplearn.problems.setcover import SetCoverGenerator, build_setcover_model_gurobipy\n", + "from miplearn.problems.setcover import SetCoverGenerator, SetCoverPerturber, build_setcover_model_gurobipy\n", "\n", "# Set random seed, to make example reproducible\n", "np.random.seed(42)\n", "\n", - "# Build random instances with five elements, ten sets and costs\n", - "# in the [0, 1000] interval, with a correlation factor of 25 and\n", - "# an incidence matrix with 25% density.\n", - "data = SetCoverGenerator(\n", + "# Generate a reference instance with five elements and ten sets\n", + "generator = SetCoverGenerator(\n", " n_elements=randint(low=5, high=6),\n", " n_sets=randint(low=10, high=11),\n", " costs=uniform(loc=0.0, scale=1000.0),\n", - " costs_jitter=uniform(loc=0.90, scale=0.20),\n", " density=uniform(loc=0.5, scale=0.00),\n", " K=uniform(loc=25.0, scale=0.0),\n", - " fix_sets=True,\n", - ").generate(10)\n", + ")\n", + "reference_instance = generator.generate(1)[0]\n", + "\n", + "# Generate perturbed instances using the reference\n", + "perturber = SetCoverPerturber(\n", + " costs_jitter=uniform(loc=0.9, scale=0.2),\n", + ")\n", + "data = perturber.perturb(reference_instance, 10)\n", "\n", "# Print problem data for one instance\n", "print(\"matrix\\n\", data[0].incidence_matrix)\n", @@ -810,15 +814,18 @@ "\n", "The class [SetPackGenerator][SetPackGenerator] can generate random instances of this problem. It accepts exactly the same arguments, and generates instance data in exactly the same way as [SetCoverGenerator][SetCoverGenerator]. For more details, please see the documentation for that class.\n", "\n", + "To create multiple instances with the same incidence matrix but different costs, you can use [SetPackPerturber][SetPackPerturber]. This class takes an existing SetPackData instance and generates new instances by applying randomization factors to the existing costs while keeping the incidence matrix fixed.\n", + "\n", "[SetPackGenerator]: ../../api/problems/#miplearn.problems.setpack.SetPackGenerator\n", "[SetCoverGenerator]: ../../api/problems/#miplearn.problems.setcover.SetCoverGenerator\n", + "[SetPackPerturber]: ../../api/problems/#miplearn.problems.setpack.SetPackPerturber\n", "\n", "### Example" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "cc797da7", "metadata": { "ExecuteTime": { @@ -841,8 +848,8 @@ " [0 1 1 1 1 0 1 0 0 1]\n", " [0 1 1 0 0 0 1 1 0 1]\n", " [1 1 1 0 1 0 1 0 0 1]]\n", - "costs [1044.58 850.13 1014.5 944.83 697.9 971.87 213.49 220.98 70.23\n", - " 425.33]\n", + "costs [1021.32 811.25 1081.2 917.76 667.32 980.17 198.16 234.34 64.25\n", + " 466.75]\n", "\n", "Gurobi Optimizer version 13.0.0 build v13.0.0rc1 (linux64 - \"Ubuntu 22.04.5 LTS\")\n", "\n", @@ -850,15 +857,15 @@ "Thread count: 16 physical cores, 16 logical processors, using up to 16 threads\n", "\n", "Optimize a model with 5 rows, 10 columns and 28 nonzeros (Min)\n", - "Model fingerprint: 0x4ee91388\n", + "Model fingerprint: 0x75cd8328\n", "Model has 10 linear objective coefficients\n", "Variable types: 0 continuous, 10 integer (10 binary)\n", "Coefficient statistics:\n", " Matrix range [1e+00, 1e+00]\n", - " Objective range [7e+01, 1e+03]\n", + " Objective range [6e+01, 1e+03]\n", " Bounds range [1e+00, 1e+00]\n", " RHS range [1e+00, 1e+00]\n", - "Found heuristic solution: objective -1265.560000\n", + "Found heuristic solution: objective -1255.660000\n", "Presolve removed 5 rows and 10 columns\n", "Presolve time: 0.00s\n", "Presolve: All rows and columns removed\n", @@ -866,11 +873,11 @@ "Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)\n", "Thread count was 1 (of 16 available processors)\n", "\n", - "Solution count 2: -1986.37 -1265.56 \n", - "No other solutions better than -1986.37\n", + "Solution count 2: -2061.37 -1255.66 \n", + "No other solutions better than -2061.37\n", "\n", "Optimal solution found (tolerance 1.00e-04)\n", - "Best objective -1.986370000000e+03, best bound -1.986370000000e+03, gap 0.0000%\n", + "Best objective -2.061370000000e+03, best bound -2.061370000000e+03, gap 0.0000%\n", "\n", "User-callback calls 242, time in user-callback 0.00 sec\n" ] @@ -879,23 +886,26 @@ "source": [ "import numpy as np\n", "from scipy.stats import uniform, randint\n", - "from miplearn.problems.setpack import SetPackGenerator, build_setpack_model_gurobipy\n", + "from miplearn.problems.setpack import SetPackGenerator, SetPackPerturber, build_setpack_model_gurobipy\n", "\n", "# Set random seed, to make example reproducible\n", "np.random.seed(42)\n", "\n", - "# Build random instances with five elements, ten sets and costs\n", - "# in the [0, 1000] interval, with a correlation factor of 25 and\n", - "# an incidence matrix with 25% density.\n", - "data = SetPackGenerator(\n", + "# Generate a reference instance with five elements and ten sets\n", + "generator = SetPackGenerator(\n", " n_elements=randint(low=5, high=6),\n", " n_sets=randint(low=10, high=11),\n", " costs=uniform(loc=0.0, scale=1000.0),\n", - " costs_jitter=uniform(loc=0.90, scale=0.20),\n", " density=uniform(loc=0.5, scale=0.00),\n", " K=uniform(loc=25.0, scale=0.0),\n", - " fix_sets=True,\n", - ").generate(10)\n", + ")\n", + "reference_instance = generator.generate(1)[0]\n", + "\n", + "# Generate perturbed instances using the reference\n", + "perturber = SetPackPerturber(\n", + " costs_jitter=uniform(loc=0.9, scale=0.2),\n", + ")\n", + "data = perturber.perturb(reference_instance, 10)\n", "\n", "# Print problem data for one instance\n", "print(\"matrix\\n\", data[0].incidence_matrix)\n", diff --git a/miplearn/problems/setcover.py b/miplearn/problems/setcover.py index 6c15951..7ed9ef0 100644 --- a/miplearn/problems/setcover.py +++ b/miplearn/problems/setcover.py @@ -24,56 +24,75 @@ class SetCoverData: class SetCoverGenerator: + """Random instance generator for the Set Cover Problem. + + Generates instances by creating a new random incidence matrix for each + instance, where the number of elements, sets, density, and costs are sampled + from user-provided probability distributions. + """ + def __init__( self, n_elements: rv_frozen = randint(low=50, high=51), n_sets: rv_frozen = randint(low=100, high=101), costs: rv_frozen = uniform(loc=0.0, scale=100.0), - costs_jitter: rv_frozen = uniform(loc=-5.0, scale=10.0), K: rv_frozen = uniform(loc=25.0, scale=0.0), density: rv_frozen = uniform(loc=0.02, scale=0.00), - fix_sets: bool = True, ): + """Initialize the problem generator. + + Parameters + ---------- + n_elements: rv_discrete + Probability distribution for number of elements. + n_sets: rv_discrete + Probability distribution for number of sets. + costs: rv_continuous + Probability distribution for base set costs. + K: rv_continuous + Probability distribution for cost scaling factor based on set size. + density: rv_continuous + Probability distribution for incidence matrix density. + """ + assert isinstance( + n_elements, rv_frozen + ), "n_elements should be a SciPy probability distribution" + assert isinstance( + n_sets, rv_frozen + ), "n_sets should be a SciPy probability distribution" + assert isinstance( + costs, rv_frozen + ), "costs should be a SciPy probability distribution" + assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution" + assert isinstance( + density, rv_frozen + ), "density should be a SciPy probability distribution" self.n_elements = n_elements self.n_sets = n_sets self.costs = costs - self.costs_jitter = costs_jitter self.density = density self.K = K - self.fix_sets = fix_sets - self.fixed_costs = None - self.fixed_matrix = None def generate(self, n_samples: int) -> List[SetCoverData]: def _sample() -> SetCoverData: - if self.fixed_matrix is None: - n_sets = self.n_sets.rvs() - n_elements = self.n_elements.rvs() - density = self.density.rvs() + n_sets = self.n_sets.rvs() + n_elements = self.n_elements.rvs() + density = self.density.rvs() - incidence_matrix = np.random.rand(n_elements, n_sets) < density - incidence_matrix = incidence_matrix.astype(int) + incidence_matrix = np.random.rand(n_elements, n_sets) < density + incidence_matrix = incidence_matrix.astype(int) - # Ensure each element belongs to at least one set - for j in range(n_elements): - if incidence_matrix[j, :].sum() == 0: - incidence_matrix[j, randint(low=0, high=n_sets).rvs()] = 1 + # Ensure each element belongs to at least one set + for j in range(n_elements): + if incidence_matrix[j, :].sum() == 0: + incidence_matrix[j, randint(low=0, high=n_sets).rvs()] = 1 - # Ensure each set contains at least one element - for i in range(n_sets): - if incidence_matrix[:, i].sum() == 0: - incidence_matrix[randint(low=0, high=n_elements).rvs(), i] = 1 + # Ensure each set contains at least one element + for i in range(n_sets): + if incidence_matrix[:, i].sum() == 0: + incidence_matrix[randint(low=0, high=n_elements).rvs(), i] = 1 - costs = self.costs.rvs(n_sets) + self.K.rvs() * incidence_matrix.sum( - axis=0 - ) - if self.fix_sets: - self.fixed_matrix = incidence_matrix - self.fixed_costs = costs - else: - incidence_matrix = self.fixed_matrix - (_, n_sets) = incidence_matrix.shape - costs = self.fixed_costs * self.costs_jitter.rvs(n_sets) + costs = self.costs.rvs(n_sets) + self.K.rvs() * incidence_matrix.sum(axis=0) return SetCoverData( costs=costs.round(2), incidence_matrix=incidence_matrix, @@ -82,6 +101,47 @@ class SetCoverGenerator: return [_sample() for _ in range(n_samples)] +class SetCoverPerturber: + """Perturbation generator for existing Set Cover instances. + + Takes an existing SetCoverData instance and generates new instances + by applying randomization factors to the existing costs while keeping the + incidence matrix fixed. + """ + + def __init__( + self, + costs_jitter: rv_frozen = uniform(loc=0.9, scale=0.2), + ): + """Initialize the perturbation generator. + + Parameters + ---------- + costs_jitter: rv_continuous + Probability distribution for randomization factors applied to set costs. + """ + assert isinstance( + costs_jitter, rv_frozen + ), "costs_jitter should be a SciPy probability distribution" + self.costs_jitter = costs_jitter + + def perturb( + self, + instance: SetCoverData, + n_samples: int, + ) -> List[SetCoverData]: + def _sample() -> SetCoverData: + (_, n_sets) = instance.incidence_matrix.shape + jitter_factors = self.costs_jitter.rvs(n_sets) + costs = np.round(instance.costs * jitter_factors, 2) + return SetCoverData( + costs=costs, + incidence_matrix=instance.incidence_matrix, + ) + + return [_sample() for _ in range(n_samples)] + + def build_setcover_model_gurobipy(data: Union[str, SetCoverData]) -> GurobiModel: data = _read_setcover_data(data) (n_elements, n_sets) = data.incidence_matrix.shape diff --git a/miplearn/problems/setpack.py b/miplearn/problems/setpack.py index b9195c4..0a8c29f 100644 --- a/miplearn/problems/setpack.py +++ b/miplearn/problems/setpack.py @@ -11,7 +11,7 @@ from gurobipy import GRB from scipy.stats import uniform, randint from scipy.stats.distributions import rv_frozen -from .setcover import SetCoverGenerator +from .setcover import SetCoverGenerator, SetCoverPerturber from miplearn.solvers.gurobi import GurobiModel from ..io import read_pkl_gz @@ -23,24 +23,55 @@ class SetPackData: class SetPackGenerator: + """Random instance generator for the Set Packing Problem. + + Generates instances by creating a new random incidence matrix for each + instance, where the number of elements, sets, density, and costs are sampled + from user-provided probability distributions. + """ + def __init__( self, n_elements: rv_frozen = randint(low=50, high=51), n_sets: rv_frozen = randint(low=100, high=101), costs: rv_frozen = uniform(loc=0.0, scale=100.0), - costs_jitter: rv_frozen = uniform(loc=-5.0, scale=10.0), K: rv_frozen = uniform(loc=25.0, scale=0.0), density: rv_frozen = uniform(loc=0.02, scale=0.00), - fix_sets: bool = True, ) -> None: + """Initialize the problem generator. + + Parameters + ---------- + n_elements: rv_discrete + Probability distribution for number of elements. + n_sets: rv_discrete + Probability distribution for number of sets. + costs: rv_continuous + Probability distribution for base set costs. + K: rv_continuous + Probability distribution for cost scaling factor based on set size. + density: rv_continuous + Probability distribution for incidence matrix density. + """ + assert isinstance( + n_elements, rv_frozen + ), "n_elements should be a SciPy probability distribution" + assert isinstance( + n_sets, rv_frozen + ), "n_sets should be a SciPy probability distribution" + assert isinstance( + costs, rv_frozen + ), "costs should be a SciPy probability distribution" + assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution" + assert isinstance( + density, rv_frozen + ), "density should be a SciPy probability distribution" self.gen = SetCoverGenerator( n_elements=n_elements, n_sets=n_sets, costs=costs, - costs_jitter=costs_jitter, K=K, density=density, - fix_sets=fix_sets, ) def generate(self, n_samples: int) -> List[SetPackData]: @@ -53,6 +84,47 @@ class SetPackGenerator: ] +class SetPackPerturber: + """Perturbation generator for existing Set Packing instances. + + Takes an existing SetPackData instance and generates new instances + by applying randomization factors to the existing costs while keeping the + incidence matrix fixed. + """ + + def __init__( + self, + costs_jitter: rv_frozen = uniform(loc=0.9, scale=0.2), + ): + """Initialize the perturbation generator. + + Parameters + ---------- + costs_jitter: rv_continuous + Probability distribution for randomization factors applied to set costs. + """ + assert isinstance( + costs_jitter, rv_frozen + ), "costs_jitter should be a SciPy probability distribution" + self.costs_jitter = costs_jitter + + def perturb( + self, + instance: SetPackData, + n_samples: int, + ) -> List[SetPackData]: + def _sample() -> SetPackData: + (_, n_sets) = instance.incidence_matrix.shape + jitter_factors = self.costs_jitter.rvs(n_sets) + costs = np.round(instance.costs * jitter_factors, 2) + return SetPackData( + costs=costs, + incidence_matrix=instance.incidence_matrix, + ) + + return [_sample() for _ in range(n_samples)] + + def build_setpack_model_gurobipy(data: Union[str, SetPackData]) -> GurobiModel: if isinstance(data, str): data = read_pkl_gz(data) diff --git a/tests/problems/test_setcover.py b/tests/problems/test_setcover.py index a5a1211..f3a9c5f 100644 --- a/tests/problems/test_setcover.py +++ b/tests/problems/test_setcover.py @@ -23,51 +23,16 @@ def test_set_cover_generator() -> None: n_elements=randint(low=3, high=4), n_sets=randint(low=5, high=6), costs=uniform(loc=0.0, scale=100.0), - costs_jitter=uniform(loc=0.95, scale=0.10), density=uniform(loc=0.5, scale=0), K=uniform(loc=25, scale=0), - fix_sets=False, ) - data = gen.generate(2) - + data = gen.generate(1) assert data[0].costs.round(1).tolist() == [136.8, 86.2, 25.7, 27.3, 102.5] assert data[0].incidence_matrix.tolist() == [ [1, 0, 1, 0, 1], [1, 1, 0, 0, 0], [1, 0, 0, 1, 1], ] - assert data[1].costs.round(1).tolist() == [63.5, 76.6, 48.1, 74.1, 93.3] - assert data[1].incidence_matrix.tolist() == [ - [1, 1, 0, 1, 1], - [0, 1, 0, 1, 0], - [0, 1, 1, 0, 0], - ] - - -def test_set_cover_generator_with_fixed_sets() -> None: - np.random.seed(42) - gen = SetCoverGenerator( - n_elements=randint(low=3, high=4), - n_sets=randint(low=5, high=6), - costs=uniform(loc=0.0, scale=100.0), - costs_jitter=uniform(loc=0.95, scale=0.10), - density=uniform(loc=0.5, scale=0.00), - fix_sets=True, - ) - data = gen.generate(3) - - assert data[0].costs.tolist() == [136.75, 86.17, 25.71, 27.31, 102.48] - assert data[1].costs.tolist() == [135.38, 82.26, 26.92, 26.58, 98.28] - assert data[2].costs.tolist() == [138.37, 85.15, 26.95, 27.22, 106.17] - - print(data[0].incidence_matrix) - - for i in range(3): - assert data[i].incidence_matrix.tolist() == [ - [1, 0, 1, 0, 1], - [1, 1, 0, 0, 0], - [1, 0, 0, 1, 1], - ] def test_set_cover() -> None: