diff --git a/Makefile b/Makefile index 1c8ca2d..b6204d6 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ PYTEST_ARGS := -W ignore::DeprecationWarning -vv +all: docs test + docs: mkdocs build diff --git a/docs/problems.md b/docs/problems.md index 3d33491..ef6040e 100644 --- a/docs/problems.md +++ b/docs/problems.md @@ -26,7 +26,7 @@ All experiments presented here were performed on a Linux server (Ubuntu Linux 18 Given a simple undirected graph $G=(V,E)$ and weights $w \in \mathbb{R}^V$, the problem is to find a stable set $S \subseteq V$ that maximizes $ \sum_{v \in V} w_v$. We recall that a subset $S \subseteq V$ is a *stable set* if no two vertices of $S$ are adjacent. This is one of Karp's 21 NP-complete problems. -### Random instance generators +### Random instance generator The class `MaxWeightStableSetGenerator` can be used to generate random instances of this problem, with user-specified probability distributions. When the constructor parameter `fix_graph=True` is provided, one random Erdős-Rényi graph $G_{n,p}$ is generated during the constructor, where $n$ and $p$ are sampled from user-provided probability distributions `n` and `p`. To generate each instance, the generator independently samples each $w_v$ from the user-provided probability distribution `w`. When `fix_graph=False`, a new random graph is generated for each instance, while the remaining parameters are sampled in the same way. @@ -50,3 +50,42 @@ MaxWeightStableSetGenerator(w=uniform(loc=100., scale=50.), #### Challenge A ![alt](figures/mwss.png) + +## Multidimensional 0-1 Knapsack Problem + +### Problem definition + +Given a set of $n$ items and $m$ types of resources (also called *knapsacks*), the problem is to find a subset of items that maximizes profit without consuming more resources than it is available. More precisely, the problem is: + +\begin{align*} + \text{maximize} + & \sum_{j=1}^n p_j x_j + \\ + \text{subject to} + & \sum_{j=1}^n w_{ij} x_j \leq b_i + & \forall i=1,\ldots,m \\ + & x_j \in \{0,1\} + & \forall j=1,\ldots,n +\end{align*} + +### Random instance generator + +The class `MultiKnapsackGenerator` can be used to generate random instances of this problem. The number of items $n$ and knapsacks $m$ are sampled from the user-provided probability distributions `n` and `m`. The weights $w_{ij}$ are sampled independently from the provided distribution `w`. The capacity of knapsack $i$ is set to + +$$ + \alpha_i \sum_{j=1}^n w_{ij} +$$ + +where $\alpha_i$, the tightness ratio, is sampled from the provided probability +distribution `alpha`. To make the instances more challenging, the costs of the items +are linearly correlated to their average weights. More specifically, the weight of each +item $j$ is set to: + +$$ + \sum_{i=1}^m \frac{w_{ij}}{m} + K u_j, +$$ + +where $K$, the correlation coefficient, and $u_j$, the correlation multiplier, are sampled +from the provided probability distributions `K` and `u`. Note that $K$ is only sample once for the entire instance. + +This random generation procedure was developed by A. Freville and G. Plateau (*An efficient preprocessing procedure for the multidimensional knapsack problem*, Discrete Applied Mathematics 49 (1994) 189–212). \ No newline at end of file diff --git a/miplearn/problems/__init__.py b/miplearn/problems/__init__.py index e69de29..be4c1c5 100644 --- a/miplearn/problems/__init__.py +++ b/miplearn/problems/__init__.py @@ -0,0 +1,3 @@ +# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. +# Written by Alinson S. Xavier diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py index 265cd0b..bf6dbe7 100644 --- a/miplearn/problems/knapsack.py +++ b/miplearn/problems/knapsack.py @@ -3,51 +3,135 @@ # Written by Alinson S. Xavier import miplearn +from miplearn import Instance import numpy as np import pyomo.environ as pe +from scipy.stats import uniform, randint, bernoulli +from scipy.stats.distributions import rv_frozen -class KnapsackInstance(miplearn.Instance): - def __init__(self, weights, prices, capacity): - self.weights = weights +class MultiKnapsackInstance(Instance): + """Representation of the Multidimensional 0-1 Knapsack Problem. + + Given a set of n items and m knapsacks, the problem is to find a subset of items S maximizing + sum(prices[i] for i in S). If selected, each item i occupies weights[i,j] units of space in + each knapsack j. Furthermore, each knapsack j has limited storage space, given by capacities[j]. + + This implementation assigns a different category for each decision variable, and therefore + trains one ML model per variable. It is only suitable when training and test instances have + same size and items don't shuffle around. + """ + + def __init__(self, + prices, + capacities, + weights): + assert isinstance(prices, np.ndarray) + assert isinstance(capacities, np.ndarray) + assert isinstance(weights, np.ndarray) + assert len(weights.shape) == 2 + self.m, self.n = weights.shape + assert prices.shape == (self.n,) + assert capacities.shape == (self.m,) self.prices = prices - self.capacity = capacity - + self.capacities = capacities + self.weights = weights + def to_model(self): model = pe.ConcreteModel() - items = range(len(self.weights)) - model.x = pe.Var(items, domain=pe.Binary) - model.OBJ = pe.Objective(rule=lambda m: sum(m.x[v] * self.prices[v] for v in items), + model.x = pe.Var(range(self.n), domain=pe.Binary) + model.OBJ = pe.Objective(rule=lambda model: sum(model.x[j] * self.prices[j] + for j in range(self.n)), sense=pe.maximize) - model.eq_capacity = pe.Constraint(rule=lambda m: sum(m.x[v] * self.weights[v] - for v in items) <= self.capacity) + model.eq_capacity = pe.ConstraintList() + for i in range(self.m): + model.eq_capacity.add(sum(model.x[j] * self.weights[i,j] + for j in range(self.n)) <= self.capacities[i]) + return model - + def get_instance_features(self): - return np.array([ - self.capacity, - np.average(self.weights), + return np.hstack([ + self.prices, + self.capacities, + self.weights.ravel(), ]) def get_variable_features(self, var, index): - return np.array([ - self.weights[index], - self.prices[index], - ]) - - -class KnapsackInstance2(KnapsackInstance): - """ - Alternative implementation of the Knapsack Problem, which assigns a different category for each - decision variable, and therefore trains one machine learning model per variable. - """ - - def get_instance_features(self): - return np.hstack([self.weights, self.prices]) - - def get_variable_features(self, var, index): - return np.array([ - ]) + return np.array([]) def get_variable_category(self, var, index): return index + + +class MultiKnapsackGenerator: + def __init__(self, + n=randint(low=100, high=101), + m=randint(low=30, high=31), + w=randint(low=0, high=1000), + K=randint(low=500, high=500), + u=uniform(loc=0.0, scale=1.0), + alpha=uniform(loc=0.25, scale=0.0), + ): + """Initialize the problem generator. + + Instances have a random number of items (or variables) and a random number of knapsacks + (or constraints), as specified by the provided probability distributions `n` and `m`, + respectively. The weight of each item `i` on knapsack `j` is sampled independently from + the provided distribution `w`. The capacity of knapsack `j` is set to: + + alpha_j * sum(w[i,j] for i in range(n)), + + where `alpha_j`, the tightness ratio, is sampled from the provided probability + distribution `alpha`. To make the instances more challenging, the costs of the items + are linearly correlated to their average weights. More specifically, the weight of each + item `i` is set to: + + sum(w[i,j]/m for j in range(m)) + K * u_i, + + where `K`, the correlation coefficient, and `u_i`, the correlation multiplier, are sampled + from the provided probability distributions. Note that `K` is only sample once for the + entire instance. + + Parameters + ---------- + n: rv_discrete + Probability distribution for the number of items (or variables) + m: rv_discrete + Probability distribution for the number of knapsacks (or constraints) + w: rv_discrete + Probability distribution for the item weights + K: rv_discrete + Probability distribution for the profit correlation coefficient + u: rv_continuous + Probability distribution for the profit multiplier + alpha: rv_continuous + Probability distribution for the tightness ratio + """ + assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution" + assert isinstance(m, rv_frozen), "m should be a SciPy probability distribution" + assert isinstance(w, rv_frozen), "w should be a SciPy probability distribution" + assert isinstance(K, rv_frozen), "K should be a SciPy probability distribution" + assert isinstance(u, rv_frozen), "u should be a SciPy probability distribution" + assert isinstance(alpha, rv_frozen), "alpha should be a SciPy probability distribution" + self.n = n + self.m = m + self.w = w + self.K = K + self.u = u + self.alpha = alpha + + def generate(self, n_samples): + def _sample(): + n = self.n.rvs() + m = self.m.rvs() + K = self.K.rvs() + u = self.u.rvs(n) + alpha = self.alpha.rvs(m) + weights = np.array([self.w.rvs(n) for _ in range(m)]) + prices = np.array([weights[:,j].sum() / m + K * u[j] for j in range(n)]) + capacities = np.array([weights[i,:].sum() * alpha[i] for i in range(m)]) + return MultiKnapsackInstance(prices, capacities, weights) + return [_sample() for _ in range(n_samples)] + + \ No newline at end of file diff --git a/miplearn/problems/tests/test_knapsack.py b/miplearn/problems/tests/test_knapsack.py new file mode 100644 index 0000000..01c79c7 --- /dev/null +++ b/miplearn/problems/tests/test_knapsack.py @@ -0,0 +1,44 @@ +# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. +# Written by Alinson S. Xavier + +from miplearn import LearningSolver +from miplearn.problems.knapsack import MultiKnapsackGenerator, MultiKnapsackInstance +from scipy.stats import uniform, randint +import numpy as np + + +def test_knapsack_generator(): + gen = MultiKnapsackGenerator(n=randint(low=100, high=101), + m=randint(low=30, high=31), + w=randint(low=0, high=1000), + K=randint(low=500, high=501), + u=uniform(loc=1.0, scale=1.0), + alpha=uniform(loc=0.50, scale=0.0), + ) + instances = gen.generate(100) + w_sum = sum(instance.weights for instance in instances) / len(instances) + p_sum = sum(instance.prices for instance in instances) / len(instances) + b_sum = sum(instance.capacities for instance in instances) / len(instances) + assert round(np.mean(w_sum), -1) == 500. + assert round(np.mean(p_sum), -1) == 1250. + assert round(np.mean(b_sum), -3) == 25000. + + +def test_knapsack_instance(): + instance = MultiKnapsackInstance( + prices=np.array([5.0, 10.0, 15.0]), + capacities=np.array([20.0, 30.0]), + weights=np.array([ + [5.0, 5.0, 5.0], + [5.0, 10.0, 15.0], + ]) + ) + + assert (instance.get_instance_features() == np.array([ + 5.0, 10.0, 15.0, 20.0, 30.0, 5.0, 5.0, 5.0, 5.0, 10.0, 15.0 + ])).all() + + solver = LearningSolver() + results = solver.solve(instance) + assert results["Problem"][0]["Lower bound"] == 30.0 \ No newline at end of file diff --git a/miplearn/tests/test_solver.py b/miplearn/tests/test_solver.py index bccf9bd..2e97fab 100644 --- a/miplearn/tests/test_solver.py +++ b/miplearn/tests/test_solver.py @@ -3,25 +3,28 @@ # Written by Alinson S. Xavier from miplearn import LearningSolver -from miplearn.problems.knapsack import KnapsackInstance2 +from miplearn.problems.knapsack import MultiKnapsackInstance from miplearn.branching import BranchPriorityComponent from miplearn.warmstart import WarmStartComponent import numpy as np +def _get_instance(): + return MultiKnapsackInstance( + weights=np.array([[23., 26., 20., 18.]]), + prices=np.array([505., 352., 458., 220.]), + capacities=np.array([67.]) + ) + def test_solver(): - instance = KnapsackInstance2(weights=[23., 26., 20., 18.], - prices=[505., 352., 458., 220.], - capacity=67.) + instance = _get_instance() solver = LearningSolver() solver.solve(instance) solver.fit() solver.solve(instance) def test_solve_save_load_state(): - instance = KnapsackInstance2(weights=[23., 26., 20., 18.], - prices=[505., 352., 458., 220.], - capacity=67.) + instance = _get_instance() components_before = { "warm-start": WarmStartComponent(), "branch-priority": BranchPriorityComponent(), @@ -43,10 +46,7 @@ def test_solve_save_load_state(): assert len(solver.components["warm-start"].y_train) == prev_y_train_len def test_parallel_solve(): - instances = [KnapsackInstance2(weights=np.random.rand(5), - prices=np.random.rand(5), - capacity=3.0) - for _ in range(10)] + instances = [_get_instance() for _ in range(10)] solver = LearningSolver() results = solver.parallel_solve(instances, n_jobs=3) assert len(results) == 10 @@ -54,9 +54,7 @@ def test_parallel_solve(): assert len(solver.components["warm-start"].y_train[0]) == 10 def test_solver_random_branch_priority(): - instance = KnapsackInstance2(weights=[23., 26., 20., 18.], - prices=[505., 352., 458., 220.], - capacity=67.) + instance = _get_instance() components = { "warm-start": BranchPriorityComponent(initial_priority=np.array([1, 2, 3, 4])), } diff --git a/miplearn/tests/test_transformer.py b/miplearn/tests/test_transformer.py index ca13794..049ceb6 100644 --- a/miplearn/tests/test_transformer.py +++ b/miplearn/tests/test_transformer.py @@ -2,59 +2,19 @@ # Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. # Written by Alinson S. Xavier +from miplearn import LearningSolver from miplearn.transformers import PerVariableTransformer -from miplearn.problems.knapsack import KnapsackInstance, KnapsackInstance2 +from miplearn.problems.knapsack import MultiKnapsackInstance import numpy as np import pyomo.environ as pe - -def test_transform(): - transformer = PerVariableTransformer() - instance = KnapsackInstance(weights=[23., 26., 20., 18.], - prices=[505., 352., 458., 220.], - capacity=67.) - model = instance.to_model() - solver = pe.SolverFactory('gurobi') - solver.options["threads"] = 1 - solver.solve(model) - - var_split = transformer.split_variables(instance, model) - var_split_expected = { - "default": [ - (model.x, 0), - (model.x, 1), - (model.x, 2), - (model.x, 3) - ] - } - assert var_split == var_split_expected - var_index_pairs = [(model.x, i) for i in range(4)] - - x_actual = transformer.transform_instance(instance, var_index_pairs) - x_expected = np.array([ - [67., 21.75, 23., 505.], - [67., 21.75, 26., 352.], - [67., 21.75, 20., 458.], - [67., 21.75, 18., 220.], - ]) - assert x_expected.tolist() == np.round(x_actual, decimals=2).tolist() - - solver.solve(model) - y_actual = transformer.transform_solution(var_index_pairs) - y_expected = np.array([ - [0., 1.], - [1., 0.], - [0., 1.], - [0., 1.], - ]) - assert y_actual.tolist() == y_expected.tolist() - - def test_transform_with_categories(): transformer = PerVariableTransformer() - instance = KnapsackInstance2(weights=[23., 26., 20., 18.], - prices=[505., 352., 458., 220.], - capacity=67.) + instance = MultiKnapsackInstance( + weights=np.array([[23., 26., 20., 18.]]), + prices=np.array([505., 352., 458., 220.]), + capacities=np.array([67.]) + ) model = instance.to_model() solver = pe.SolverFactory('gurobi') solver.options["threads"] = 1 @@ -71,10 +31,11 @@ def test_transform_with_categories(): var_index_pairs = var_split[0] x_actual = transformer.transform_instance(instance, var_index_pairs) - x_expected = np.array([ - [23., 26., 20., 18., 505., 352., 458., 220.] + x_expected = np.hstack([ + instance.get_instance_features(), + instance.get_variable_features(model.x, 0), ]) - assert x_expected.tolist() == np.round(x_actual, decimals=2).tolist() + assert (x_expected == x_actual).all() solver.solve(model) diff --git a/requirements.txt b/requirements.txt index 51e8597..57cc412 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ sklearn networkx tqdm pandas +pytest-watch \ No newline at end of file