From e4526bc72464a1f6ddec81335e9a39edbc1eedf7 Mon Sep 17 00:00:00 2001
From: Alinson S Xavier
Date: Fri, 20 Dec 2019 14:18:26 -0600
Subject: [PATCH] Add PerVariableTransformer

---
 Makefile                           |  9 ++++
 miplearn/__init__.py               |  5 +-
 miplearn/core.py                   | 41 ----------------
 miplearn/instance.py               | 68 ++++++++++++++++++++++++++
 miplearn/problems/__init__.py      |  0
 miplearn/problems/knapsack.py      | 51 ++++++++++++++++++++
 miplearn/problems/stab.py          | 60 +++++++++++++++++++++++
 miplearn/solvers.py                | 77 +++++++++++++++++++++++++-----
 miplearn/test_stab.py              | 51 --------------------
 miplearn/tests/__init__.py         |  0
 miplearn/tests/test_transformer.py | 75 +++++++++++++++++++++++++++++
 miplearn/transformers.py           | 57 ++++++++++++++++++++++
 miplearn/warmstart.py              | 29 +++++++++++
 13 files changed, 418 insertions(+), 105 deletions(-)
 create mode 100644 Makefile
 delete mode 100644 miplearn/core.py
 create mode 100644 miplearn/instance.py
 create mode 100644 miplearn/problems/__init__.py
 create mode 100644 miplearn/problems/knapsack.py
 create mode 100644 miplearn/problems/stab.py
 delete mode 100644 miplearn/test_stab.py
 create mode 100644 miplearn/tests/__init__.py
 create mode 100644 miplearn/tests/test_transformer.py
 create mode 100644 miplearn/transformers.py
 create mode 100644 miplearn/warmstart.py

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..25d83fc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+PYTEST_ARGS := -W ignore::DeprecationWarning --capture=no -vv
+
+test:
+	pytest $(PYTEST_ARGS)
+
+test-watch:
+	pytest-watch -- $(PYTEST_ARGS)
+
+.PHONY: test test-watch
diff --git a/miplearn/__init__.py b/miplearn/__init__.py
index 7248eb0..ab58ecc 100644
--- a/miplearn/__init__.py
+++ b/miplearn/__init__.py
@@ -1,3 +1,6 @@
 # MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
 # Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
-# Written by Alinson S. Xavier
\ No newline at end of file
+# Written by Alinson S. Xavier
+
+from .instance import Instance
+from .solvers import LearningSolver
\ No newline at end of file
diff --git a/miplearn/core.py b/miplearn/core.py
deleted file mode 100644
index 3812908..0000000
--- a/miplearn/core.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
-# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
-# Written by Alinson S. Xavier
-
-from abc import ABC, abstractmethod
-
-class Parameters(ABC):
-    """
-    Abstract class for holding the data that distinguishes one relevant instance of the problem
-    from another.
-
-    In the knapsack problem, for example, this class could hold the number of items, their weights
-    and costs, as well as the size of the knapsack. Objects implementing this class are able to
-    convert themselves into concrete optimization model, which can be solved by a MIPSolver, or
-    into 1-dimensional numpy arrays, which can be given to a machine learning model.
-    """
-
-    @abstractmethod
-    def to_model(self):
-        """
-        Convert the parameters into a concrete optimization model.
-        """
-        pass
-
-    @abstractmethod
-    def to_array(self):
-        """
-        Convert the parameters into a 1-dimensional array.
-
-        The array is used by the LearningEnhancedSolver to determine how similar two instances are.
-        After some normalization or embedding, it may also be used as input to the machine learning
-        models. It must be numerical.
-
-        There is not necessarily a one-to-one correspondence between parameters and arrays. The
-        array may encode only part of the data necessary to generate a concrete optimization model.
-        The entries may also be reductions on the original data. For example, in the knapsack
-        problem, an implementation may decide to encode only the average weights, the average prices
-        and the size of the knapsack. This technique may be used to guarantee that arrays
-        correponding to instances of different sizes have the same dimension.
-        """
-        pass
diff --git a/miplearn/instance.py b/miplearn/instance.py
new file mode 100644
index 0000000..49fa5d4
--- /dev/null
+++ b/miplearn/instance.py
@@ -0,0 +1,68 @@
+# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+from abc import ABC, abstractmethod
+
+class Instance(ABC):
+    """
+    Abstract class holding all the data necessary to generate a concrete model of the problem.
+
+    In the knapsack problem, for example, this class could hold the number of items, their weights
+    and costs, as well as the size of the knapsack. Objects implementing this class are able to
+    convert themselves into a concrete optimization model, which can be optimized by a solver, or
+    into arrays of features, which can be provided as inputs to machine learning models.
+    """
+
+    @abstractmethod
+    def to_model(self):
+        """
+        Returns a concrete Pyomo model corresponding to this instance.
+        """
+        pass
+
+    @abstractmethod
+    def get_instance_features(self):
+        """
+        Returns a 1-dimensional NumPy array of (numerical) features describing the entire instance.
+
+        The array is used by LearningSolver to determine how similar two instances are. It may also
+        be used to predict, in combination with variable-specific features, the values of binary
+        decision variables in the problem.
+
+        There is not necessarily a one-to-one correspondence between models and instance features:
+        the features may encode only part of the data necessary to generate the complete model.
+        Features may also be statistics computed from the original data. For example, in the
+        knapsack problem, an implementation may decide to provide as instance features only
+        the average weights, the average prices, the number of items and the size of the knapsack.
+
+        The returned array MUST have the same length for all relevant instances of the problem. If
+        two instances map into arrays of different lengths, they cannot be solved by the same
+        LearningSolver object.
+        """
+        pass
+
+    @abstractmethod
+    def get_variable_features(self, var, index):
+        """
+        Returns a 1-dimensional array of (numerical) features describing a particular decision
+        variable.
+
+        The argument `var` is a pyomo.core.Var object, which represents a collection of decision
+        variables. The argument `index` specifies which variable in the collection is the relevant
+        one.
+
+        In combination with instance features, variable features are used by LearningSolver to
+        predict, among other things, the optimal value of each decision variable before the
+        optimization takes place. In the knapsack problem, for example, an implementation could
+        provide as variable features the weight and the price of a specific item.
+
+        Like instance features, the arrays returned by this method MUST have the same length for
+        all variables, and for all relevant instances of the problem.
+
+        If the value of the given variable should not be predicted, this method MUST return None.
+        """
+        pass
+
+    def get_variable_category(self, var, index):
+        return "default"
\ No newline at end of file
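Usage sketch (illustrative only, not part of the patch): a minimal, hypothetical Instance subclass showing the feature contract above. The class TinyInstance and its features are invented; the import goes through miplearn.instance directly, so no solver or TensorFlow installation is needed just to build the model.

    import numpy as np
    import pyomo.environ as pe
    from miplearn.instance import Instance

    class TinyInstance(Instance):
        def __init__(self, costs):
            self.costs = costs

        def to_model(self):
            # Concrete Pyomo model: pick at least one item, minimizing total cost.
            m = pe.ConcreteModel()
            items = range(len(self.costs))
            m.x = pe.Var(items, domain=pe.Binary)
            m.OBJ = pe.Objective(expr=sum(self.costs[i] * m.x[i] for i in items))
            m.pick_one = pe.Constraint(expr=sum(m.x[i] for i in items) >= 1)
            return m

        def get_instance_features(self):
            # Fixed-length summary of the whole instance (always length 2).
            return np.array([len(self.costs), float(np.mean(self.costs))])

        def get_variable_features(self, var, index):
            # Fixed-length description of one decision variable (always length 1).
            return np.array([self.costs[index]])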
+ """ + pass + + def get_variable_category(self, var, index): + return "default" \ No newline at end of file diff --git a/miplearn/problems/__init__.py b/miplearn/problems/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py new file mode 100644 index 0000000..15271d3 --- /dev/null +++ b/miplearn/problems/knapsack.py @@ -0,0 +1,51 @@ +# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization +# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. +# Written by Alinson S. Xavier + +import miplearn +import numpy as np +import pyomo.environ as pe + +class KnapsackInstance(miplearn.Instance): + def __init__(self, weights, prices, capacity): + self.weights = weights + self.prices = prices + self.capacity = capacity + + def to_model(self): + model = m = pe.ConcreteModel() + items = range(len(self.weights)) + m.x = pe.Var(items, domain=pe.Binary) + m.OBJ = pe.Objective(rule=lambda m : sum(m.x[v] * self.prices[v] for v in items), + sense=pe.maximize) + m.eq_capacity = pe.Constraint(rule = lambda m : + sum(m.x[v] * self.weights[v] + for v in items) <= self.capacity) + return m + + def get_instance_features(self): + return np.array([ + self.capacity, + np.average(self.weights), + ]) + + def get_variable_features(self, var, index): + return np.array([ + self.weights[index], + self.prices[index], + ]) + +class KnapsackInstance2(KnapsackInstance): + """ + Alternative implementation of the Knapsack Problem, which assigns a different category for each + decision variable, and therefore trains one machine learning model per variable. + """ + def get_instance_features(self): + return np.hstack([self.weights, self.prices]) + + def get_variable_features(self, var, index): + return np.array([ + ]) + + def get_variable_category(self, var, index): + return index \ No newline at end of file diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py new file mode 100644 index 0000000..aa0e4b4 --- /dev/null +++ b/miplearn/problems/stab.py @@ -0,0 +1,60 @@ +# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization +# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. +# Written by Alinson S. 
diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py
new file mode 100644
index 0000000..aa0e4b4
--- /dev/null
+++ b/miplearn/problems/stab.py
@@ -0,0 +1,60 @@
+# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+import numpy as np
+import pyomo.environ as pe
+import networkx as nx
+from miplearn import Instance
+import random
+
+class MaxStableSetGenerator:
+    def __init__(self, sizes=[50], densities=[0.1]):
+        self.sizes = sizes
+        self.densities = densities
+
+    def generate(self):
+        size = random.choice(self.sizes)
+        density = random.choice(self.densities)
+        self.graph = nx.generators.random_graphs.binomial_graph(size, density)
+        weights = np.ones(self.graph.number_of_nodes())
+        return MaxStableSetInstance(self.graph, weights)
+
+
+class MaxStableSetInstance(Instance):
+    def __init__(self, graph, weights):
+        self.graph = graph
+        self.weights = weights
+
+    def to_model(self):
+        nodes = list(self.graph.nodes)
+        edges = list(self.graph.edges)
+        model = m = pe.ConcreteModel()
+        m.x = pe.Var(nodes, domain=pe.Binary)
+        m.OBJ = pe.Objective(rule=lambda m: sum(m.x[v] * self.weights[v] for v in nodes),
+                             sense=pe.maximize)
+        m.edge_eqs = pe.ConstraintList()
+        for edge in edges:
+            m.edge_eqs.add(m.x[edge[0]] + m.x[edge[1]] <= 1)
+        return m
+
+    def get_instance_features(self):
+        return np.array([
+            self.graph.number_of_nodes(),
+            self.graph.number_of_edges(),
+        ])
+
+    def get_variable_features(self, var, index):
+        first_neighbors = list(self.graph.neighbors(index))
+        second_neighbors = [list(self.graph.neighbors(u)) for u in first_neighbors]
+        degree = len(first_neighbors)
+        neighbor_degrees = sorted([len(nn) for nn in second_neighbors])
+        neighbor_degrees = neighbor_degrees + [100.] * 10
+        return np.array([
+            degree,
+            neighbor_degrees[0] - degree,
+            neighbor_degrees[1] - degree,
+            neighbor_degrees[2] - degree,
+        ])
+
+
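Usage sketch (illustrative, not part of the patch; assumes networkx and a working CPLEX installation, since CPLEX is the solver used elsewhere in this patch): draw a few random instances from the generator and solve them directly with Pyomo.

    import pyomo.environ as pe
    from miplearn.problems.stab import MaxStableSetGenerator

    generator = MaxStableSetGenerator(sizes=[50], densities=[0.1])
    for _ in range(3):
        instance = generator.generate()
        model = instance.to_model()
        pe.SolverFactory('cplex').solve(model)
        # All weights are 1.0, so the objective value is simply the stable set size.
        chosen = [v for v in model.x if model.x[v].value > 0.5]
        print("stable set of size", len(chosen))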
- """ - model = params.to_model() + def solve(self, + instance, + tee=False, + learn=True): + model = instance.to_model() self.parent_solver.set_instance(model) - self.parent_solver.solve(tee=True) - \ No newline at end of file + self.cplex = self.parent_solver._solver_model + x = self._get_features(instance) + + if self.ws_predictor is not None: + self.cplex.MIP_starts.delete() + ws = self.ws_predictor.predict(x) + if ws is not None: + _add_warm_start(self.cplex, ws) + + self.parent_solver.solve(tee=tee) + + solution = np.array(self.cplex.solution.get_values()) + y = np.transpose(np.vstack((solution, 1 - solution))) + self._update_training_set(x, y) + return y + + def transform(self, instance): + model = instance.to_model() + self.parent_solver.set_instance(model) + self.cplex = self.parent_solver._solver_model + return self._get_features(instance) + + def predict(self, instance): + pass + + def _update_training_set(self, x, y): + if self.train_x is None: + self.train_x = x + self.train_y = y + else: + self.train_x = np.vstack((self.train_x, x)) + self.train_y = np.vstack((self.train_y, y)) + + def fit(self): + if self.ws_predictor is not None: + self.ws_predictor.fit(self.train_x, self.train_y) + +def _add_warm_start(cplex, ws): + assert isinstance(ws, np.ndarray) + assert ws.shape == (cplex.variables.get_num(),) + indices, values = [], [] + for k in range(len(ws)): + if isfinite(ws[k]): + indices += [k] + values += [ws[k]] + print("Adding warm start with %d values" % len(indices)) + cplex.MIP_starts.add([indices, values], cplex.MIP_starts.effort_level.solve_MIP) + diff --git a/miplearn/test_stab.py b/miplearn/test_stab.py deleted file mode 100644 index 027b163..0000000 --- a/miplearn/test_stab.py +++ /dev/null @@ -1,51 +0,0 @@ -# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization -# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved. -# Written by Alinson S. 
diff --git a/miplearn/test_stab.py b/miplearn/test_stab.py
deleted file mode 100644
index 027b163..0000000
--- a/miplearn/test_stab.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
-# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
-# Written by Alinson S. Xavier
-
-from .solvers import LearningSolver
-from .core import Parameters
-import numpy as np
-import pyomo.environ as pe
-import networkx as nx
-
-
-class MaxStableSetGenerator:
-    """Class that generates random instances of the Maximum Stable Set (MSS) Problem."""
-
-    def __init__(self, n_vertices, density=0.1, seed=42):
-        self.graph = nx.generators.random_graphs.binomial_graph(n_vertices, density, seed)
-        self.base_weights = np.random.rand(self.graph.number_of_nodes()) * 10
-
-    def generate(self):
-        perturbation = np.random.rand(self.graph.number_of_nodes()) * 0.1
-        weights = self.base_weights + perturbation
-        return MaxStableSetParameters(self.graph, weights)
-
-
-class MaxStableSetParameters(Parameters):
-    def __init__(self, graph, weights):
-        self.graph = graph
-        self.weights = weights
-
-    def to_model(self):
-        nodes = list(self.graph.nodes)
-        edges = list(self.graph.edges)
-        model = m = pe.ConcreteModel()
-        m.x = pe.Var(nodes, domain=pe.Binary)
-        m.OBJ = pe.Objective(rule=lambda m : sum(m.x[v] * self.weights[v] for v in nodes),
-                             sense=pe.maximize)
-        m.edge_eqs = pe.ConstraintList()
-        for edge in edges:
-            m.edge_eqs.add(m.x[edge[0]] + m.x[edge[1]] <= 1)
-        return m
-
-    def to_array(self):
-        return self.weights
-
-
-def test_stab():
-    generator = MaxStableSetGenerator(n_vertices=100)
-    for k in range(5):
-        params = generator.generate()
-        solver = LearningSolver()
-        solver.solve(params)
\ No newline at end of file
diff --git a/miplearn/tests/__init__.py b/miplearn/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/miplearn/tests/test_transformer.py b/miplearn/tests/test_transformer.py
new file mode 100644
index 0000000..83d9171
--- /dev/null
+++ b/miplearn/tests/test_transformer.py
@@ -0,0 +1,75 @@
+# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+from miplearn import Instance, LearningSolver
+from miplearn.transformers import PerVariableTransformer
+from miplearn.problems.knapsack import KnapsackInstance, KnapsackInstance2
+import numpy as np
+import pyomo.environ as pe
+
+def test_transform():
+    transformer = PerVariableTransformer()
+    instance = KnapsackInstance(weights=[23., 26., 20., 18.],
+                                prices=[505., 352., 458., 220.],
+                                capacity=67.)
+    model = instance.to_model()
+
+    var_split = transformer.split_variables(instance, model)
+    var_split_expected = {
+        "default": [
+            (model.x, 0),
+            (model.x, 1),
+            (model.x, 2),
+            (model.x, 3)
+        ]
+    }
+    assert var_split == var_split_expected
+    var_index_pairs = [(model.x, i) for i in range(4)]
+
+    x_actual = transformer.transform_instance(instance, var_index_pairs)
+    x_expected = np.array([
+        [67., 21.75, 23., 505.],
+        [67., 21.75, 26., 352.],
+        [67., 21.75, 20., 458.],
+        [67., 21.75, 18., 220.],
+    ])
+    assert x_expected.tolist() == x_actual.tolist()
+
+    solver = pe.SolverFactory('cplex')
+    solver.options["threads"] = 1
+    solver.solve(model)
+
+    y_actual = transformer.transform_solution(var_index_pairs)
+    y_expected = np.array([1., 0., 1., 1.])
+    assert y_actual.tolist() == y_expected.tolist()
+
+
+def test_transform_with_categories():
+    transformer = PerVariableTransformer()
+    instance = KnapsackInstance2(weights=[23., 26., 20., 18.],
+                                 prices=[505., 352., 458., 220.],
+                                 capacity=67.)
+    model = instance.to_model()
+
+    var_split = transformer.split_variables(instance, model)
+    var_split_expected = {
+        0: [(model.x, 0)],
+        1: [(model.x, 1)],
+        2: [(model.x, 2)],
+        3: [(model.x, 3)],
+    }
+    assert var_split == var_split_expected
+
+    var_index_pairs = var_split[0]
+    x_actual = transformer.transform_instance(instance, var_index_pairs)
+    x_expected = np.array([[23., 26., 20., 18., 505., 352., 458., 220.]])
+    assert x_expected.tolist() == x_actual.tolist()
+
+    solver = pe.SolverFactory('cplex')
+    solver.options["threads"] = 1
+    solver.solve(model)
+
+    y_actual = transformer.transform_solution(var_index_pairs)
+    y_expected = np.array([1.])
+    assert y_actual.tolist() == y_expected.tolist()
\ No newline at end of file
diff --git a/miplearn/transformers.py b/miplearn/transformers.py
new file mode 100644
index 0000000..733eef4
--- /dev/null
+++ b/miplearn/transformers.py
@@ -0,0 +1,57 @@
+# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+import numpy as np
+from pyomo.core import Var
+
+class PerVariableTransformer:
+    """
+    Converts a miplearn.Instance into a matrix of features suitable for training
+    machine learning models that make one prediction per decision variable.
+    """
+    def __init__(self):
+        pass
+
+    def transform_instance(self, instance, var_index_pairs):
+        instance_features = self._get_instance_features(instance)
+        variable_features = self._get_variable_features(instance, var_index_pairs)
+        return np.vstack([
+            np.hstack([instance_features, vf])
+            for vf in variable_features
+        ])
+
+    def _get_instance_features(self, instance):
+        features = instance.get_instance_features()
+        assert isinstance(features, np.ndarray)
+        return features
+
+    def _get_variable_features(self, instance, var_index_pairs):
+        features = []
+        expected_shape = None
+        for (var, index) in var_index_pairs:
+            vf = instance.get_variable_features(var, index)
+            assert isinstance(vf, np.ndarray)
+            if expected_shape is None:
+                assert len(vf.shape) == 1
+                expected_shape = vf.shape
+            else:
+                assert vf.shape == expected_shape
+            features += [vf]
+        return np.array(features)
+
+    def transform_solution(self, var_index_pairs):
+        y = []
+        for (var, index) in var_index_pairs:
+            y += [var[index].value]
+        return np.array(y)
+
+    def split_variables(self, instance, model):
+        result = {}
+        for var in model.component_objects(Var):
+            for index in var:
+                category = instance.get_variable_category(var, index)
+                if category not in result.keys():
+                    result[category] = []
+                result[category] += [(var, index)]
+        return result
\ No newline at end of file
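Sketch of how the transformer's outputs become training data for a warm-start model (illustrative only; assumes CPLEX is available, as in the tests above). transform_instance yields one feature row per variable, transform_solution yields the matching labels, and LearningSolver stores labels as a two-column (value, 1 - value) matrix:

    import numpy as np
    import pyomo.environ as pe
    from miplearn.problems.knapsack import KnapsackInstance
    from miplearn.transformers import PerVariableTransformer

    transformer = PerVariableTransformer()
    xs, ys = [], []
    for weights, prices in [([23., 26.], [505., 352.]),
                            ([20., 18.], [458., 220.])]:
        instance = KnapsackInstance(weights, prices, capacity=30.)
        model = instance.to_model()
        pe.SolverFactory('cplex').solve(model)
        pairs = transformer.split_variables(instance, model)["default"]
        xs.append(transformer.transform_instance(instance, pairs))  # (n_vars, n_features)
        ys.append(transformer.transform_solution(pairs))            # (n_vars,)

    values = np.hstack(ys)
    train_x = np.vstack(xs)
    train_y = np.transpose(np.vstack((values, 1 - values)))  # columns: (value, 1 - value)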
diff --git a/miplearn/warmstart.py b/miplearn/warmstart.py
new file mode 100644
index 0000000..0ccb46d
--- /dev/null
+++ b/miplearn/warmstart.py
@@ -0,0 +1,29 @@
+# MIPLearn: A Machine-Learning Framework for Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+import tensorflow as tf
+import tensorflow.keras as keras
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation
+import numpy as np
+
+class WarmStartPredictor:
+    def __init__(self, model=None, threshold=0.80):
+        self.model = model
+        self.threshold = threshold
+
+    def fit(self, train_x, train_y):
+        pass
+
+    def predict(self, x):
+        if self.model is None: return None
+        assert isinstance(x, np.ndarray)
+        y = self.model.predict(x)
+        n_vars = y.shape[0]
+        ws = np.array([float("nan")] * n_vars)
+        ws[y[:, 0] > self.threshold] = 1.0
+        ws[y[:, 1] > self.threshold] = 0.0
+        return ws
+
+
\ No newline at end of file
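The predict() method above expects self.model to return a two-column score matrix: column 0 is thresholded to fix a variable at 1.0, column 1 to fix it at 0.0, and everything else stays NaN (not warm-started). Since fit() is still a stub, here is a minimal sketch of a classifier that would satisfy that contract, using the Keras layers already imported in this file; the helper name and hyperparameters are invented, not part of the patch:

    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense

    def build_warmstart_model(n_features):
        # Two softmax outputs: column 0 ~ score for "variable = 1",
        # column 1 ~ score for "variable = 0", matching predict() above.
        model = Sequential([
            Dense(16, activation="relu", input_shape=(n_features,)),
            Dense(2, activation="softmax"),
        ])
        model.compile(optimizer="adam", loss="categorical_crossentropy")
        return model

    # Hypothetical completion of WarmStartPredictor.fit:
    #     def fit(self, train_x, train_y):
    #         self.model = build_warmstart_model(train_x.shape[1])
    #         self.model.fit(train_x, train_y, epochs=10, verbose=0)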