From 8f141e6a9d76379d4f493133b64c060d8c80d7fe Mon Sep 17 00:00:00 2001
From: Alinson S Xavier
Date: Thu, 23 Jan 2020 21:59:59 -0600
Subject: [PATCH] Implement BenchmarkRunner

---
 README.md                        | 38 ++++++++++++++++++++++++---
 miplearn/__init__.py             |  1 +
 miplearn/benchmark.py            | 45 ++++++++++++++++++++++++++++++++
 miplearn/problems/stab.py        | 10 ++++---
 miplearn/solvers.py              | 41 +++++++++++++++++------------
 miplearn/tests/test_benchmark.py | 41 +++++++++++++++++++++++++++++
 miplearn/tests/test_solver.py    |  2 +-
 miplearn/tests/test_stab.py      |  8 +++---
 requirements.txt                 |  3 ++-
 setup.py                         |  1 +
 10 files changed, 161 insertions(+), 29 deletions(-)
 create mode 100644 miplearn/benchmark.py
 create mode 100644 miplearn/tests/test_benchmark.py

diff --git a/README.md b/README.md
index 4b091a8..0b70e39 100644
--- a/README.md
+++ b/README.md
@@ -7,13 +7,14 @@ Table of contents
 -----------------
 * [Features](#features)
 * [Installation](#installation)
-* [Typical Usage](#typical-usage)
+* [Basic Usage](#basic-usage)
   * [Using LearningSolver](#using-learningsolver)
   * [Selecting the internal MIP solver](#selecting-the-internal-mip-solver)
   * [Describing problem instances](#describing-problem-instances)
   * [Obtaining heuristic solutions](#obtaining-heuristic-solutions)
   * [Saving and loading solver state](#saving-and-loading-solver-state)
   * [Solving training instances in parallel](#solving-training-instances-in-parallel)
+* [Benchmark](#benchmark)
 * [Current Limitations](#current-limitations)
 * [References](#references)
 * [Authors](#authors)
@@ -38,8 +39,8 @@ The package is currently only available for Python and Pyomo. It can be installe
 pip install git+ssh://git@github.com/iSoron/miplearn.git
 ```
 
-Typical Usage
--------------
+Basic Usage
+-----------
 
 ### Using `LearningSolver`
 
@@ -136,6 +137,37 @@ solver.load("/tmp/data.bin")
 solver.solve(test_instance)
 ```
 
+Benchmark
+---------
+
+MIPLearn provides the utility class `BenchmarkRunner`, which simplifies the task of comparing the performance of different solvers. The snippet below shows its basic usage:
+
+```python
+from miplearn import BenchmarkRunner, LearningSolver
+
+# Create train and test instances
+train_instances = [...]
+test_instances = [...]
+
+# Training phase...
+training_solver = LearningSolver(...)
+training_solver.parallel_solve(train_instances, n_jobs=10)
+training_solver.save("data.bin")
+
+# Test phase...
+test_solvers = {
+    "Baseline": LearningSolver(...),  # each solver may have different parameters
+    "Strategy A": LearningSolver(...),
+    "Strategy B": LearningSolver(...),
+    "Strategy C": LearningSolver(...),
+}
+benchmark = BenchmarkRunner(test_solvers)
+benchmark.load_fit("data.bin")
+benchmark.parallel_solve(test_instances, n_jobs=2)
+print(benchmark.raw_results())
+```
+
+The method `load_fit` loads the saved training data into each of the provided solvers and trains their respective ML models. The method `parallel_solve` solves the test instances in parallel and collects solver statistics such as running time and optimal value. Finally, `raw_results` produces a Pandas DataFrame containing the results.
 Current Limitations
 -------------------
 
diff --git a/miplearn/__init__.py b/miplearn/__init__.py
index 463ccd8..e74f38f 100644
--- a/miplearn/__init__.py
+++ b/miplearn/__init__.py
@@ -4,3 +4,4 @@
 
 from .instance import Instance
 from .solvers import LearningSolver
+from .benchmark import BenchmarkRunner
\ No newline at end of file
diff --git a/miplearn/benchmark.py b/miplearn/benchmark.py
new file mode 100644
index 0000000..4ad49cc
--- /dev/null
+++ b/miplearn/benchmark.py
@@ -0,0 +1,45 @@
+# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+from .solvers import LearningSolver
+import pandas as pd
+
+class BenchmarkRunner:
+    def __init__(self, solvers):
+        assert isinstance(solvers, dict)
+        for solver in solvers.values():
+            assert isinstance(solver, LearningSolver)
+        self.solvers = solvers
+        self.results = None
+
+    def load_fit(self, filename):
+        for (name, solver) in self.solvers.items():
+            solver.load(filename)
+            solver.fit()
+
+    def parallel_solve(self, instances, n_jobs=1):
+        self.results = pd.DataFrame(columns=["Solver",
+                                             "Instance",
+                                             "Wallclock Time",
+                                             "Optimal Value",
+                                            ])
+        for (name, solver) in self.solvers.items():
+            results = solver.parallel_solve(instances, n_jobs=n_jobs, label=name)
+            for i in range(len(instances)):
+                wallclock_time = None
+                for key in ["Time", "Wall time", "Wallclock time"]:
+                    if key not in results[i]["Solver"][0].keys():
+                        continue
+                    if str(results[i]["Solver"][0][key]) == "":
+                        continue
+                    wallclock_time = float(results[i]["Solver"][0][key])
+                self.results = self.results.append({
+                    "Solver": name,
+                    "Instance": i,
+                    "Wallclock Time": wallclock_time,
+                    "Optimal Value": results[i]["Problem"][0]["Lower bound"]
+                }, ignore_index=True)
+
+    def raw_results(self):
+        return self.results
\ No newline at end of file
diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py
index 5c7d1e0..e864ba6 100644
--- a/miplearn/problems/stab.py
+++ b/miplearn/problems/stab.py
@@ -15,10 +15,12 @@ class MaxStableSetGenerator:
         self.base_weights = base_weights
         self.perturbation_scale = perturbation_scale
 
-    def generate(self):
-        perturbation = np.random.rand(self.graph.number_of_nodes()) * self.perturbation_scale
-        weights = self.base_weights + perturbation
-        return MaxStableSetInstance(self.graph, weights)
+    def generate(self, n_samples):
+        def _sample():
+            perturbation = np.random.rand(self.graph.number_of_nodes()) * self.perturbation_scale
+            weights = self.base_weights + perturbation
+            return MaxStableSetInstance(self.graph, weights)
+        return [_sample() for _ in range(n_samples)]
 
 
 class MaxStableSetInstance(Instance):
diff --git a/miplearn/solvers.py b/miplearn/solvers.py
index c616041..f975544 100644
--- a/miplearn/solvers.py
+++ b/miplearn/solvers.py
@@ -72,7 +72,7 @@ class LearningSolver:
                 var[index].value = 1
 
         # Solve MILP
-        self._solve(model, tee=tee)
+        solve_results = self._solve(model, tee=tee)
 
         # Update y_train
         for category in var_split.keys():
@@ -83,28 +83,36 @@ class LearningSolver:
             else:
                 self.y_train[category] = np.vstack([self.y_train[category], y])
 
-    def parallel_solve(self, instances, n_jobs=4):
+        return solve_results
+
+    def parallel_solve(self, instances, n_jobs=4, label="Solve"):
         def _process(instance):
-            solver = copy(self)
-            solver.solve(instance)
-            return solver.x_train, solver.y_train
+            solver = deepcopy(self)
+            results = solver.solve(instance)
+            return {
+                "x_train": solver.x_train,
+                "y_train": solver.y_train,
+                "results": results,
+            }
 
         def _merge(results):
-            categories = results[0][0].keys()
-            x_entries = [np.vstack([r[0][c] for r in results]) for c in categories]
-            y_entries = [np.vstack([r[1][c] for r in results]) for c in categories]
+            categories = results[0]["x_train"].keys()
+            x_entries = [np.vstack([r["x_train"][c] for r in results]) for c in categories]
+            y_entries = [np.vstack([r["y_train"][c] for r in results]) for c in categories]
             x_train = dict(zip(categories, x_entries))
             y_train = dict(zip(categories, y_entries))
-            return x_train, y_train
+            results = [r["results"] for r in results]
+            return x_train, y_train, results
 
         results = Parallel(n_jobs=n_jobs)(
-            delayed(_process)(i)
-            for i in tqdm(instances)
+            delayed(_process)(instance)
+            for instance in tqdm(instances, desc=label)
         )
-        x_train, y_train = _merge(results)
+        x_train, y_train, results = _merge(results)
         self.x_train = x_train
         self.y_train = y_train
+        return results
 
     def fit(self, x_train_dict=None, y_train_dict=None):
         if x_train_dict is None:
@@ -113,8 +121,9 @@
         for category in x_train_dict.keys():
             x_train = x_train_dict[category]
             y_train = y_train_dict[category]
-            self.ws_predictors[category] = deepcopy(self.ws_predictor_prototype)
-            self.ws_predictors[category].fit(x_train, y_train)
+            if self.ws_predictor_prototype is not None:
+                self.ws_predictors[category] = deepcopy(self.ws_predictor_prototype)
+                self.ws_predictors[category].fit(x_train, y_train)
 
     def save(self, filename):
         with open(filename, "wb") as file:
@@ -136,6 +145,6 @@
     def _solve(self, model, tee=False):
         if hasattr(self.parent_solver, "set_instance"):
             self.parent_solver.set_instance(model)
-            self.parent_solver.solve(tee=tee, warmstart=True)
+            return self.parent_solver.solve(tee=tee, warmstart=True)
         else:
-            self.parent_solver.solve(model, tee=tee, warmstart=True)
+            return self.parent_solver.solve(model, tee=tee, warmstart=True)
diff --git a/miplearn/tests/test_benchmark.py b/miplearn/tests/test_benchmark.py
new file mode 100644
index 0000000..646128f
--- /dev/null
+++ b/miplearn/tests/test_benchmark.py
@@ -0,0 +1,41 @@
+# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+from miplearn import LearningSolver, BenchmarkRunner
+from miplearn.warmstart import KnnWarmStartPredictor
+from miplearn.problems.stab import MaxStableSetInstance, MaxStableSetGenerator
+import networkx as nx
+import numpy as np
+import pyomo.environ as pe
+
+
+def test_benchmark():
+    graph = nx.cycle_graph(10)
+    base_weights = np.random.rand(10)
+
+    # Generate training and test instances
+    train_instances = MaxStableSetGenerator(graph=graph,
+                                            base_weights=base_weights,
+                                            perturbation_scale=1.0,
+                                            ).generate(5)
+
+    test_instances = MaxStableSetGenerator(graph=graph,
+                                           base_weights=base_weights,
+                                           perturbation_scale=1.0,
+                                           ).generate(3)
+
+    # Training phase...
+    training_solver = LearningSolver()
+    training_solver.parallel_solve(train_instances, n_jobs=10)
+    training_solver.save("data.bin")
+
+    # Test phase...
+    test_solvers = {
+        "Strategy A": LearningSolver(ws_predictor=None),
+        "Strategy B": LearningSolver(ws_predictor=None),
+    }
+    benchmark = BenchmarkRunner(test_solvers)
+    benchmark.load_fit("data.bin")
+    benchmark.parallel_solve(test_instances, n_jobs=2)
+    print(benchmark.raw_results())
diff --git a/miplearn/tests/test_solver.py b/miplearn/tests/test_solver.py
index d14358a..00cd230 100644
--- a/miplearn/tests/test_solver.py
+++ b/miplearn/tests/test_solver.py
@@ -38,6 +38,6 @@ def test_parallel_solve():
                              capacity=3.0) for _ in range(10)]
     solver = LearningSolver()
-    solver.parallel_solve(instances, n_jobs=2)
+    solver.parallel_solve(instances, n_jobs=3)
     assert len(solver.x_train[0]) == 10
     assert len(solver.y_train[0]) == 10
\ No newline at end of file
diff --git a/miplearn/tests/test_stab.py b/miplearn/tests/test_stab.py
index 08ea704..7a97d78 100644
--- a/miplearn/tests/test_stab.py
+++ b/miplearn/tests/test_stab.py
@@ -20,10 +20,10 @@ def test_stab():
 def test_stab_generator():
     graph = nx.cycle_graph(5)
     base_weights = [1.0, 2.0, 3.0, 4.0, 5.0]
-    generator = MaxStableSetGenerator(graph=graph,
-                                      base_weights=base_weights,
-                                      perturbation_scale=1.0)
-    instances = [generator.generate() for _ in range(100_000)]
+    instances = MaxStableSetGenerator(graph=graph,
+                                      base_weights=base_weights,
+                                      perturbation_scale=1.0,
+                                      ).generate(100_000)
     weights = np.array([instance.weights for instance in instances])
     weights_avg = np.round(np.average(weights, axis=0), 2)
     weights_std = np.round(np.std(weights, axis=0), 2)
diff --git a/requirements.txt b/requirements.txt
index 67c30d6..127c973 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ numpy
 pytest
 sklearn
 networkx
-tqdm
\ No newline at end of file
+tqdm
+pandas
\ No newline at end of file
diff --git a/setup.py b/setup.py
index dabfb5e..139ef80 100644
--- a/setup.py
+++ b/setup.py
@@ -13,5 +13,6 @@ setup(
         'sklearn',
         'networkx',
         'tqdm',
+        'pandas',
     ],
 )
\ No newline at end of file