Implement BenchmarkRunner

pull/1/head
Alinson S. Xavier 6 years ago
parent 07090bac9e
commit 8f141e6a9d

@@ -7,13 +7,14 @@ Table of contents
-----------------
* [Features](#features)
* [Installation](#installation)
* [Typical Usage](#typical-usage)
* [Basic usage](#basic-usage)
* [Using LearningSolver](#using-learningsolver)
* [Selecting the internal MIP solver](#selecting-the-internal-mip-solver)
* [Describing problem instances](#describing-problem-instances)
* [Obtaining heuristic solutions](#obtaining-heuristic-solutions)
* [Saving and loading solver state](#saving-and-loading-solver-state)
* [Solving training instances in parallel](#solving-training-instances-in-parallel)
* [Benchmark](#benchmark)
* [Current Limitations](#current-limitations)
* [References](#references)
* [Authors](#authors)
@@ -38,8 +39,8 @@ The package is currently only available for Python and Pyomo. It can be installe
pip install git+ssh://git@github.com/iSoron/miplearn.git
```
Typical Usage
-------------
Basic Usage
-----------
### Using `LearningSolver`
@@ -136,6 +137,37 @@ solver.load("/tmp/data.bin")
solver.solve(test_instance)
```
Benchmark
---------
MIPLearn provides the utility class `BenchmarkRunner`, which simplifies the task of comparing the performance of different solvers. The snippet below shows its basic usage:
```python
from miplearn import BenchmarkRunner, LearningSolver

# Create train and test instances
train_instances = [...]
test_instances = [...]

# Training phase...
training_solver = LearningSolver(...)
training_solver.parallel_solve(train_instances, n_jobs=10)
training_solver.save("data.bin")

# Test phase...
test_solvers = {
    "Baseline": LearningSolver(...), # each solver may have different parameters
    "Strategy A": LearningSolver(...),
    "Strategy B": LearningSolver(...),
    "Strategy C": LearningSolver(...),
}
benchmark = BenchmarkRunner(test_solvers)
benchmark.load_fit("data.bin")
benchmark.parallel_solve(test_instances, n_jobs=2)
print(benchmark.raw_results())
```
The method `load_fit` loads the saved training data into each of the provided solvers and trains their respective ML models. The method `parallel_solve` solves the test instances in parallel and collects solver statistics, such as wallclock time and optimal value. Finally, `raw_results` returns a Pandas DataFrame containing the results.
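Since `raw_results` returns a plain DataFrame, the benchmark results can be summarized or exported with ordinary Pandas operations. The snippet below is only a sketch: it assumes the `benchmark` object from the previous example and the column names listed above (`Solver`, `Instance`, `Wallclock Time`, `Optimal Value`).
```python
# Sketch only: summarize and export the benchmark results.
results = benchmark.raw_results()

# Average wallclock time and optimal value for each solver
print(results.groupby("Solver")[["Wallclock Time", "Optimal Value"]].mean())

# Save the raw results for later analysis
results.to_csv("benchmark_results.csv", index=False)
```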
Current Limitations
-------------------

@@ -4,3 +4,4 @@
from .instance import Instance
from .solvers import LearningSolver
from .benchmark import BenchmarkRunner

@@ -0,0 +1,45 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>

from .solvers import LearningSolver
import pandas as pd


class BenchmarkRunner:
    def __init__(self, solvers):
        assert isinstance(solvers, dict)
        for solver in solvers.values():
            assert isinstance(solver, LearningSolver)
        self.solvers = solvers
        self.results = None

    def load_fit(self, filename):
        # Load the saved training data into each solver and train its ML models
        for (name, solver) in self.solvers.items():
            solver.load(filename)
            solver.fit()

    def parallel_solve(self, instances, n_jobs=1):
        self.results = pd.DataFrame(columns=["Solver",
                                             "Instance",
                                             "Wallclock Time",
                                             "Optimal Value",
                                             ])
        for (name, solver) in self.solvers.items():
            results = solver.parallel_solve(instances, n_jobs=n_jobs, label=name)
            for i in range(len(instances)):
                # Different solvers report running time under different keys
                wallclock_time = None
                for key in ["Time", "Wall time", "Wallclock time"]:
                    if key not in results[i]["Solver"][0].keys():
                        continue
                    if str(results[i]["Solver"][0][key]) == "<undefined>":
                        continue
                    wallclock_time = float(results[i]["Solver"][0][key])
                self.results = self.results.append({
                    "Solver": name,
                    "Instance": i,
                    "Wallclock Time": wallclock_time,
                    "Optimal Value": results[i]["Problem"][0]["Lower bound"]
                }, ignore_index=True)

    def raw_results(self):
        return self.results

@@ -15,10 +15,12 @@ class MaxStableSetGenerator:
        self.base_weights = base_weights
        self.perturbation_scale = perturbation_scale

    def generate(self):
        perturbation = np.random.rand(self.graph.number_of_nodes()) * self.perturbation_scale
        weights = self.base_weights + perturbation
        return MaxStableSetInstance(self.graph, weights)

    def generate(self, n_samples):
        def _sample():
            perturbation = np.random.rand(self.graph.number_of_nodes()) * self.perturbation_scale
            weights = self.base_weights + perturbation
            return MaxStableSetInstance(self.graph, weights)
        return [_sample() for _ in range(n_samples)]
class MaxStableSetInstance(Instance):

@@ -72,7 +72,7 @@ class LearningSolver:
                var[index].value = 1

        # Solve MILP
        self._solve(model, tee=tee)
        solve_results = self._solve(model, tee=tee)

        # Update y_train
        for category in var_split.keys():
@@ -83,28 +83,36 @@ class LearningSolver:
            else:
                self.y_train[category] = np.vstack([self.y_train[category], y])

    def parallel_solve(self, instances, n_jobs=4):
        return solve_results

    def parallel_solve(self, instances, n_jobs=4, label="Solve"):
        def _process(instance):
            solver = copy(self)
            solver.solve(instance)
            return solver.x_train, solver.y_train
            solver = deepcopy(self)
            results = solver.solve(instance)
            return {
                "x_train": solver.x_train,
                "y_train": solver.y_train,
                "results": results,
            }

        def _merge(results):
            categories = results[0][0].keys()
            x_entries = [np.vstack([r[0][c] for r in results]) for c in categories]
            y_entries = [np.vstack([r[1][c] for r in results]) for c in categories]
            categories = results[0]["x_train"].keys()
            x_entries = [np.vstack([r["x_train"][c] for r in results]) for c in categories]
            y_entries = [np.vstack([r["y_train"][c] for r in results]) for c in categories]
            x_train = dict(zip(categories, x_entries))
            y_train = dict(zip(categories, y_entries))
            return x_train, y_train
            results = [r["results"] for r in results]
            return x_train, y_train, results

        results = Parallel(n_jobs=n_jobs)(
            delayed(_process)(i)
            for i in tqdm(instances)
            delayed(_process)(instance)
            for instance in tqdm(instances, desc=label)
        )
        x_train, y_train = _merge(results)
        x_train, y_train, results = _merge(results)
        self.x_train = x_train
        self.y_train = y_train
        return results

    def fit(self, x_train_dict=None, y_train_dict=None):
        if x_train_dict is None:
@@ -113,8 +121,9 @@ class LearningSolver:
        for category in x_train_dict.keys():
            x_train = x_train_dict[category]
            y_train = y_train_dict[category]
            self.ws_predictors[category] = deepcopy(self.ws_predictor_prototype)
            self.ws_predictors[category].fit(x_train, y_train)
            if self.ws_predictor_prototype is not None:
                self.ws_predictors[category] = deepcopy(self.ws_predictor_prototype)
                self.ws_predictors[category].fit(x_train, y_train)

    def save(self, filename):
        with open(filename, "wb") as file:
@@ -136,6 +145,6 @@ class LearningSolver:
    def _solve(self, model, tee=False):
        if hasattr(self.parent_solver, "set_instance"):
            self.parent_solver.set_instance(model)
            self.parent_solver.solve(tee=tee, warmstart=True)
            return self.parent_solver.solve(tee=tee, warmstart=True)
        else:
            self.parent_solver.solve(model, tee=tee, warmstart=True)
            return self.parent_solver.solve(model, tee=tee, warmstart=True)

@@ -0,0 +1,41 @@
# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
# Written by Alinson S. Xavier <axavier@anl.gov>

from miplearn import LearningSolver, BenchmarkRunner
from miplearn.warmstart import KnnWarmStartPredictor
from miplearn.problems.stab import MaxStableSetInstance, MaxStableSetGenerator
import networkx as nx
import numpy as np
import pyomo.environ as pe


def test_benchmark():
    graph = nx.cycle_graph(10)
    base_weights = np.random.rand(10)

    # Generate training and test instances
    train_instances = MaxStableSetGenerator(graph=graph,
                                            base_weights=base_weights,
                                            perturbation_scale=1.0,
                                            ).generate(5)
    test_instances = MaxStableSetGenerator(graph=graph,
                                           base_weights=base_weights,
                                           perturbation_scale=1.0,
                                           ).generate(3)

    # Training phase...
    training_solver = LearningSolver()
    training_solver.parallel_solve(train_instances, n_jobs=10)
    training_solver.save("data.bin")

    # Test phase...
    test_solvers = {
        "Strategy A": LearningSolver(ws_predictor=None),
        "Strategy B": LearningSolver(ws_predictor=None),
    }
    benchmark = BenchmarkRunner(test_solvers)
    benchmark.load_fit("data.bin")
    benchmark.parallel_solve(test_instances, n_jobs=2)
    print(benchmark.raw_results())

@@ -38,6 +38,6 @@ def test_parallel_solve():
                                   capacity=3.0)
                 for _ in range(10)]
    solver = LearningSolver()
    solver.parallel_solve(instances, n_jobs=2)
    solver.parallel_solve(instances, n_jobs=3)
    assert len(solver.x_train[0]) == 10
    assert len(solver.y_train[0]) == 10

@@ -20,10 +20,10 @@ def test_stab():
def test_stab_generator():
    graph = nx.cycle_graph(5)
    base_weights = [1.0, 2.0, 3.0, 4.0, 5.0]
    generator = MaxStableSetGenerator(graph=graph,
                                      base_weights=base_weights,
                                      perturbation_scale=1.0)
    instances = [generator.generate() for _ in range(100_000)]
    instances = MaxStableSetGenerator(graph=graph,
                                      base_weights=base_weights,
                                      perturbation_scale=1.0,
                                      ).generate(100_000)
    weights = np.array([instance.weights for instance in instances])
    weights_avg = np.round(np.average(weights, axis=0), 2)
    weights_std = np.round(np.std(weights, axis=0), 2)

@@ -4,3 +4,4 @@ pytest
sklearn
networkx
tqdm
pandas

@@ -13,5 +13,6 @@ setup(
        'sklearn',
        'networkx',
        'tqdm',
        'pandas',
    ],
)