Implement BenchmarkRunner

2025-12-06 09:28:51 -06:00 · 2020-01-23 21:59:59 -06:00
parent 07090bac9e
commit 8f141e6a9d
10 changed files with 161 additions and 29 deletions
--- a/README.md
+++ b/README.md
@@ -7,13 +7,14 @@ Table of contents
 -----------------
 * [Features](#features)
 * [Installation](#installation)
-* [Typical Usage](#typical-usage)
+* [Basic Usage](#basic-usage)
    * [Using LearningSolver](#using-learningsolver)
    * [Selecting the internal MIP solver](#selecting-the-internal-mip-solver)
    * [Describing problem instances](#describing-problem-instances)
    * [Obtaining heuristic solutions](#obtaining-heuristic-solutions)
    * [Saving and loading solver state](#saving-and-loading-solver-state)
    * [Solving training instances in parallel](#solving-training-instances-in-parallel)
 * [Benchmark](#benchmark)
 * [Current Limitations](#current-limitations)
 * [References](#references)
 * [Authors](#authors)
@@ -38,8 +39,8 @@ The package is currently only available for Python and Pyomo. It can be installe
 pip install git+ssh://git@github.com/iSoron/miplearn.git
 ```
-Typical Usage
+Basic Usage
-------------
+-----------
 ### Using `LearningSolver`
@@ -136,6 +137,37 @@ solver.load("/tmp/data.bin")
 solver.solve(test_instance)
 ```
 Benchmark
 ---------
 MIPLearn provides the utility class `BenchmarkRunner`, which simplifies the task of comparing the performance of different solvers. The snippet below shows its basic usage:
 ```python
 from miplearn import BenchmarkRunner, LearningSolver
 # Create train and test instances
 train_instances = [...]
 test_instances  = [...]
 # Training phase...
 training_solver = LearningSolver(...)
 training_solver.parallel_solve(train_instances, n_jobs=10)
 training_solver.save("data.bin")
 # Test phase...
 test_solvers = {
    "Baseline": LearningSolver(...), # each solver may have different parameters
    "Strategy A": LearningSolver(...), 
    "Strategy B": LearningSolver(...),
    "Strategy C": LearningSolver(...),
 }
 benchmark = BenchmarkRunner(test_solvers)
 benchmark.load_fit("data.bin")
 benchmark.parallel_solve(test_instances, n_jobs=2)
 print(benchmark.raw_results())
 ```
 The method `load_fit` loads the saved training data into each of the provided solvers and trains their respective ML models. The method `parallel_solve` solves the test instances in parallel and collects solver statistics such as running time and optimal value. Finally, `raw_results` returns a pandas DataFrame containing the results.
 Current Limitations
 -------------------
--- a/miplearn/__init__.py
+++ b/miplearn/__init__.py
@@ -4,3 +4,4 @@
 from .instance import Instance
 from .solvers import LearningSolver
 from .benchmark import BenchmarkRunner
--- a/miplearn/benchmark.py
+++ b/miplearn/benchmark.py
@@ -0,0 +1,45 @@
 # MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
 # Written by Alinson S. Xavier <axavier@anl.gov>
 from .solvers import LearningSolver
 import pandas as pd
 class BenchmarkRunner:
    """Run several named solvers on the same instances and tabulate the results.

    ``solvers`` is a dict mapping a display name to a ``LearningSolver``.
    After ``parallel_solve`` has been called, ``raw_results`` returns a pandas
    DataFrame with one row per (solver, instance) pair.
    """

    # Column order of the results table.
    _COLUMNS = ["Solver", "Instance", "Wallclock Time", "Optimal Value"]

    # Candidate keys checked, in this order, for the reported running time.
    _TIME_KEYS = ["Time", "Wall time", "Wallclock time"]

    def __init__(self, solvers):
        """Store the solvers to be compared.

        Args:
            solvers: dict mapping a solver name to a ``LearningSolver``.

        Raises:
            AssertionError: if ``solvers`` is not a dict, or if any of its
                values is not a ``LearningSolver``.
        """
        assert isinstance(solvers, dict)
        for solver in solvers.values():
            assert isinstance(solver, LearningSolver)
        self.solvers = solvers
        self.results = None

    def load_fit(self, filename):
        """Call ``load(filename)`` and then ``fit()`` on every solver."""
        for solver in self.solvers.values():
            solver.load(filename)
            solver.fit()

    def parallel_solve(self, instances, n_jobs=1):
        """Solve ``instances`` with every solver and record the statistics.

        Each solver's own ``parallel_solve`` is called with ``n_jobs`` and
        with the solver's name as progress label. The collected table
        replaces any previous content of ``self.results``.

        Args:
            instances: sequence of instances; the row's "Instance" value is
                the position of the instance in this sequence.
            n_jobs: forwarded unchanged to each solver's ``parallel_solve``.
        """
        # Start from an empty table so stale results from an earlier run are
        # never reported if one of the solvers fails below.
        self.results = pd.DataFrame(columns=self._COLUMNS)

        # Rows are accumulated in a plain list and converted once at the end:
        # DataFrame.append copied the whole frame on every call and no longer
        # exists in pandas >= 2.0.
        rows = []
        for (name, solver) in self.solvers.items():
            results = solver.parallel_solve(instances, n_jobs=n_jobs, label=name)
            for i in range(len(instances)):
                rows.append({
                    "Solver": name,
                    "Instance": i,
                    "Wallclock Time": self._wallclock_time(results[i]["Solver"][0]),
                    # The value is read from the reported "Lower bound" field.
                    "Optimal Value": results[i]["Problem"][0]["Lower bound"],
                })
        self.results = pd.DataFrame(rows, columns=self._COLUMNS)

    @staticmethod
    def _wallclock_time(solver_stats):
        """Return the running time found in ``solver_stats`` as a float.

        Every key in ``_TIME_KEYS`` is examined; keys that are absent or whose
        value prints as "<undefined>" are ignored. When several keys carry a
        usable value the last one wins. Returns None if none does.
        """
        wallclock_time = None
        for key in BenchmarkRunner._TIME_KEYS:
            if key not in solver_stats.keys():
                continue
            if str(solver_stats[key]) == "<undefined>":
                continue
            wallclock_time = float(solver_stats[key])
        return wallclock_time

    def raw_results(self):
        """Return the results DataFrame, or None before ``parallel_solve`` ran."""
        return self.results
--- a/miplearn/problems/stab.py
+++ b/miplearn/problems/stab.py
@@ -15,10 +15,12 @@ class MaxStableSetGenerator:
        self.base_weights = base_weights
        self.perturbation_scale = perturbation_scale
-    def generate(self):
+    def generate(self, n_samples):
        def _sample():
            perturbation = np.random.rand(self.graph.number_of_nodes()) * self.perturbation_scale
            weights = self.base_weights + perturbation
            return MaxStableSetInstance(self.graph, weights)
        return [_sample() for _ in range(n_samples)]
 class MaxStableSetInstance(Instance):
--- a/miplearn/solvers.py
+++ b/miplearn/solvers.py
@@ -72,7 +72,7 @@ class LearningSolver:
                            var[index].value = 1
        # Solve MILP
-        self._solve(model, tee=tee)
+        solve_results = self._solve(model, tee=tee)
        # Update y_train
        for category in var_split.keys():
@@ -83,28 +83,36 @@ class LearningSolver:
            else:
                self.y_train[category] = np.vstack([self.y_train[category], y])
-    def parallel_solve(self, instances, n_jobs=4):
+        return solve_results
    def parallel_solve(self, instances, n_jobs=4, label="Solve"):
        def _process(instance):
-            solver = copy(self)
+            solver = deepcopy(self)
-            solver.solve(instance)
+            results = solver.solve(instance)
-            return solver.x_train, solver.y_train
+            return {
                "x_train": solver.x_train,
                "y_train": solver.y_train,
                "results": results,
            }
        def _merge(results):
-            categories = results[0][0].keys()
+            categories = results[0]["x_train"].keys()
-            x_entries = [np.vstack([r[0][c] for r in results]) for c in categories]
+            x_entries = [np.vstack([r["x_train"][c] for r in results]) for c in categories]
-            y_entries = [np.vstack([r[1][c] for r in results]) for c in categories]
+            y_entries = [np.vstack([r["y_train"][c] for r in results]) for c in categories]
            x_train = dict(zip(categories, x_entries))
            y_train = dict(zip(categories, y_entries))
-            return x_train, y_train
+            results = [r["results"] for r in results]
            return x_train, y_train, results
        results = Parallel(n_jobs=n_jobs)(
-            delayed(_process)(i)
+            delayed(_process)(instance)
-            for i in tqdm(instances)
+            for instance in tqdm(instances, desc=label)
        )
-        x_train, y_train = _merge(results)
+        x_train, y_train, results = _merge(results)
        self.x_train = x_train
        self.y_train = y_train
        return results
    def fit(self, x_train_dict=None, y_train_dict=None):
        if x_train_dict is None:
@@ -113,6 +121,7 @@ class LearningSolver:
        for category in x_train_dict.keys():
            x_train = x_train_dict[category]
            y_train = y_train_dict[category]
            if self.ws_predictor_prototype is not None:
                self.ws_predictors[category] = deepcopy(self.ws_predictor_prototype)
                self.ws_predictors[category].fit(x_train, y_train)
@@ -136,6 +145,6 @@ class LearningSolver:
    def _solve(self, model, tee=False):
        if hasattr(self.parent_solver, "set_instance"):
            self.parent_solver.set_instance(model)
-            self.parent_solver.solve(tee=tee, warmstart=True)
+            return self.parent_solver.solve(tee=tee, warmstart=True)
        else:
-            self.parent_solver.solve(model, tee=tee, warmstart=True)
+            return self.parent_solver.solve(model, tee=tee, warmstart=True)
--- a/miplearn/tests/test_benchmark.py
+++ b/miplearn/tests/test_benchmark.py
@@ -0,0 +1,41 @@
 # MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
 # Written by Alinson S. Xavier <axavier@anl.gov>
 from miplearn import LearningSolver, BenchmarkRunner
 from miplearn.warmstart import KnnWarmStartPredictor
 from miplearn.problems.stab import MaxStableSetInstance, MaxStableSetGenerator
 import networkx as nx
 import numpy as np
 import pyomo.environ as pe
 def test_benchmark():
    """Smoke-test BenchmarkRunner end to end on small max-stable-set instances.

    Trains one solver, saves its state, loads that state into two benchmark
    solvers and checks that the results table has one row per
    (solver, instance) pair.
    """
    # Local imports: only this test needs them.
    import os
    import tempfile

    graph = nx.cycle_graph(10)
    base_weights = np.random.rand(10)

    # Generate training and test instances
    generator = MaxStableSetGenerator(graph=graph,
                                      base_weights=base_weights,
                                      perturbation_scale=1.0,
                                     )
    train_instances = generator.generate(5)
    test_instances = generator.generate(3)

    # Keep the saved solver state in a throw-away directory so the test does
    # not leave a "data.bin" file behind in the current working directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        data_file = os.path.join(tmpdir, "data.bin")

        # Training phase...
        training_solver = LearningSolver()
        training_solver.parallel_solve(train_instances, n_jobs=10)
        training_solver.save(data_file)

        # Test phase...
        test_solvers = {
            "Strategy A": LearningSolver(ws_predictor=None),
            "Strategy B": LearningSolver(ws_predictor=None),
        }
        benchmark = BenchmarkRunner(test_solvers)
        benchmark.load_fit(data_file)
        benchmark.parallel_solve(test_instances, n_jobs=2)

    results = benchmark.raw_results()
    print(results)

    # One row per (solver, instance) pair, four statistics columns.
    assert results.shape == (len(test_solvers) * len(test_instances), 4)
    assert set(results["Solver"]) == set(test_solvers)
--- a/miplearn/tests/test_solver.py
+++ b/miplearn/tests/test_solver.py
@@ -38,6 +38,6 @@ def test_parallel_solve():
                                   capacity=3.0)
                 for _ in range(10)]
    solver = LearningSolver()
-    solver.parallel_solve(instances, n_jobs=2)
+    solver.parallel_solve(instances, n_jobs=3)
    assert len(solver.x_train[0]) == 10
    assert len(solver.y_train[0]) == 10
--- a/miplearn/tests/test_stab.py
+++ b/miplearn/tests/test_stab.py
@@ -20,10 +20,10 @@ def test_stab():
 def test_stab_generator():
    graph = nx.cycle_graph(5)
    base_weights = [1.0, 2.0, 3.0, 4.0, 5.0]
-    generator = MaxStableSetGenerator(graph=graph,
+    instances = MaxStableSetGenerator(graph=graph,
                                       base_weights=base_weights,
-                                      perturbation_scale=1.0)
+                                       perturbation_scale=1.0,
-    instances = [generator.generate() for _ in range(100_000)]
+                                      ).generate(100_000)
    weights = np.array([instance.weights for instance in instances])
    weights_avg = np.round(np.average(weights, axis=0), 2)
    weights_std = np.round(np.std(weights, axis=0), 2)
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ pytest
 sklearn
 networkx
 tqdm
 pandas
--- a/setup.py
+++ b/setup.py
@@ -13,5 +13,6 @@ setup(
        'sklearn',
        'networkx',
        'tqdm',
        'pandas',
    ],
 )