Simplify BenchmarkRunner; update docs

2021-01-22 07:22:19 -06:00
parent aa9cefb9c9
commit f755661fa6
11 changed files with 191 additions and 178 deletions

View File

@@ -4,40 +4,71 @@
 import logging
 import os
-from copy import deepcopy
+from typing import Dict, Union, List
 import pandas as pd
-from tqdm.auto import tqdm
+from miplearn.instance import Instance
 from miplearn.solvers.learning import LearningSolver
+from miplearn.types import LearningSolveStats
 class BenchmarkRunner:
-    def __init__(self, solvers):
-        assert isinstance(solvers, dict)
-        for solver in solvers.values():
-            assert isinstance(solver, LearningSolver)
-        self.solvers = solvers
-        self.results = None
-
-    def solve(self, instances, tee=False):
-        for (solver_name, solver) in self.solvers.items():
-            for i in tqdm(range(len((instances)))):
-                results = solver.solve(deepcopy(instances[i]), tee=tee)
-                self._push_result(
-                    results,
-                    solver=solver,
-                    solver_name=solver_name,
-                    instance=i,
-                )
+    """
+    Utility class that simplifies the task of comparing the performance of different
+    solvers.
+
+    Example
+    -------
+    ```python
+    benchmark = BenchmarkRunner({
+        "Baseline": LearningSolver(...),
+        "Strategy A": LearningSolver(...),
+        "Strategy B": LearningSolver(...),
+        "Strategy C": LearningSolver(...),
+    })
+    benchmark.fit(train_instances)
+    benchmark.parallel_solve(test_instances, n_jobs=5)
+    benchmark.write_csv("result.csv")
+    ```
+
+    Parameters
+    ----------
+    solvers: Dict[str, LearningSolver]
+        Dictionary containing the solvers to compare. Solvers may have different
+        arguments and components. The key should be the name of the solver. It
+        appears in the exported tables of results.
+    """
+
+    def __init__(self, solvers: Dict[str, LearningSolver]) -> None:
+        self.solvers: Dict[str, LearningSolver] = solvers
+        self.results = pd.DataFrame(
+            columns=[
+                "Solver",
+                "Instance",
+            ]
+        )
     def parallel_solve(
         self,
-        instances,
-        n_jobs=1,
-        n_trials=1,
-        index_offset=0,
-    ):
+        instances: Union[List[str], List[Instance]],
+        n_jobs: int = 1,
+        n_trials: int = 3,
+    ) -> None:
+        """
+        Solves the given instances in parallel and collects benchmark statistics.
+
+        Parameters
+        ----------
+        instances: Union[List[str], List[Instance]]
+            List of instances to solve. This can either be a list of instances
+            already loaded in memory, or a list of filenames pointing to pickled (and
+            optionally gzipped) files.
+        n_jobs: int
+            Number of instances to solve in parallel at a time.
+        n_trials: int
+            How many times each instance should be solved.
+        """
         self._silence_miplearn_logger()
         trials = instances * n_trials
         for (solver_name, solver) in self.solvers.items():
@@ -48,68 +79,44 @@ class BenchmarkRunner:
                 discard_outputs=True,
             )
             for i in range(len(trials)):
-                idx = (i % len(instances)) + index_offset
-                self._push_result(
-                    results[i],
-                    solver=solver,
-                    solver_name=solver_name,
-                    instance=idx,
-                )
+                idx = i % len(instances)
+                results[i]["Solver"] = solver_name
+                results[i]["Instance"] = idx
+                self.results = self.results.append(pd.DataFrame([results[i]]))
         self._restore_miplearn_logger()

-    def raw_results(self):
-        return self.results
-
-    def save_results(self, filename):
+    def write_csv(self, filename: str) -> None:
+        """
+        Writes the collected results to a CSV file.
+
+        Parameters
+        ----------
+        filename: str
+            The name of the file.
+        """
         os.makedirs(os.path.dirname(filename), exist_ok=True)
         self.results.to_csv(filename)

-    def load_results(self, filename):
-        self.results = pd.concat([self.results, pd.read_csv(filename, index_col=0)])
-
-    def load_state(self, filename):
-        for (solver_name, solver) in self.solvers.items():
-            solver.load_state(filename)
-
-    def fit(self, training_instances):
-        for (solver_name, solver) in self.solvers.items():
-            solver.fit(training_instances)
+    def fit(self, instances: Union[List[str], List[Instance]]) -> None:
+        """
+        Trains all solvers with the provided training instances.
+
+        Parameters
+        ----------
+        instances: Union[List[str], List[Instance]]
+            List of training instances. This can either be a list of instances
+            already loaded in memory, or a list of filenames pointing to pickled (and
+            optionally gzipped) files.
+        """
+        for (solver_name, solver) in self.solvers.items():
+            solver.fit(instances)
-    @staticmethod
-    def _compute_gap(ub, lb):
-        if lb is None or ub is None or lb * ub < 0:
-            # solver did not find a solution and/or bound, use maximum gap possible
-            return 1.0
-        elif abs(ub - lb) < 1e-6:
-            # avoid division by zero when ub = lb = 0
-            return 0.0
-        else:
-            # divide by max(abs(ub),abs(lb)) to ensure gap <= 1
-            return (ub - lb) / max(abs(ub), abs(lb))
-
-    def _push_result(self, result, solver, solver_name, instance):
-        if self.results is None:
-            self.results = pd.DataFrame(
-                # Show the following columns first in the CSV file
-                columns=[
-                    "Solver",
-                    "Instance",
-                ]
-            )
-        result["Solver"] = solver_name
-        result["Instance"] = instance
-        result["Gap"] = self._compute_gap(
-            ub=result["Upper bound"],
-            lb=result["Lower bound"],
-        )
-        result["Mode"] = solver.mode
-        self.results = self.results.append(pd.DataFrame([result]))
-
-    def _silence_miplearn_logger(self):
+    def _silence_miplearn_logger(self) -> None:
         miplearn_logger = logging.getLogger("miplearn")
         self.prev_log_level = miplearn_logger.getEffectiveLevel()
         miplearn_logger.setLevel(logging.WARNING)

-    def _restore_miplearn_logger(self):
+    def _restore_miplearn_logger(self) -> None:
         miplearn_logger = logging.getLogger("miplearn")
         miplearn_logger.setLevel(self.prev_log_level)
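The rewritten `parallel_solve` no longer goes through `_push_result`: it tags each stats dictionary with the solver name and instance index and appends it to the seeded `results` DataFrame. A minimal sketch of that accumulation pattern, with an illustrative `stats` dict standing in for the output of `LearningSolver.solve`; note that `DataFrame.append`, used in the diff above, was deprecated in pandas 1.4 and removed in 2.0, so the sketch uses the equivalent `pd.concat`:

```python
import pandas as pd

# Results table seeded with the columns that should appear first in the
# CSV file, mirroring BenchmarkRunner.__init__ above.
results = pd.DataFrame(columns=["Solver", "Instance"])

# Illustrative stats dict standing in for LearningSolver.solve() output.
stats = {"Lower bound": 2.0, "Upper bound": 2.5, "Wallclock time": 0.8}
stats["Solver"] = "Strategy A"
stats["Instance"] = 0

# Same effect as self.results.append(...) above, on current pandas.
results = pd.concat([results, pd.DataFrame([stats])], ignore_index=True)
```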

View File

@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from typing import Any, List, Union, TYPE_CHECKING
 from miplearn.instance import Instance
-from miplearn.types import MIPSolveStats, TrainingSample
+from miplearn.types import LearningSolveStats, TrainingSample
 if TYPE_CHECKING:
     from miplearn.solvers.learning import LearningSolver
@@ -47,7 +47,7 @@ class Component(ABC):
         solver: "LearningSolver",
         instance: Instance,
         model: Any,
-        stats: MIPSolveStats,
+        stats: LearningSolveStats,
         training_data: TrainingSample,
     ) -> None:
         """
@@ -61,13 +61,13 @@ class Component(ABC):
             The instance being solved.
         model: Any
             The concrete optimization model being solved.
-        stats: dict
+        stats: LearningSolveStats
             A dictionary containing statistics about the solution process, such as
             number of nodes explored and running time. Components are free to add
             their own statistics here. For example, PrimalSolutionComponent adds
             statistics regarding the number of predicted variables. All statistics in
             this dictionary are exported to the benchmark CSV file.
-        training_data: dict
+        training_data: TrainingSample
            A dictionary containing data that may be useful for training machine
            learning models and accelerating the solution process. Components are
            free to add their own training data here. For example,
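Everything a component writes into `stats` is exported to the benchmark CSV file, so extending the collected statistics only requires overriding `after_solve`. A hypothetical sketch against the signature documented above; the class name and the extra statistic are invented, and the import path for `Component` is assumed from this repository's layout:

```python
from typing import Any

from miplearn.components.component import Component  # path assumed


class ExtraStatsComponent(Component):
    """Hypothetical component that exports one extra statistic."""

    def after_solve(
        self,
        solver: Any,         # "LearningSolver"
        instance: Any,       # Instance
        model: Any,
        stats: Any,          # LearningSolveStats
        training_data: Any,  # TrainingSample
    ) -> None:
        # Any key added here ends up in the benchmark CSV file.
        stats["ExtraStats: nodes"] = stats.get("Nodes")
```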

View File

@@ -20,7 +20,7 @@ from miplearn.instance import Instance
 from miplearn.solvers import _RedirectOutput
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
-from miplearn.types import MIPSolveStats, TrainingSample
+from miplearn.types import MIPSolveStats, TrainingSample, LearningSolveStats
 logger = logging.getLogger(__name__)
@@ -127,7 +127,7 @@ class LearningSolver:
         output_filename: Optional[str] = None,
         discard_output: bool = False,
         tee: bool = False,
-    ) -> MIPSolveStats:
+    ) -> LearningSolveStats:
         # Load instance from file, if necessary
         filename = None
@@ -203,15 +203,24 @@ class LearningSolver:
         # Solve MILP
         logger.info("Solving MILP...")
-        stats = self.internal_solver.solve(
-            tee=tee,
-            iteration_cb=iteration_cb_wrapper,
-            lazy_cb=lazy_cb,
+        stats = cast(
+            LearningSolveStats,
+            self.internal_solver.solve(
+                tee=tee,
+                iteration_cb=iteration_cb_wrapper,
+                lazy_cb=lazy_cb,
+            ),
         )
+        if "LP value" in training_sample.keys():
+            stats["LP value"] = training_sample["LP value"]
+        stats["Solver"] = "default"
+        stats["Gap"] = self._compute_gap(
+            ub=stats["Upper bound"],
+            lb=stats["Lower bound"],
+        )
+        stats["Mode"] = self.mode

-        # Read MIP solution and bounds
+        # Add some information to training_sample
         training_sample["Lower bound"] = stats["Lower bound"]
         training_sample["Upper bound"] = stats["Upper bound"]
         training_sample["MIP log"] = stats["Log"]
@@ -242,7 +251,7 @@ class LearningSolver:
         output_filename: Optional[str] = None,
         discard_output: bool = False,
         tee: bool = False,
-    ) -> MIPSolveStats:
+    ) -> LearningSolveStats:
         """
         Solves the given instance. If trained machine-learning models are
         available, they will be used to accelerate the solution process.
@@ -275,7 +284,7 @@ class LearningSolver:
         Returns
         -------
-        MIPSolveStats
+        LearningSolveStats
             A dictionary of solver statistics containing at least the following
             keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
             "Sense", "Log", "Warm start value" and "LP value".
@@ -311,7 +320,7 @@ class LearningSolver:
         label: str = "Solve",
         output_filenames: Optional[List[str]] = None,
         discard_outputs: bool = False,
-    ) -> List[MIPSolveStats]:
+    ) -> List[LearningSolveStats]:
         """
         Solves multiple instances in parallel.
@@ -338,7 +347,7 @@ class LearningSolver:
         Returns
         -------
-        List[MIPSolveStats]
+        List[LearningSolveStats]
             List of solver statistics, with one entry for each provided instance.
             The list is the same you would obtain by calling
             `[solver.solve(p) for p in instances]`
@@ -384,3 +393,15 @@ class LearningSolver:
     def __getstate__(self) -> Dict:
         self.internal_solver = None
         return self.__dict__
+
+    @staticmethod
+    def _compute_gap(ub: Optional[float], lb: Optional[float]) -> Optional[float]:
+        if lb is None or ub is None or lb * ub < 0:
+            # solver did not find a solution and/or bound
+            return None
+        elif abs(ub - lb) < 1e-6:
+            # avoid division by zero when ub = lb = 0
+            return 0.0
+        else:
+            # divide by max(abs(ub),abs(lb)) to ensure gap <= 1
+            return (ub - lb) / max(abs(ub), abs(lb))
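`_compute_gap` moves here from `BenchmarkRunner` with one behavioral change: cases with no usable bound now return `None` (leaving the "Gap" column empty) instead of `1.0`. The `lb * ub < 0` guard is needed because the normalization only keeps the gap below 1 when both bounds share a sign, as this small check illustrates:

```python
# With bounds of opposite sign, the normalized formula exceeds 1:
ub, lb = 1.0, -1.0
gap = (ub - lb) / max(abs(ub), abs(lb))
assert gap == 2.0  # not a meaningful gap, hence the None above
```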

View File

@@ -130,3 +130,13 @@ def test_simulate_perfect():
     )
     stats = solver.solve(tmp.name)
     assert stats["Lower bound"] == stats["Predicted LB"]
+
+
+def test_gap():
+    assert LearningSolver._compute_gap(ub=0.0, lb=0.0) == 0.0
+    assert LearningSolver._compute_gap(ub=1.0, lb=0.5) == 0.5
+    assert LearningSolver._compute_gap(ub=1.0, lb=1.0) == 0.0
+    assert LearningSolver._compute_gap(ub=1.0, lb=-1.0) is None
+    assert LearningSolver._compute_gap(ub=1.0, lb=None) is None
+    assert LearningSolver._compute_gap(ub=None, lb=1.0) is None
+    assert LearningSolver._compute_gap(ub=None, lb=None) is None

View File

@@ -29,21 +29,7 @@ def test_benchmark():
     benchmark = BenchmarkRunner(test_solvers)
     benchmark.fit(train_instances)
     benchmark.parallel_solve(test_instances, n_jobs=2, n_trials=2)
-    assert benchmark.raw_results().values.shape == (12, 14)
+    assert benchmark.results.values.shape == (12, 14)
-    benchmark.save_results("/tmp/benchmark.csv")
+    benchmark.write_csv("/tmp/benchmark.csv")
     assert os.path.isfile("/tmp/benchmark.csv")
-    benchmark = BenchmarkRunner(test_solvers)
-    benchmark.load_results("/tmp/benchmark.csv")
-    assert benchmark.raw_results().values.shape == (12, 14)
-
-
-def test_gap():
-    assert BenchmarkRunner._compute_gap(ub=0.0, lb=0.0) == 0.0
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=0.5) == 0.5
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=1.0) == 0.0
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=-1.0) == 1.0
-    assert BenchmarkRunner._compute_gap(ub=1.0, lb=None) == 1.0
-    assert BenchmarkRunner._compute_gap(ub=None, lb=1.0) == 1.0
-    assert BenchmarkRunner._compute_gap(ub=None, lb=None) == 1.0
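With `load_results` gone from `BenchmarkRunner`, previously exported CSV files can still be reloaded with plain pandas, mirroring the body of the deleted method (file path illustrative):

```python
import pandas as pd

# Equivalent of the removed BenchmarkRunner.load_results:
saved = pd.read_csv("/tmp/benchmark.csv", index_col=0)
results = pd.concat([pd.DataFrame(columns=["Solver", "Instance"]), saved])
```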

View File

@@ -47,6 +47,25 @@ MIPSolveStats = TypedDict(
     },
 )

+LearningSolveStats = TypedDict(
+    "LearningSolveStats",
+    {
+        "Gap": Optional[float],
+        "Instance": Union[str, int],
+        "LP value": Optional[float],
+        "Log": str,
+        "Lower bound": Optional[float],
+        "Mode": str,
+        "Nodes": Optional[int],
+        "Sense": str,
+        "Solver": str,
+        "Upper bound": Optional[float],
+        "Wallclock time": float,
+        "Warm start value": Optional[float],
+    },
+    total=False,
+)
+
 IterationCallback = Callable[[], bool]
 LazyCallback = Callable[[Any, Any], None]
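Because `LearningSolveStats` is declared with `total=False`, every key is optional: a stats dict can start empty and be filled incrementally as `LearningSolver.solve` and the components run, while a type checker still validates key names and value types. A minimal sketch:

```python
from miplearn.types import LearningSolveStats

stats: LearningSolveStats = {}  # valid: total=False makes all keys optional
stats["Solver"] = "default"
stats["Lower bound"] = 2.0
stats["Upper bound"] = 2.5
stats["Gap"] = 0.2  # (2.5 - 2.0) / max(2.5, 2.0), as in _compute_gap
```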