Benchmark: Avoid loading instances to memory

Branch: master
Author: Alinson S. Xavier (5 years ago)
Parent: aecc3a311f
Commit: f05db85df8
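This change stops the benchmark script from holding every problem instance in memory at once. Instances are now written to disk as individual gzipped pickle files (train/00000.pkl.gz, train/00001.pkl.gz, ...) and handed to the solver as filenames collected with glob.glob, so they can be loaded one at a time. Each phase also becomes idempotent: re-running the script skips any step whose output already exists. A minimal sketch of the new on-disk round-trip, using a toy list of instances (the miplearn calls that consume the filenames are not shown):

    import glob
    import gzip
    import os
    import pickle

    os.makedirs("train", exist_ok=True)
    instances = ["inst-a", "inst-b", "inst-c"]        # stand-ins for real instances
    for i, obj in enumerate(instances):
        with gzip.GzipFile(f"train/{i:05d}.pkl.gz", "wb") as f:
            pickle.dump(obj, f)                       # -> train/00000.pkl.gz, ...

    for filename in sorted(glob.glob("train/*.gz")):
        with gzip.GzipFile(filename, "rb") as f:
            print(pickle.load(f))                     # one instance in memory at a time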

@@ -13,7 +13,7 @@ Usage:
 Options:
     -h --help               Show this screen
-    --jobs=<n>              Number of instances to solve simultaneously [default: 10]
+    --jobs=<n>              Number of instances to solve simultaneously [default: 5]
     --train-time-limit=<n>  Solver time limit during training in seconds [default: 3600]
     --test-time-limit=<n>   Solver time limit during test in seconds [default: 900]
     --solver-threads=<n>    Number of threads the solver is allowed to use [default: 4]
@@ -24,27 +24,27 @@ import logging
 import pathlib
 import pickle
 import sys
+import os
+import gzip
+import glob
 from docopt import docopt
 from numpy import median
-from miplearn import LearningSolver, BenchmarkRunner
+from pathlib import Path
+from miplearn import (
+    LearningSolver,
+    BenchmarkRunner,
+    setup_logger,
+)
 
-logging.basicConfig(
-    format="%(asctime)s %(levelname).1s %(name)s: %(message)12s",
-    datefmt="%H:%M:%S",
-    level=logging.INFO,
-    stream=sys.stdout,
-)
+setup_logger()
 logging.getLogger("gurobipy").setLevel(logging.ERROR)
 logging.getLogger("pyomo.core").setLevel(logging.ERROR)
-logging.getLogger("miplearn").setLevel(logging.INFO)
 logger = logging.getLogger("benchmark")
 
 args = docopt(__doc__)
 basepath = args["<challenge>"]
-pathlib.Path(basepath).mkdir(parents=True, exist_ok=True)
 n_jobs = int(args["--jobs"])
 n_threads = int(args["--solver-threads"])
 train_time_limit = int(args["--train-time-limit"])
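The inline logging.basicConfig call is replaced by miplearn's setup_logger helper. Assuming that helper simply centralizes the configuration removed here, an equivalent definition would be (the real miplearn.setup_logger may differ):

    import logging
    import sys

    def setup_logger():
        # Hypothetical equivalent of miplearn.setup_logger, reconstructed
        # from the basicConfig call this commit deletes from the script.
        logging.basicConfig(
            format="%(asctime)s %(levelname).1s %(name)s: %(message)12s",
            datefmt="%H:%M:%S",
            level=logging.INFO,
            stream=sys.stdout,
        )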
@@ -52,74 +52,83 @@ test_time_limit = int(args["--test-time-limit"])
 internal_solver = args["--solver"]
 
-def save(obj, filename):
-    logger.info("Writing %s..." % filename)
-    with open(filename, "wb") as file:
+def write_pickle_gz(obj, filename):
+    logger.info(f"Writing: {filename}")
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    with gzip.GzipFile(filename, "wb") as file:
         pickle.dump(obj, file)
 
-def load(filename):
-    import pickle
-    with open(filename, "rb") as file:
+def read_pickle_gz(filename):
+    logger.info(f"Reading: {filename}")
+    with gzip.GzipFile(filename, "rb") as file:
         return pickle.load(file)
 
+def write_multiple(objs, dirname):
+    for (i, obj) in enumerate(objs):
+        write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")
+
 def train():
     problem_name, challenge_name = args["<challenge>"].split("/")
-    pkg = importlib.import_module("miplearn.problems.%s" % problem_name)
+    pkg = importlib.import_module(f"miplearn.problems.{problem_name}")
     challenge = getattr(pkg, challenge_name)()
-    train_instances = challenge.training_instances
-    test_instances = challenge.test_instances
-    solver = LearningSolver(
-        time_limit=train_time_limit,
-        solver=internal_solver,
-        threads=n_threads,
-    )
-    solver.parallel_solve(train_instances, n_jobs=n_jobs)
-    save(train_instances, "%s/train_instances.bin" % basepath)
-    save(test_instances, "%s/test_instances.bin" % basepath)
+    if not os.path.isdir(f"{basepath}/train"):
+        write_multiple(challenge.training_instances, f"{basepath}/train")
+        write_multiple(challenge.test_instances, f"{basepath}/test")
+    done_filename = f"{basepath}/train/done"
+    if not os.path.isfile(done_filename):
+        train_instances = glob.glob(f"{basepath}/train/*.gz")
+        solver = LearningSolver(
+            time_limit=train_time_limit,
+            solver=internal_solver,
+            threads=n_threads,
+        )
+        solver.parallel_solve(train_instances, n_jobs=n_jobs)
+        Path(done_filename).touch(exist_ok=True)
 
 def test_baseline():
-    test_instances = load("%s/test_instances.bin" % basepath)
-    solvers = {
-        "baseline": LearningSolver(
-            time_limit=test_time_limit,
-            solver=internal_solver,
-            threads=n_threads,
-        ),
-    }
-    benchmark = BenchmarkRunner(solvers)
-    benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
-    benchmark.save_results("%s/benchmark_baseline.csv" % basepath)
+    test_instances = glob.glob(f"{basepath}/test/*.gz")
+    csv_filename = f"{basepath}/benchmark_baseline.csv"
+    if not os.path.isfile(csv_filename):
+        solvers = {
+            "baseline": LearningSolver(
+                time_limit=test_time_limit,
+                solver=internal_solver,
+                threads=n_threads,
+            ),
+        }
+        benchmark = BenchmarkRunner(solvers)
+        benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
+        benchmark.save_results(csv_filename)
 
 def test_ml():
-    logger.info("Loading instances...")
-    train_instances = load("%s/train_instances.bin" % basepath)
-    test_instances = load("%s/test_instances.bin" % basepath)
-    solvers = {
-        "ml-exact": LearningSolver(
-            time_limit=test_time_limit,
-            solver=internal_solver,
-            threads=n_threads,
-        ),
-        "ml-heuristic": LearningSolver(
-            time_limit=test_time_limit,
-            solver=internal_solver,
-            threads=n_threads,
-            mode="heuristic",
-        ),
-    }
-    benchmark = BenchmarkRunner(solvers)
-    logger.info("Loading results...")
-    benchmark.load_results("%s/benchmark_baseline.csv" % basepath)
-    logger.info("Fitting...")
-    benchmark.fit(train_instances)
-    logger.info("Solving...")
-    benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
-    benchmark.save_results("%s/benchmark_ml.csv" % basepath)
+    test_instances = glob.glob(f"{basepath}/test/*.gz")
+    train_instances = glob.glob(f"{basepath}/train/*.gz")
+    csv_filename = f"{basepath}/benchmark_ml.csv"
+    if not os.path.isfile(csv_filename):
+        solvers = {
+            "ml-exact": LearningSolver(
+                time_limit=test_time_limit,
+                solver=internal_solver,
+                threads=n_threads,
+            ),
+            "ml-heuristic": LearningSolver(
+                time_limit=test_time_limit,
+                solver=internal_solver,
+                threads=n_threads,
+                mode="heuristic",
+            ),
+        }
+        benchmark = BenchmarkRunner(solvers)
+        benchmark.fit(train_instances)
+        benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
+        benchmark.save_results(csv_filename)
 
 def charts():
@@ -129,18 +138,19 @@ def charts():
     sns.set_style("whitegrid")
     sns.set_palette("Blues_r")
     benchmark = BenchmarkRunner({})
-    benchmark.load_results("%s/benchmark_ml.csv" % basepath)
+    benchmark.load_results(f"{basepath}/benchmark_baseline.csv")
+    benchmark.load_results(f"{basepath}/benchmark_ml.csv")
     results = benchmark.raw_results()
     results["Gap (%)"] = results["Gap"] * 100.0
     sense = results.loc[0, "Sense"]
-    if sense == "min":
-        primal_column = "Relative Upper Bound"
-        obj_column = "Upper Bound"
+    if (sense == "min").any():
+        primal_column = "Relative upper bound"
+        obj_column = "Upper bound"
         predicted_obj_column = "Predicted UB"
     else:
-        primal_column = "Relative Lower Bound"
-        obj_column = "Lower Bound"
+        primal_column = "Relative lower bound"
+        obj_column = "Lower bound"
         predicted_obj_column = "Predicted LB"
     palette = {"baseline": "#9b59b6", "ml-exact": "#3498db", "ml-heuristic": "#95a5a6"}
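The switch to `(sense == "min").any()` follows from charts() now loading two CSV files. Assuming load_results appends rows while keeping each file's original index (as pandas concatenation does by default), the row label 0 appears twice, so `results.loc[0, "Sense"]` returns a Series rather than a scalar, and comparing it with a string yields a boolean Series. A small demonstration of that pandas behavior:

    import pandas as pd

    a = pd.DataFrame({"Sense": ["min"]})
    b = pd.DataFrame({"Sense": ["min"]})
    results = pd.concat([a, b])        # default concat keeps indices: [0, 0]

    sense = results.loc[0, "Sense"]    # a Series of length 2, not a scalar
    print((sense == "min").any())      # True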
@@ -150,9 +160,11 @@ def charts():
         figsize=(12, 4),
         gridspec_kw={"width_ratios": [2, 1, 1, 2]},
     )
+
+    # Wallclock time
     sns.stripplot(
         x="Solver",
-        y="Wallclock Time",
+        y="Wallclock time",
         data=results,
         ax=ax1,
         jitter=0.25,
@@ -161,7 +173,7 @@ def charts():
     )
     sns.barplot(
         x="Solver",
-        y="Wallclock Time",
+        y="Wallclock time",
         data=results,
         ax=ax1,
         errwidth=0.0,
@@ -169,7 +181,9 @@ def charts():
         palette=palette,
         estimator=median,
     )
-    ax1.set(ylabel="Wallclock Time (s)")
+    ax1.set(ylabel="Wallclock time (s)")
+
+    # Gap
     ax2.set_ylim(-0.5, 5.5)
     sns.stripplot(
         x="Solver",
@@ -180,6 +194,8 @@ def charts():
         palette=palette,
         size=4.0,
     )
+
+    # Relative primal bound
    ax3.set_ylim(0.95, 1.05)
     sns.stripplot(
         x="Solver",
@@ -189,7 +205,6 @@ def charts():
         ax=ax3,
         palette=palette,
     )
-
     sns.scatterplot(
         x=obj_column,
         y=predicted_obj_column,
@@ -198,14 +213,29 @@ def charts():
         ax=ax4,
         palette=palette,
     )
+
+    # Predicted vs actual primal bound
     xlim, ylim = ax4.get_xlim(), ax4.get_ylim()
-    ax4.plot([-1e10, 1e10], [-1e10, 1e10], ls="-", color="#cccccc")
+    ax4.plot(
+        [-1e10, 1e10],
+        [-1e10, 1e10],
+        ls="-",
+        color="#cccccc",
+    )
     ax4.set_xlim(xlim)
     ax4.set_ylim(ylim)
     ax4.get_legend().remove()
+    ax4.set(
+        ylabel="Predicted value",
+        xlabel="Actual value",
+    )
     fig.tight_layout()
-    plt.savefig("%s/performance.png" % basepath, bbox_inches="tight", dpi=150)
+    plt.savefig(
+        f"{basepath}/performance.png",
+        bbox_inches="tight",
+        dpi=150,
+    )
 
 if __name__ == "__main__":
