Benchmark: Avoid loading instances to memory

Branch: master
Author: Alinson S. Xavier (5 years ago)
Parent: aecc3a311f
Commit: f05db85df8
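This change stops the benchmark script from holding every problem instance in memory at once. Instances are now written to disk as individual gzipped pickle files (train/00000.pkl.gz, train/00001.pkl.gz, ...) and handed to the solver as filenames collected with glob.glob, so they can be loaded one at a time. Each phase also becomes idempotent: re-running the script skips any step whose output already exists. A minimal sketch of the new on-disk round-trip, using a toy list of instances (the miplearn calls that consume the filenames are not shown):

    import glob
    import gzip
    import os
    import pickle

    os.makedirs("train", exist_ok=True)
    instances = ["inst-a", "inst-b", "inst-c"]        # stand-ins for real instances
    for i, obj in enumerate(instances):
        with gzip.GzipFile(f"train/{i:05d}.pkl.gz", "wb") as f:
            pickle.dump(obj, f)                       # -> train/00000.pkl.gz, ...

    for filename in sorted(glob.glob("train/*.gz")):
        with gzip.GzipFile(filename, "rb") as f:
            print(pickle.load(f))                     # one instance in memory at a time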

@@ -13,7 +13,7 @@ Usage:
 Options:
     -h --help               Show this screen
-    --jobs=<n>              Number of instances to solve simultaneously [default: 10]
+    --jobs=<n>              Number of instances to solve simultaneously [default: 5]
     --train-time-limit=<n>  Solver time limit during training in seconds [default: 3600]
     --test-time-limit=<n>   Solver time limit during test in seconds [default: 900]
     --solver-threads=<n>    Number of threads the solver is allowed to use [default: 4]
@@ -24,27 +24,27 @@ import logging
 import pathlib
 import pickle
 import sys
+import os
+import gzip
+import glob
 from docopt import docopt
 from numpy import median
-from miplearn import LearningSolver, BenchmarkRunner
+from pathlib import Path
+from miplearn import (
+    LearningSolver,
+    BenchmarkRunner,
+    setup_logger,
+)
 
-logging.basicConfig(
-    format="%(asctime)s %(levelname).1s %(name)s: %(message)12s",
-    datefmt="%H:%M:%S",
-    level=logging.INFO,
-    stream=sys.stdout,
-)
+setup_logger()
 logging.getLogger("gurobipy").setLevel(logging.ERROR)
 logging.getLogger("pyomo.core").setLevel(logging.ERROR)
-logging.getLogger("miplearn").setLevel(logging.INFO)
 logger = logging.getLogger("benchmark")
 
 args = docopt(__doc__)
 basepath = args["<challenge>"]
-pathlib.Path(basepath).mkdir(parents=True, exist_ok=True)
 n_jobs = int(args["--jobs"])
 n_threads = int(args["--solver-threads"])
 train_time_limit = int(args["--train-time-limit"])
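The inline logging.basicConfig call is replaced by miplearn's setup_logger helper. Assuming that helper simply centralizes the configuration removed here, an equivalent definition would be (the real miplearn.setup_logger may differ):

    import logging
    import sys

    def setup_logger():
        # Hypothetical equivalent of miplearn.setup_logger, reconstructed
        # from the basicConfig call this commit deletes from the script.
        logging.basicConfig(
            format="%(asctime)s %(levelname).1s %(name)s: %(message)12s",
            datefmt="%H:%M:%S",
            level=logging.INFO,
            stream=sys.stdout,
        )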
@@ -52,74 +52,83 @@ test_time_limit = int(args["--test-time-limit"])
 internal_solver = args["--solver"]
 
-def save(obj, filename):
-    logger.info("Writing %s..." % filename)
-    with open(filename, "wb") as file:
+def write_pickle_gz(obj, filename):
+    logger.info(f"Writing: {filename}")
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    with gzip.GzipFile(filename, "wb") as file:
         pickle.dump(obj, file)
 
-def load(filename):
-    import pickle
-    with open(filename, "rb") as file:
+def read_pickle_gz(filename):
+    logger.info(f"Reading: {filename}")
+    with gzip.GzipFile(filename, "rb") as file:
         return pickle.load(file)
 
+def write_multiple(objs, dirname):
+    for (i, obj) in enumerate(objs):
+        write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")
+
 def train():
     problem_name, challenge_name = args["<challenge>"].split("/")
-    pkg = importlib.import_module("miplearn.problems.%s" % problem_name)
+    pkg = importlib.import_module(f"miplearn.problems.{problem_name}")
     challenge = getattr(pkg, challenge_name)()
-    train_instances = challenge.training_instances
-    test_instances = challenge.test_instances
-    solver = LearningSolver(
-        time_limit=train_time_limit,
-        solver=internal_solver,
-        threads=n_threads,
-    )
-    solver.parallel_solve(train_instances, n_jobs=n_jobs)
-    save(train_instances, "%s/train_instances.bin" % basepath)
-    save(test_instances, "%s/test_instances.bin" % basepath)
+    if not os.path.isdir(f"{basepath}/train"):
+        write_multiple(challenge.training_instances, f"{basepath}/train")
+        write_multiple(challenge.test_instances, f"{basepath}/test")
+    done_filename = f"{basepath}/train/done"
+    if not os.path.isfile(done_filename):
+        train_instances = glob.glob(f"{basepath}/train/*.gz")
+        solver = LearningSolver(
+            time_limit=train_time_limit,
+            solver=internal_solver,
+            threads=n_threads,
+        )
+        solver.parallel_solve(train_instances, n_jobs=n_jobs)
+        Path(done_filename).touch(exist_ok=True)
 
 def test_baseline():
-    test_instances = load("%s/test_instances.bin" % basepath)
-    solvers = {
-        "baseline": LearningSolver(
-            time_limit=test_time_limit,
-            solver=internal_solver,
-            threads=n_threads,
-        ),
-    }
-    benchmark = BenchmarkRunner(solvers)
-    benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
-    benchmark.save_results("%s/benchmark_baseline.csv" % basepath)
+    test_instances = glob.glob(f"{basepath}/test/*.gz")
+    csv_filename = f"{basepath}/benchmark_baseline.csv"
+    if not os.path.isfile(csv_filename):
+        solvers = {
+            "baseline": LearningSolver(
+                time_limit=test_time_limit,
+                solver=internal_solver,
+                threads=n_threads,
+            ),
+        }
+        benchmark = BenchmarkRunner(solvers)
+        benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
+        benchmark.save_results(csv_filename)
 
 def test_ml():
-    logger.info("Loading instances...")
-    train_instances = load("%s/train_instances.bin" % basepath)
-    test_instances = load("%s/test_instances.bin" % basepath)
-    solvers = {
-        "ml-exact": LearningSolver(
-            time_limit=test_time_limit,
-            solver=internal_solver,
-            threads=n_threads,
-        ),
-        "ml-heuristic": LearningSolver(
-            time_limit=test_time_limit,
-            solver=internal_solver,
-            threads=n_threads,
-            mode="heuristic",
-        ),
-    }
-    benchmark = BenchmarkRunner(solvers)
-    logger.info("Loading results...")
-    benchmark.load_results("%s/benchmark_baseline.csv" % basepath)
-    logger.info("Fitting...")
-    benchmark.fit(train_instances)
-    logger.info("Solving...")
-    benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
-    benchmark.save_results("%s/benchmark_ml.csv" % basepath)
+    test_instances = glob.glob(f"{basepath}/test/*.gz")
+    train_instances = glob.glob(f"{basepath}/train/*.gz")
+    csv_filename = f"{basepath}/benchmark_ml.csv"
+    if not os.path.isfile(csv_filename):
+        solvers = {
+            "ml-exact": LearningSolver(
+                time_limit=test_time_limit,
+                solver=internal_solver,
+                threads=n_threads,
+            ),
+            "ml-heuristic": LearningSolver(
+                time_limit=test_time_limit,
+                solver=internal_solver,
+                threads=n_threads,
+                mode="heuristic",
+            ),
+        }
+        benchmark = BenchmarkRunner(solvers)
+        benchmark.fit(train_instances)
+        benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
+        benchmark.save_results(csv_filename)
 
 def charts():
@@ -129,18 +138,19 @@ def charts():
     sns.set_style("whitegrid")
     sns.set_palette("Blues_r")
     benchmark = BenchmarkRunner({})
-    benchmark.load_results("%s/benchmark_ml.csv" % basepath)
+    benchmark.load_results(f"{basepath}/benchmark_baseline.csv")
+    benchmark.load_results(f"{basepath}/benchmark_ml.csv")
     results = benchmark.raw_results()
     results["Gap (%)"] = results["Gap"] * 100.0
     sense = results.loc[0, "Sense"]
-    if sense == "min":
-        primal_column = "Relative Upper Bound"
-        obj_column = "Upper Bound"
+    if (sense == "min").any():
+        primal_column = "Relative upper bound"
+        obj_column = "Upper bound"
         predicted_obj_column = "Predicted UB"
     else:
-        primal_column = "Relative Lower Bound"
-        obj_column = "Lower Bound"
+        primal_column = "Relative lower bound"
+        obj_column = "Lower bound"
         predicted_obj_column = "Predicted LB"
     palette = {"baseline": "#9b59b6", "ml-exact": "#3498db", "ml-heuristic": "#95a5a6"}
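The switch to `(sense == "min").any()` follows from charts() now loading two CSV files. Assuming load_results appends rows while keeping each file's original index (as pandas concatenation does by default), the row label 0 appears twice, so `results.loc[0, "Sense"]` returns a Series rather than a scalar, and comparing it with a string yields a boolean Series. A small demonstration of that pandas behavior:

    import pandas as pd

    a = pd.DataFrame({"Sense": ["min"]})
    b = pd.DataFrame({"Sense": ["min"]})
    results = pd.concat([a, b])        # default concat keeps indices: [0, 0]

    sense = results.loc[0, "Sense"]    # a Series of length 2, not a scalar
    print((sense == "min").any())      # True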
@@ -150,9 +160,11 @@ def charts():
         figsize=(12, 4),
         gridspec_kw={"width_ratios": [2, 1, 1, 2]},
     )
+
+    # Wallclock time
     sns.stripplot(
         x="Solver",
-        y="Wallclock Time",
+        y="Wallclock time",
         data=results,
         ax=ax1,
         jitter=0.25,
@@ -161,7 +173,7 @@ def charts():
     )
     sns.barplot(
         x="Solver",
-        y="Wallclock Time",
+        y="Wallclock time",
         data=results,
         ax=ax1,
         errwidth=0.0,
@@ -169,7 +181,9 @@ def charts():
         palette=palette,
         estimator=median,
     )
-    ax1.set(ylabel="Wallclock Time (s)")
+    ax1.set(ylabel="Wallclock time (s)")
+
+    # Gap
     ax2.set_ylim(-0.5, 5.5)
     sns.stripplot(
         x="Solver",
@@ -180,6 +194,8 @@ def charts():
         palette=palette,
         size=4.0,
     )
+
+    # Relative primal bound
    ax3.set_ylim(0.95, 1.05)
     sns.stripplot(
         x="Solver",
@@ -189,7 +205,6 @@ def charts():
         ax=ax3,
         palette=palette,
     )
-
     sns.scatterplot(
         x=obj_column,
         y=predicted_obj_column,
@@ -198,14 +213,29 @@ def charts():
         ax=ax4,
         palette=palette,
     )
+
+    # Predicted vs actual primal bound
     xlim, ylim = ax4.get_xlim(), ax4.get_ylim()
-    ax4.plot([-1e10, 1e10], [-1e10, 1e10], ls="-", color="#cccccc")
+    ax4.plot(
+        [-1e10, 1e10],
+        [-1e10, 1e10],
+        ls="-",
+        color="#cccccc",
+    )
     ax4.set_xlim(xlim)
     ax4.set_ylim(ylim)
     ax4.get_legend().remove()
+    ax4.set(
+        ylabel="Predicted value",
+        xlabel="Actual value",
+    )
     fig.tight_layout()
-    plt.savefig("%s/performance.png" % basepath, bbox_inches="tight", dpi=150)
+    plt.savefig(
+        f"{basepath}/performance.png",
+        bbox_inches="tight",
+        dpi=150,
+    )
 
 if __name__ == "__main__":
