#!/usr/bin/env python
#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
#  Released under the modified BSD license. See COPYING.md for more details.

"""MIPLearn Benchmark Scripts

Usage:
    benchmark.py train <challenge> <basepath> [options]
    benchmark.py test-baseline <basepath> [options]
    benchmark.py test-ml <basepath> [options]
    benchmark.py charts <basepath> [options]

Options:
    -h --help                   Show this screen
    --jobs=<n>                  Number of instances to solve simultaneously [default: 5]
    --train-time-limit=<sec>    Solver time limit during training in seconds [default: 3600]
    --test-time-limit=<sec>     Solver time limit during test in seconds [default: 900]
    --solver-threads=<n>        Number of threads the solver is allowed to use [default: 4]
    --solver=<name>             Internal MILP solver to use [default: gurobi]
"""

import glob
import gzip
import importlib
import logging
import os
import pickle
from pathlib import Path

from docopt import docopt
from numpy import median

from miplearn import (
    LearningSolver,
    BenchmarkRunner,
    setup_logger,
)

setup_logger()
logging.getLogger("gurobipy").setLevel(logging.ERROR)
logging.getLogger("pyomo.core").setLevel(logging.ERROR)
logger = logging.getLogger("benchmark")

args = docopt(__doc__)
basepath = args["<basepath>"]
n_jobs = int(args["--jobs"])
n_threads = int(args["--solver-threads"])
train_time_limit = int(args["--train-time-limit"])
test_time_limit = int(args["--test-time-limit"])
internal_solver = args["--solver"]


def write_pickle_gz(obj, filename):
    """Pickle `obj` to a gzip-compressed file, creating parent folders as needed."""
    logger.info(f"Writing: {filename}")
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with gzip.GzipFile(filename, "wb") as file:
        pickle.dump(obj, file)


def read_pickle_gz(filename):
    """Load a pickled object from a gzip-compressed file."""
    logger.info(f"Reading: {filename}")
    with gzip.GzipFile(filename, "rb") as file:
        return pickle.load(file)


def write_multiple(objs, dirname):
    """Pickle each object in `objs` to a sequentially numbered file under `dirname`."""
    for (i, obj) in enumerate(objs):
        write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")


def train():
    # Generate training and test instances for the selected challenge, then
    # solve every training instance once to collect training data for the
    # machine-learning components.
    problem_name, challenge_name = args["<challenge>"].split("/")
    pkg = importlib.import_module(f"miplearn.problems.{problem_name}")
    challenge = getattr(pkg, challenge_name)()
    if not os.path.isdir(f"{basepath}/train"):
        write_multiple(challenge.training_instances, f"{basepath}/train")
        write_multiple(challenge.test_instances, f"{basepath}/test")
    done_filename = f"{basepath}/train/done"
    if not os.path.isfile(done_filename):
        train_instances = glob.glob(f"{basepath}/train/*.gz")
        solver = LearningSolver(
            time_limit=train_time_limit,
            solver=internal_solver,
            threads=n_threads,
        )
        solver.parallel_solve(train_instances, n_jobs=n_jobs)
        Path(done_filename).touch(exist_ok=True)


def test_baseline():
    # Solve the test instances with a plain (untrained) solver and record results.
    test_instances = glob.glob(f"{basepath}/test/*.gz")
    csv_filename = f"{basepath}/benchmark_baseline.csv"
    if not os.path.isfile(csv_filename):
        solvers = {
            "baseline": LearningSolver(
                time_limit=test_time_limit,
                solver=internal_solver,
                threads=n_threads,
            ),
        }
        benchmark = BenchmarkRunner(solvers)
        benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
        benchmark.save_results(csv_filename)


def test_ml():
    # Fit the ML components on the training instances, then solve the test
    # instances in both exact and heuristic modes and record results.
    test_instances = glob.glob(f"{basepath}/test/*.gz")
    train_instances = glob.glob(f"{basepath}/train/*.gz")
    csv_filename = f"{basepath}/benchmark_ml.csv"
    if not os.path.isfile(csv_filename):
        solvers = {
            "ml-exact": LearningSolver(
                time_limit=test_time_limit,
                solver=internal_solver,
                threads=n_threads,
            ),
            "ml-heuristic": LearningSolver(
                time_limit=test_time_limit,
                solver=internal_solver,
                threads=n_threads,
                mode="heuristic",
            ),
        }
        benchmark = BenchmarkRunner(solvers)
        benchmark.fit(train_instances)
        benchmark.parallel_solve(test_instances, n_jobs=n_jobs)
        benchmark.save_results(csv_filename)


def charts():
    # Aggregate baseline and ML results and plot wallclock time, optimality gap,
    # relative primal bound and predicted-vs-actual primal bound.
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.set_style("whitegrid")
    sns.set_palette("Blues_r")
    benchmark = BenchmarkRunner({})
    benchmark.load_results(f"{basepath}/benchmark_baseline.csv")
    benchmark.load_results(f"{basepath}/benchmark_ml.csv")
    results = benchmark.raw_results()
    results["Gap (%)"] = results["Gap"] * 100.0

    # All instances in a challenge share the same sense. After loading two CSV
    # files, loc[0, "Sense"] may return a Series, hence the `.any()` check.
    sense = results.loc[0, "Sense"]
    if (sense == "min").any():
        primal_column = "Relative upper bound"
        obj_column = "Upper bound"
        predicted_obj_column = "Predicted UB"
    else:
        primal_column = "Relative lower bound"
        obj_column = "Lower bound"
        predicted_obj_column = "Predicted LB"

    palette = {
        "baseline": "#9b59b6",
        "ml-exact": "#3498db",
        "ml-heuristic": "#95a5a6",
    }
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(
        nrows=1,
        ncols=4,
        figsize=(12, 4),
        gridspec_kw={"width_ratios": [2, 1, 1, 2]},
    )

    # Wallclock time
    sns.stripplot(
        x="Solver",
        y="Wallclock time",
        data=results,
        ax=ax1,
        jitter=0.25,
        palette=palette,
        size=4.0,
    )
    sns.barplot(
        x="Solver",
        y="Wallclock time",
        data=results,
        ax=ax1,
        errwidth=0.0,
        alpha=0.4,
        palette=palette,
        estimator=median,
    )
    ax1.set(ylabel="Wallclock time (s)")

    # Gap
    ax2.set_ylim(-0.5, 5.5)
    sns.stripplot(
        x="Solver",
        y="Gap (%)",
        jitter=0.25,
        data=results[results["Solver"] != "ml-heuristic"],
        ax=ax2,
        palette=palette,
        size=4.0,
    )

    # Relative primal bound
    ax3.set_ylim(0.95, 1.05)
    sns.stripplot(
        x="Solver",
        y=primal_column,
        jitter=0.25,
        data=results[results["Solver"] == "ml-heuristic"],
        ax=ax3,
        palette=palette,
    )

    # Predicted vs actual primal bound
    sns.scatterplot(
        x=obj_column,
        y=predicted_obj_column,
        hue="Solver",
        data=results[results["Solver"] == "ml-exact"],
        ax=ax4,
        palette=palette,
    )
    xlim, ylim = ax4.get_xlim(), ax4.get_ylim()
    ax4.plot(
        [-1e10, 1e10],
        [-1e10, 1e10],
        ls="-",
        color="#cccccc",
    )
    ax4.set_xlim(xlim)
    ax4.set_ylim(ylim)
    ax4.get_legend().remove()
    ax4.set(
        ylabel="Predicted value",
        xlabel="Actual value",
    )

    fig.tight_layout()
    plt.savefig(
        f"{basepath}/performance.png",
        bbox_inches="tight",
        dpi=150,
    )


if __name__ == "__main__":
    if args["train"]:
        train()
    if args["test-baseline"]:
        test_baseline()
    if args["test-ml"]:
        test_ml()
    if args["charts"]:
        charts()
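
# Example workflow (illustrative only). "tsp/ChallengeA" is a hypothetical
# <challenge> value: it must name a problem module and challenge class that
# actually exist under miplearn.problems in your installed version, and
# "runs/tsp-a" is an arbitrary output directory.
#
#   python benchmark.py train tsp/ChallengeA runs/tsp-a --jobs=4
#   python benchmark.py test-baseline runs/tsp-a --jobs=4
#   python benchmark.py test-ml runs/tsp-a --jobs=4
#   python benchmark.py charts runs/tsp-a
#
# The train step writes pickled instances under runs/tsp-a/train and
# runs/tsp-a/test and collects training data; the test steps write
# benchmark_baseline.csv and benchmark_ml.csv; charts renders performance.png
# from those CSV files.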