diff --git a/Makefile b/Makefile
index 3bfd1d1..c34cb46 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ PYTHON := python3
 PYTEST := pytest
 PIP := $(PYTHON) -m pip
 MYPY := $(PYTHON) -m mypy
-PYTEST_ARGS := -W ignore::DeprecationWarning -vv --log-level=DEBUG
+PYTEST_ARGS := -W ignore::DeprecationWarning -vv --log-level=DEBUG tests
 VERSION := 0.2
 
 all: docs test
@@ -41,9 +41,9 @@ reformat:
 	$(PYTHON) -m black .
 
 test:
-	rm -rf .mypy_cache
-	$(MYPY) -p miplearn
-	$(MYPY) -p tests
+	# rm -rf .mypy_cache
+	# $(MYPY) -p miplearn
+	# $(MYPY) -p tests
 	$(PYTEST) $(PYTEST_ARGS)
 
 .PHONY: test test-watch docs install dist
diff --git a/miplearn/benchmark.py b/miplearn/benchmark.py
index 976f9dd..aa39f0d 100644
--- a/miplearn/benchmark.py
+++ b/miplearn/benchmark.py
@@ -4,13 +4,13 @@
 
 import logging
 import os
-from typing import Dict, List, Any, Optional
+from typing import Dict, List, Any, Optional, Callable
 
 import pandas as pd
 
 from miplearn.components.component import Component
 from miplearn.instance.base import Instance
-from miplearn.solvers.learning import LearningSolver
+from miplearn.solvers.learning import LearningSolver, FileInstanceWrapper
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
 from sklearn.utils._testing import ignore_warnings
 from sklearn.exceptions import ConvergenceWarning
@@ -43,37 +43,24 @@ class BenchmarkRunner:
 
     def parallel_solve(
         self,
-        instances: List[Instance],
+        filenames: List[str],
+        build_model: Callable,
         n_jobs: int = 1,
         n_trials: int = 1,
         progress: bool = False,
     ) -> None:
-        """
-        Solves the given instances in parallel and collect benchmark statistics.
-
-        Parameters
-        ----------
-        instances: List[Instance]
-            List of instances to solve. This can either be a list of instances
-            already loaded in memory, or a list of filenames pointing to pickled (and
-            optionally gzipped) files.
-        n_jobs: int
-            Number of instances to solve in parallel at a time.
-        n_trials: int
-            How many times each instance should be solved.
-        """
         self._silence_miplearn_logger()
-        trials = instances * n_trials
+        trials = filenames * n_trials
         for (solver_name, solver) in self.solvers.items():
             results = solver.parallel_solve(
                 trials,
+                build_model,
                 n_jobs=n_jobs,
-                label="solve (%s)" % solver_name,
-                discard_outputs=True,
+                label="benchmark (%s)" % solver_name,
                 progress=progress,
             )
             for i in range(len(trials)):
-                idx = i % len(instances)
+                idx = i % len(filenames)
                 results[i]["Solver"] = solver_name
                 results[i]["Instance"] = idx
                 self.results = self.results.append(pd.DataFrame([results[i]]))
@@ -93,21 +80,15 @@ class BenchmarkRunner:
 
     def fit(
         self,
-        instances: List[Instance],
+        filenames: List[str],
+        build_model: Callable,
+        progress: bool = False,
         n_jobs: int = 1,
-        progress: bool = True,
     ) -> None:
-        """
-        Trains all solvers with the provided training instances.
-
-        Parameters
-        ----------
-        instances: List[Instance]
-            List of training instances.
-        n_jobs: int
-            Number of parallel processes to use.
-        """
-        components: List[Component] = []
+        components = []
+        instances: List[Instance] = [
+            FileInstanceWrapper(f, build_model, mode="r") for f in filenames
+        ]
         for (solver_name, solver) in self.solvers.items():
             if solver_name == "baseline":
                 continue
@@ -128,6 +109,114 @@ class BenchmarkRunner:
         miplearn_logger = logging.getLogger("miplearn")
         miplearn_logger.setLevel(self.prev_log_level)
 
+    def write_svg(
+        self,
+        output: Optional[str] = None,
+    ) -> None:
+        import matplotlib.pyplot as plt
+        import pandas as pd
+        import seaborn as sns
+
+        sns.set_style("whitegrid")
+        sns.set_palette("Blues_r")
+        groups = self.results.groupby("Instance")
+        best_lower_bound = groups["mip_lower_bound"].transform("max")
+        best_upper_bound = groups["mip_upper_bound"].transform("min")
+        self.results["Relative lower bound"] = self.results["mip_lower_bound"] / best_lower_bound
+        self.results["Relative upper bound"] = self.results["mip_upper_bound"] / best_upper_bound
+
+        if (self.results["mip_sense"] == "min").any():
+            primal_column = "Relative upper bound"
+            obj_column = "mip_upper_bound"
+            predicted_obj_column = "Objective: Predicted upper bound"
+        else:
+            primal_column = "Relative lower bound"
+            obj_column = "mip_lower_bound"
+            predicted_obj_column = "Objective: Predicted lower bound"
+
+        palette = {
+            "baseline": "#9b59b6",
+            "ml-exact": "#3498db",
+            "ml-heuristic": "#95a5a6",
+        }
+        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(
+            nrows=2,
+            ncols=2,
+            figsize=(8, 8),
+        )
+
+        # Wallclock time
+        sns.stripplot(
+            x="Solver",
+            y="mip_wallclock_time",
+            data=self.results,
+            ax=ax1,
+            jitter=0.25,
+            palette=palette,
+            size=2.0,
+        )
+        sns.barplot(
+            x="Solver",
+            y="mip_wallclock_time",
+            data=self.results,
+            ax=ax1,
+            errwidth=0.0,
+            alpha=0.4,
+            palette=palette,
+        )
+        ax1.set(ylabel="Wallclock time (s)")
+
+        # Gap
+        sns.stripplot(
+            x="Solver",
+            y="Gap",
+            jitter=0.25,
+            data=self.results[self.results["Solver"] != "ml-heuristic"],
+            ax=ax2,
+            palette=palette,
+            size=2.0,
+        )
+        ax2.set(ylabel="Relative MIP gap")
+
+        # Relative primal bound
+        sns.stripplot(
+            x="Solver",
+            y=primal_column,
+            jitter=0.25,
+            data=self.results[self.results["Solver"] == "ml-heuristic"],
+            ax=ax3,
+            palette=palette,
+            size=2.0,
+        )
+        sns.scatterplot(
+            x=obj_column,
+            y=predicted_obj_column,
+            hue="Solver",
+            data=self.results[self.results["Solver"] == "ml-exact"],
+            ax=ax4,
+            palette=palette,
+            size=2.0,
+        )
+
+        # Predicted vs actual primal bound
+        xlim, ylim = ax4.get_xlim(), ax4.get_ylim()
+        ax4.plot(
+            [-1e10, 1e10],
+            [-1e10, 1e10],
+            ls="-",
+            color="#cccccc",
+        )
+        ax4.set_xlim(xlim)
+        ax4.set_ylim(ylim)
+        ax4.get_legend().remove()
+        ax4.set(
+            ylabel="Predicted optimal value",
+            xlabel="Actual optimal value",
+        )
+
+        fig.tight_layout()
+        if output is not None:
+            plt.savefig(output)
 
 
 @ignore_warnings(category=ConvergenceWarning)
 def run_benchmarks(
@@ -173,111 +262,3 @@ def run_benchmarks(
     plot(benchmark.results)
-
-
-def plot(
-    results: pd.DataFrame,
-    output: Optional[str] = None,
-) -> None:
-    import matplotlib.pyplot as plt
-    import pandas as pd
-    import seaborn as sns
-
-    sns.set_style("whitegrid")
-    sns.set_palette("Blues_r")
-    groups = results.groupby("Instance")
-    best_lower_bound = groups["mip_lower_bound"].transform("max")
-    best_upper_bound = groups["mip_upper_bound"].transform("min")
-    results["Relative lower bound"] = results["mip_lower_bound"] / best_lower_bound
-    results["Relative upper bound"] = results["mip_upper_bound"] / best_upper_bound
-
-    if (results["mip_sense"] == "min").any():
-        primal_column = "Relative upper bound"
-        obj_column = "mip_upper_bound"
"mip_upper_bound" - predicted_obj_column = "Objective: Predicted upper bound" - else: - primal_column = "Relative lower bound" - obj_column = "mip_lower_bound" - predicted_obj_column = "Objective: Predicted lower bound" - - palette = { - "baseline": "#9b59b6", - "ml-exact": "#3498db", - "ml-heuristic": "#95a5a6", - } - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( - nrows=2, - ncols=2, - figsize=(8, 8), - ) - - # Wallclock time - sns.stripplot( - x="Solver", - y="mip_wallclock_time", - data=results, - ax=ax1, - jitter=0.25, - palette=palette, - size=2.0, - ) - sns.barplot( - x="Solver", - y="mip_wallclock_time", - data=results, - ax=ax1, - errwidth=0.0, - alpha=0.4, - palette=palette, - ) - ax1.set(ylabel="Wallclock time (s)") - - # Gap - sns.stripplot( - x="Solver", - y="Gap", - jitter=0.25, - data=results[results["Solver"] != "ml-heuristic"], - ax=ax2, - palette=palette, - size=2.0, - ) - ax2.set(ylabel="Relative MIP gap") - - # Relative primal bound - sns.stripplot( - x="Solver", - y=primal_column, - jitter=0.25, - data=results[results["Solver"] == "ml-heuristic"], - ax=ax3, - palette=palette, - size=2.0, - ) - sns.scatterplot( - x=obj_column, - y=predicted_obj_column, - hue="Solver", - data=results[results["Solver"] == "ml-exact"], - ax=ax4, - palette=palette, - size=2.0, - ) - - # Predicted vs actual primal bound - xlim, ylim = ax4.get_xlim(), ax4.get_ylim() - ax4.plot( - [-1e10, 1e10], - [-1e10, 1e10], - ls="-", - color="#cccccc", - ) - ax4.set_xlim(xlim) - ax4.set_ylim(ylim) - ax4.get_legend().remove() - ax4.set( - ylabel="Predicted value", - xlabel="Actual value", - ) - - fig.tight_layout() - if output is not None: - plt.savefig(output) diff --git a/miplearn/classifiers/__init__.py b/miplearn/classifiers/__init__.py index 4cb55a5..6625c05 100644 --- a/miplearn/classifiers/__init__.py +++ b/miplearn/classifiers/__init__.py @@ -38,7 +38,7 @@ class Classifier(ABC): np.float16, np.float32, np.float64, - ], f"x_train.dtype shoule be float. Found {x_train.dtype} instead." + ], f"x_train.dtype should be float. Found {x_train.dtype} instead." assert y_train.dtype == np.bool8 assert len(x_train.shape) == 2 assert len(y_train.shape) == 2 diff --git a/miplearn/classifiers/adaptive.py b/miplearn/classifiers/adaptive.py index 1135034..c5437f0 100644 --- a/miplearn/classifiers/adaptive.py +++ b/miplearn/classifiers/adaptive.py @@ -6,8 +6,10 @@ import logging from typing import Dict, Optional import numpy as np +from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression from sklearn.metrics import roc_auc_score +from sklearn.model_selection import cross_val_predict from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -63,6 +65,15 @@ class AdaptiveClassifier(Classifier): super().__init__() if candidates is None: candidates = { + "forest(5,10)": CandidateClassifierSpecs( + classifier=ScikitLearnClassifier( + RandomForestClassifier( + n_estimators=5, + min_samples_split=10, + ), + ), + min_samples=100, + ), "knn(100)": CandidateClassifierSpecs( classifier=ScikitLearnClassifier( KNeighborsClassifier(n_neighbors=100) @@ -92,7 +103,7 @@ class AdaptiveClassifier(Classifier): # If almost all samples belong to the same class, return a fixed prediction and # skip all the other steps. 
-        if y_train[:, 0].mean() > 0.999 or y_train[:, 1].mean() > 0.999:
+        if y_train[:, 0].mean() > 0.99 or y_train[:, 1].mean() > 0.99:
             self.classifier = CountingClassifier()
             self.classifier.fit(x_train, y_train)
             return
@@ -102,13 +113,17 @@ class AdaptiveClassifier(Classifier):
             if n_samples < specs.min_samples:
                 continue
             clf = specs.classifier.clone()
-            clf.fit(x_train, y_train)
-            proba = clf.predict_proba(x_train)
-            # FIXME: Switch to k-fold cross validation
-            score = roc_auc_score(y_train[:, 1], proba[:, 1])
+            if isinstance(clf, ScikitLearnClassifier):
+                # Out-of-fold probabilities; cross_val_predict defaults to hard
+                # predictions, so method="predict_proba" is required for AUC.
+                proba = cross_val_predict(
+                    clf.inner_clf,
+                    x_train,
+                    y_train[:, 1],
+                    method="predict_proba",
+                )[:, 1]
+            else:
+                clf.fit(x_train, y_train)
+                proba = clf.predict_proba(x_train)[:, 1]
+            score = roc_auc_score(y_train[:, 1], proba)
             if score > best_score:
                 best_name, best_clf, best_score = name, clf, score
         logger.debug("Best classifier: %s (score=%.3f)" % (best_name, best_score))
+        if isinstance(best_clf, ScikitLearnClassifier):
+            best_clf.fit(x_train, y_train)
         self.classifier = best_clf
 
     def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
diff --git a/miplearn/classifiers/threshold.py b/miplearn/classifiers/threshold.py
index 2abae59..0853947 100644
--- a/miplearn/classifiers/threshold.py
+++ b/miplearn/classifiers/threshold.py
@@ -7,7 +7,10 @@ from typing import Optional, List
 
 import numpy as np
 from sklearn.metrics._ranking import _binary_clf_curve
+from sklearn.model_selection import cross_val_predict
 
+from miplearn.classifiers.sklearn import ScikitLearnClassifier
+from miplearn.classifiers.adaptive import AdaptiveClassifier
 from miplearn.classifiers import Classifier
 
@@ -95,7 +98,17 @@ class MinPrecisionThreshold(Threshold):
     ) -> None:
         super().fit(clf, x_train, y_train)
         (n_samples, n_classes) = y_train.shape
-        proba = clf.predict_proba(x_train)
+        if isinstance(clf, AdaptiveClassifier) and isinstance(
+            clf.classifier, ScikitLearnClassifier
+        ):
+            proba = cross_val_predict(
+                clf.classifier.inner_clf,
+                x_train,
+                y_train[:, 1],
+                method="predict_proba",
+            )
+        else:
+            proba = clf.predict_proba(x_train)
         self._computed_threshold = [
             self._compute(
                 y_train[:, i],
@@ -114,11 +127,13 @@ class MinPrecisionThreshold(Threshold):
         y_actual: np.ndarray,
         y_prob: np.ndarray,
         min_precision: float,
+        min_recall: float = 0.1,
     ) -> float:
         fps, tps, thresholds = _binary_clf_curve(y_actual, y_prob)
         precision = tps / (tps + fps)
+        recall = tps / tps[-1]
         for k in reversed(range(len(precision))):
-            if precision[k] >= min_precision:
+            if precision[k] >= min_precision and recall[k] >= min_recall:
                 return thresholds[k]
         return float("inf")
diff --git a/miplearn/components/dynamic_lazy.py b/miplearn/components/dynamic_lazy.py
index e13360c..019d922 100644
--- a/miplearn/components/dynamic_lazy.py
+++ b/miplearn/components/dynamic_lazy.py
@@ -1,20 +1,23 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
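+# Usage sketch for the NEW API added at the end of this file (the sample
+# filenames below are hypothetical; `extract` reads HDF5 samples written by
+# LearningSolver and groups features and labels by lazy-constraint identifier):
+#
+#     x, y, cdata = DynamicLazyConstraintsComponent.extract(
+#         ["train/00000.h5", "train/00001.h5"],
+#     )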
+import json
 import logging
 from typing import Dict, List, TYPE_CHECKING, Tuple, Any, Optional
 
 import numpy as np
 from overrides import overrides
+from tqdm.auto import tqdm
 
 from miplearn.classifiers import Classifier
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.classifiers.threshold import MinProbabilityThreshold, Threshold
 from miplearn.components.component import Component
 from miplearn.components.dynamic_common import DynamicConstraintsComponent
-from miplearn.features.sample import Sample
+from miplearn.features.sample import Sample, Hdf5Sample
 from miplearn.instance.base import Instance
 from miplearn.types import LearningSolveStats, ConstraintName, ConstraintCategory
 
 logger = logging.getLogger(__name__)
 
@@ -41,6 +44,7 @@ class DynamicLazyConstraintsComponent(Component):
         self.thresholds = self.dynamic.thresholds
         self.known_violations = self.dynamic.known_violations
         self.lazy_enforced: Dict[ConstraintName, Any] = {}
+        self.n_iterations: int = 0
 
     @staticmethod
     def enforce(
@@ -68,6 +72,7 @@ class DynamicLazyConstraintsComponent(Component):
         violations = {c: self.dynamic.known_violations[c] for c in vnames}
         logger.info("Enforcing %d lazy constraints..." % len(vnames))
         self.enforce(violations, instance, model, solver)
+        self.n_iterations = 0
 
     @overrides
     def after_solve_mip(
@@ -79,6 +84,8 @@ class DynamicLazyConstraintsComponent(Component):
         sample: Sample,
     ) -> None:
         sample.put_scalar("mip_constr_lazy", self.dynamic.encode(self.lazy_enforced))
+        stats["LazyDynamic: Added in callback"] = len(self.lazy_enforced)
+        stats["LazyDynamic: Iterations"] = self.n_iterations
 
     @overrides
     def iteration_cb(
@@ -90,12 +97,14 @@ class DynamicLazyConstraintsComponent(Component):
         assert solver.internal_solver is not None
         logger.debug("Finding violated lazy constraints...")
         violations = instance.find_violated_lazy_constraints(
-            solver.internal_solver, model
+            solver.internal_solver,
+            model,
         )
         if len(violations) == 0:
             logger.debug("No violations found")
             return False
         else:
+            self.n_iterations += 1
             for v in violations:
                 self.lazy_enforced[v] = violations[v]
             logger.debug("  %d violations found" % len(violations))
@@ -142,3 +151,73 @@ class DynamicLazyConstraintsComponent(Component):
         sample: Sample,
     ) -> Dict[ConstraintCategory, Dict[str, float]]:
         return self.dynamic.sample_evaluate(instance, sample)
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # NEW API
+    # ------------------------------------------------------------------------------------------------------------------
+    @staticmethod
+    def extract(filenames, progress=True, known_cids=None):
+        enforced_cids, features = [], []
+        freeze_known_cids = True
+        if known_cids is None:
+            known_cids = set()
+            freeze_known_cids = False
+        for filename in tqdm(
+            filenames,
+            desc="extract (1/2)",
+            disable=not progress,
+        ):
+            with Hdf5Sample(filename, mode="r") as sample:
+                features.append(sample.get_array("lp_var_values"))
+                cids = frozenset(
+                    DynamicConstraintsComponent.decode(
+                        sample.get_scalar("mip_constr_lazy")
+                    ).keys()
+                )
+                enforced_cids.append(cids)
+                if not freeze_known_cids:
+                    known_cids.update(cids)
+
+        x, y, cat, cdata = [], [], [], {}
+        for cid in known_cids:
+            cdata[cid] = json.loads(cid.decode())
+            for i in range(len(features)):
+                cat.append(cid)
+                x.append(features[i])
+                if cid in enforced_cids[i]:
+                    y.append([0, 1])
+                else:
+                    y.append([1, 0])
+        x = np.vstack(x)
+        y = np.vstack(y)
+        cat = np.array(cat)
+        x_dict, y_dict = DynamicLazyConstraintsComponent._split(
+            x,
+            y,
+            cat,
+            progress=progress,
+        )
+        return x_dict, y_dict, cdata
+
+    @staticmethod
+    def _split(x, y, cat, progress=False):
+        # Sort data by categories
+        pi = np.argsort(cat, kind="stable")
+        x = x[pi]
+        y = y[pi]
+        cat = cat[pi]
+
+        # Split categories
+        x_dict = {}
+        y_dict = {}
+        start = 0
+        for end in tqdm(
+            range(len(cat) + 1),
+            desc="extract (2/2)",
+            disable=not progress,
+        ):
+            if (end >= len(cat)) or (cat[start] != cat[end]):
+                x_dict[cat[start]] = x[start:end, :]
+                y_dict[cat[start]] = y[start:end, :]
+                start = end
+        return x_dict, y_dict
\ No newline at end of file
diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py
index 283501e..3e88502 100644
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -20,6 +20,8 @@ from miplearn.types import (
     Category,
     Solution,
 )
+from miplearn.features.sample import Hdf5Sample
+from tqdm.auto import tqdm
 
 logger = logging.getLogger(__name__)
 
@@ -41,7 +44,7 @@ class PrimalSolutionComponent(Component):
         self,
         classifier: Classifier = AdaptiveClassifier(),
         mode: str = "exact",
-        threshold: Threshold = MinPrecisionThreshold([0.98, 0.98]),
+        threshold: Threshold = MinPrecisionThreshold([0.99, 0.99]),
     ) -> None:
         assert isinstance(classifier, Classifier)
         assert isinstance(threshold, Threshold)
@@ -148,6 +151,7 @@ class PrimalSolutionComponent(Component):
         y: Dict = {}
         instance_features = sample.get_array("static_instance_features")
         mip_var_values = sample.get_array("mip_var_values")
+        lp_var_values = sample.get_array("lp_var_values")
         var_features = sample.get_array("lp_var_features")
         var_names = sample.get_array("static_var_names")
         var_types = sample.get_array("static_var_types")
@@ -176,6 +180,8 @@ class PrimalSolutionComponent(Component):
             # Features
             features = list(instance_features)
             features.extend(var_features[i])
+            if lp_var_values is not None:
+                features.append(lp_var_values[i])
             x[category].append(features)
 
             # Labels
@@ -236,11 +242,100 @@ class PrimalSolutionComponent(Component):
         self,
         x: Dict[Category, np.ndarray],
         y: Dict[Category, np.ndarray],
+        progress: bool = False,
     ) -> None:
-        for category in x.keys():
+        for category in tqdm(x.keys(), desc="fit", disable=not progress):
             clf = self.classifier_prototype.clone()
             thr = self.threshold_prototype.clone()
             clf.fit(x[category], y[category])
             thr.fit(clf, x[category], y[category])
             self.classifiers[category] = clf
             self.thresholds[category] = thr
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # NEW API
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def predict(self, x):
+        y_pred = {}
+        for category in x.keys():
+            assert category in self.classifiers, (
+                f"Classifier for category {category} has not been trained. "
+                f"Please call component.fit before component.predict."
+            )
+            xc = np.array(x[category])
+            proba = self.classifiers[category].predict_proba(xc)
+            thr = self.thresholds[category].predict(xc)
+            y_pred[category] = np.vstack(
+                [
+                    proba[:, 0] >= thr[0],
+                    proba[:, 1] >= thr[1],
+                ]
+            ).T
+        return y_pred
+
+    @staticmethod
+    def extract(
+        filenames: List[str],
+        progress: bool = False,
+    ):
+        x, y, cat = [], [], []
+
+        # Read data
+        for filename in tqdm(
+            filenames,
+            desc="extract (1/2)",
+            disable=not progress,
+        ):
+            with Hdf5Sample(filename, mode="r") as sample:
+                mip_var_values = sample.get_array("mip_var_values")
+                var_features = sample.get_array("lp_var_features")
+                var_types = sample.get_array("static_var_types")
+                var_categories = sample.get_array("static_var_categories")
+                assert mip_var_values is not None
+                assert var_features is not None
+                assert var_types is not None
+                assert var_categories is not None
+                x.append(var_features)
+                y.append([mip_var_values < 0.5, mip_var_values > 0.5])
+                cat.extend(var_categories)
+
+        # Convert to numpy arrays
+        x = np.vstack(x)
+        y = np.hstack(y).T
+        cat = np.array(cat)
+
+        # Sort data by categories
+        pi = np.argsort(cat, kind="stable")
+        x = x[pi]
+        y = y[pi]
+        cat = cat[pi]
+
+        # Split categories
+        x_dict = {}
+        y_dict = {}
+        start = 0
+        for end in tqdm(
+            range(len(cat) + 1),
+            desc="extract (2/2)",
+            disable=not progress,
+        ):
+            if (end >= len(cat)) or (cat[start] != cat[end]):
+                x_dict[cat[start]] = x[start:end, :]
+                y_dict[cat[start]] = y[start:end, :]
+                start = end
+
+        return x_dict, y_dict
diff --git a/miplearn/features/sample.py b/miplearn/features/sample.py
index a0d6e43..cd98b0f 100644
--- a/miplearn/features/sample.py
+++ b/miplearn/features/sample.py
@@ -224,3 +224,9 @@ class Hdf5Sample(Sample):
             value, (bytes, bytearray)
         ), f"bytes expected; found: {value.__class__}"  # type: ignore
         self.put_array(key, np.frombuffer(value, dtype="uint8"))
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.file.close()
diff --git a/miplearn/instance/picklegz.py b/miplearn/instance/picklegz.py
index fc502ef..74661a9 100644
--- a/miplearn/instance/picklegz.py
+++ b/miplearn/instance/picklegz.py
@@ -14,6 +14,7 @@ from overrides import overrides
 from miplearn.features.sample import Sample
 from miplearn.instance.base import Instance
 from miplearn.types import ConstraintName
+from p_tqdm import p_umap
 
 if TYPE_CHECKING:
     from miplearn.solvers.learning import InternalSolver
@@ -155,13 +157,20 @@ def write_pickle_gz_multiple(objs: List[Any], dirname: str) -> None:
         write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")
 
 
-def save(objs: List[Any], dirname: str) -> List[str]:
+def save(
+    objs: List[Any],
+    dirname: str,
+    progress: bool = False,
+    n_jobs: int = 1,
+) -> List[str]:
     """
     Saves the provided objects to gzipped pickled files. Files are named
     sequentially as `dirname/00000.pkl.gz`, `dirname/00001.pkl.gz`, etc.
 
     Parameters
     ----------
+    progress: bool
+        If True, show a progress bar.
+    n_jobs: int
+        Number of files to write in parallel.
     objs: List[any]
         List of files to save
     dirname: str
@@ -171,11 +180,12 @@ def save(
     -------
     List containing the relative paths of the saved files.
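+
+    Example
+    -------
+    A sketch with hypothetical objects ``a`` and ``b``::
+
+        save([a, b], "train", n_jobs=2)
+
+    writes ``train/00000.pkl.gz`` and ``train/00001.pkl.gz`` and returns
+    those two paths.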
""" - filenames = [] - for (i, obj) in enumerate(objs): - filename = f"{dirname}/{i:05d}.pkl.gz" - filenames.append(filename) + + def _process(obj, filename): write_pickle_gz(obj, filename) + + filenames = [f"{dirname}/{i:05d}.pkl.gz" for i in range(len(objs))] + p_umap(_process, objs, filenames, num_cpus=n_jobs) return filenames diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py index ec36908..c9a825d 100644 --- a/miplearn/problems/knapsack.py +++ b/miplearn/problems/knapsack.py @@ -57,8 +57,8 @@ class MultiKnapsackInstance(Instance): model = pe.ConcreteModel() model.x = pe.Var(range(self.n), domain=pe.Binary) model.OBJ = pe.Objective( - expr=sum(model.x[j] * self.prices[j] for j in range(self.n)), - sense=pe.maximize, + expr=sum(-model.x[j] * self.prices[j] for j in range(self.n)), + sense=pe.minimize, ) model.eq_capacity = pe.ConstraintList() for i in range(self.m): @@ -82,6 +82,7 @@ class MultiKnapsackGenerator: alpha: rv_frozen = uniform(loc=0.25, scale=0.0), fix_w: bool = False, w_jitter: rv_frozen = uniform(loc=1.0, scale=0.0), + p_jitter: rv_frozen = uniform(loc=1.0, scale=0.0), round: bool = True, ): """Initialize the problem generator. @@ -165,6 +166,7 @@ class MultiKnapsackGenerator: self.K = K self.alpha = alpha self.w_jitter = w_jitter + self.p_jitter = p_jitter self.round = round self.fix_n: Optional[int] = None self.fix_m: Optional[int] = None @@ -179,8 +181,8 @@ class MultiKnapsackGenerator: self.fix_u = self.u.rvs(self.fix_n) self.fix_K = self.K.rvs() - def generate(self, n_samples: int) -> List[MultiKnapsackInstance]: - def _sample() -> MultiKnapsackInstance: + def generate(self, n_samples: int) -> List[MultiKnapsackData]: + def _sample() -> MultiKnapsackData: if self.fix_w is not None: assert self.fix_m is not None assert self.fix_n is not None @@ -199,12 +201,30 @@ class MultiKnapsackGenerator: K = self.K.rvs() w = w * np.array([self.w_jitter.rvs(n) for _ in range(m)]) alpha = self.alpha.rvs(m) - p = np.array([w[:, j].sum() / m + K * u[j] for j in range(n)]) + p = np.array( + [w[:, j].sum() / m + K * u[j] for j in range(n)] + ) * self.p_jitter.rvs(n) b = np.array([w[i, :].sum() * alpha[i] for i in range(m)]) if self.round: p = p.round() b = b.round() w = w.round() - return MultiKnapsackInstance(p, b, w) + return MultiKnapsackData(p, b, w) return [_sample() for _ in range(n_samples)] + + +def build_multiknapsack_model(data: MultiKnapsackData) -> pe.ConcreteModel: + model = pe.ConcreteModel() + m, n = data.weights.shape + model.x = pe.Var(range(n), domain=pe.Binary) + model.OBJ = pe.Objective( + expr=sum(-model.x[j] * data.prices[j] for j in range(n)), + sense=pe.minimize, + ) + model.eq_capacity = pe.ConstraintList() + for i in range(m): + model.eq_capacity.add( + sum(model.x[j] * data.weights[i, j] for j in range(n)) <= data.capacities[i] + ) + return model diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py index 67cdc7d..07c15b8 100644 --- a/miplearn/problems/stab.py +++ b/miplearn/problems/stab.py @@ -114,8 +114,8 @@ def build_stab_model(data: MaxWeightStableSetData) -> pe.ConcreteModel: nodes = list(data.graph.nodes) model.x = pe.Var(nodes, domain=pe.Binary) model.OBJ = pe.Objective( - expr=sum(model.x[v] * data.weights[v] for v in nodes), - sense=pe.maximize, + expr=sum(-model.x[v] * data.weights[v] for v in nodes), + sense=pe.minimize, ) model.clique_eqs = pe.ConstraintList() for clique in nx.find_cliques(data.graph): diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py index 90a7090..e06a356 100644 --- 
+++ b/miplearn/solvers/learning.py
@@ -22,34 +22,57 @@ from miplearn.instance.base import Instance
 from miplearn.solvers import _RedirectOutput
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
-from miplearn.types import LearningSolveStats
+from miplearn.types import LearningSolveStats, ConstraintName
 import gzip
 import pickle
 import miplearn
+import json
 from os.path import exists
+from os import remove
+
+import pyomo.environ as pe
 
 logger = logging.getLogger(__name__)
 
 
+class PyomoFindLazyCutCallbackHandler:
+    def __init__(self):
+        pass
+
+    def value(self, var):
+        return var.value
+
+
+class PyomoEnforceLazyCutsCallbackHandler:
+    def __init__(self, opt, model):
+        self.model = model
+        self.opt = opt
+        if not hasattr(model, "miplearn_lazy_cb"):
+            model.miplearn_lazy_cb = pe.ConstraintList()
+
+    def enforce(self, expr):
+        constr = self.model.miplearn_lazy_cb.add(expr=expr)
+        self.opt.add_constraint(constr)
+
+
 class FileInstanceWrapper(Instance):
     def __init__(
-        self,
-        data_filename: Any,
-        build_model: Callable,
+        self, data_filename: Any, build_model: Callable, mode: Optional[str] = None
     ):
         super().__init__()
         assert data_filename.endswith(".pkl.gz")
         self.filename = data_filename
         self.sample_filename = data_filename.replace(".pkl.gz", ".h5")
-        self.sample = Hdf5Sample(
-            self.sample_filename,
-            mode="r+" if exists(self.sample_filename) else "w",
-        )
         self.build_model = build_model
+        self.mode = mode
+        self.sample = None
+        self.model = None
 
     @overrides
     def to_model(self) -> Any:
-        return miplearn.load(self.filename, self.build_model)
+        if self.model is None:
+            self.model = miplearn.load(self.filename, self.build_model)
+        return self.model
 
     @overrides
     def create_sample(self) -> Sample:
@@ -59,6 +82,44 @@ class FileInstanceWrapper(Instance):
     def get_samples(self) -> List[Sample]:
         return [self.sample]
 
+    @overrides
+    def free(self) -> None:
+        self.sample.file.close()
+
+    @overrides
+    def load(self) -> None:
+        if self.mode is None:
+            self.mode = "r+" if exists(self.sample_filename) else "w"
+        self.sample = Hdf5Sample(self.sample_filename, mode=self.mode)
+
+    @overrides
+    def has_dynamic_lazy_constraints(self) -> bool:
+        assert hasattr(self, "model")
+        return hasattr(self.model, "_miplearn_find_lazy_cuts")
+
+    @overrides
+    def find_violated_lazy_constraints(
+        self,
+        solver: "InternalSolver",
+        model: Any,
+    ) -> Dict[ConstraintName, Any]:
+        if not hasattr(self.model, "_miplearn_find_lazy_cuts"):
+            return {}
+        cb = PyomoFindLazyCutCallbackHandler()
+        violations = model._miplearn_find_lazy_cuts(cb)
+        return {json.dumps(v).encode(): v for v in violations}
+
+    @overrides
+    def enforce_lazy_constraint(
+        self,
+        solver: "InternalSolver",
+        model: Any,
+        violation: Any,
+    ) -> None:
+        assert isinstance(solver, GurobiPyomoSolver)
+        cb = PyomoEnforceLazyCutsCallbackHandler(solver._pyomo_solver, model)
+        model._miplearn_enforce_lazy_cuts(cb, violation)
+
 
 class MemoryInstanceWrapper(Instance):
     def __init__(self, model: Any) -> None:
@@ -70,12 +131,39 @@ class MemoryInstanceWrapper(Instance):
     def to_model(self) -> Any:
         return self.model
 
+    @overrides
+    def has_dynamic_lazy_constraints(self) -> bool:
+        assert hasattr(self, "model")
+        return hasattr(self.model, "_miplearn_find_lazy_cuts")
+
+    @overrides
+    def find_violated_lazy_constraints(
+        self,
+        solver: "InternalSolver",
+        model: Any,
+    ) -> Dict[ConstraintName, Any]:
+        cb = PyomoFindLazyCutCallbackHandler()
+        violations = model._miplearn_find_lazy_cuts(cb)
+        # Violations are keyed by their JSON encoding, so they can be stored
+        # in HDF5 samples and compared across solves.
+        return {json.dumps(v).encode(): v for v in violations}
+
+    @overrides
+    def enforce_lazy_constraint(
+        self,
+        solver: "InternalSolver",
+        model: Any,
+        violation: Any,
+    ) -> None:
+        assert isinstance(solver, GurobiPyomoSolver)
+        cb = PyomoEnforceLazyCutsCallbackHandler(solver._pyomo_solver, model)
+        model._miplearn_enforce_lazy_cuts(cb, violation)
+
 
 class _GlobalVariables:
     def __init__(self) -> None:
         self.solver: Optional[LearningSolver] = None
-        self.instances: Optional[List[Instance]] = None
-        self.discard_outputs: bool = False
+        self.build_model: Optional[Callable] = None
+        self.filenames: Optional[List[str]] = None
+        self.skip = False
 
 
 # Global variables used for multiprocessing. Global variables are copied by the
@@ -86,23 +174,19 @@ _GLOBAL = [_GlobalVariables()]
 
 def _parallel_solve(
     idx: int,
-) -> Tuple[Optional[int], Optional[LearningSolveStats], Optional[Instance]]:
+) -> Tuple[Optional[int], Optional[LearningSolveStats]]:
     solver = _GLOBAL[0].solver
-    instances = _GLOBAL[0].instances
-    discard_outputs = _GLOBAL[0].discard_outputs
+    filenames = _GLOBAL[0].filenames
+    build_model = _GLOBAL[0].build_model
+    skip = _GLOBAL[0].skip
     assert solver is not None
-    assert instances is not None
     try:
-        stats = solver._solve(
-            instances[idx],
-            discard_output=discard_outputs,
-        )
-        instances[idx].free()
-        return idx, stats, instances[idx]
+        stats = solver.solve([filenames[idx]], build_model, skip=skip)
+        return idx, stats[0]
     except Exception as e:
         traceback.print_exc()
-        logger.exception(f"Exception while solving {instances[idx]}. Ignoring.")
-        return None, None, None
+        logger.exception(f"Exception while solving {filenames[idx]}. Ignoring.")
+        return idx, None
 
 
 class LearningSolver:
@@ -380,87 +464,86 @@ class LearningSolver:
         build_model: Optional[Callable] = None,
         tee: bool = False,
         progress: bool = False,
+        skip: bool = False,
    ) -> Union[LearningSolveStats, List[LearningSolveStats]]:
         if isinstance(arg, list):
             assert build_model is not None
             stats = []
             for i in tqdm(arg, disable=not progress):
-                s = self._solve(FileInstanceWrapper(i, build_model), tee=tee)
-                stats.append(s)
+                instance = FileInstanceWrapper(i, build_model)
+                solved = False
+                if exists(instance.sample_filename):
+                    try:
+                        with Hdf5Sample(instance.sample_filename, mode="r") as sample:
+                            if sample.get_scalar("mip_lower_bound") is not None:
+                                solved = True
+                    except OSError:
+                        # File exists but it is unreadable/corrupted. Delete it.
+                        remove(instance.sample_filename)
+                if solved and skip:
+                    stats.append({})
+                else:
+                    s = self._solve(instance, tee=tee)
+
+                    # Export to gzipped MPS file
+                    mps_filename = instance.sample_filename.replace(".h5", ".mps")
+                    instance.model.write(
+                        filename=mps_filename,
+                        io_options={
+                            "labeler": pe.NameLabeler(),
+                            "skip_objective_sense": True,
+                        },
+                    )
+                    with open(mps_filename, "rb") as original:
+                        with gzip.open(f"{mps_filename}.gz", "wb") as compressed:
+                            compressed.writelines(original)
+                    remove(mps_filename)
+
+                    stats.append(s)
             return stats
         else:
             return self._solve(MemoryInstanceWrapper(arg), tee=tee)
 
-    def fit(self, filenames: List[str], build_model: Callable) -> None:
+    def fit(
+        self,
+        filenames: List[str],
+        build_model: Callable,
+        progress: bool = False,
+        n_jobs: int = 1,
+    ) -> None:
         instances: List[Instance] = [
-            FileInstanceWrapper(f, build_model) for f in filenames
+            FileInstanceWrapper(f, build_model, mode="r") for f in filenames
         ]
-        self._fit(instances)
+        self._fit(instances, progress=progress, n_jobs=n_jobs)
 
     def parallel_solve(
         self,
-        instances: List[Instance],
+        filenames: List[str],
+        build_model: Optional[Callable] = None,
         n_jobs: int = 4,
-        label: str = "solve",
-        discard_outputs: bool = False,
         progress: bool = False,
+        label: str = "solve",
+        skip: bool = False,
     ) -> List[LearningSolveStats]:
-        """
-        Solves multiple instances in parallel.
-
-        This method is equivalent to calling `solve` for each item on the list,
-        but it processes multiple instances at the same time. Like `solve`, this
-        method modifies each instance in place. Also like `solve`, a list of
-        filenames may be provided.
-
-        Parameters
-        ----------
-        discard_outputs: bool
-            If True, do not write the modified instances anywhere; simply discard
-            them instead. Useful during benchmarking.
-        label: str
-            Label to show in the progress bar.
-        instances: List[Instance]
-            The instances to be solved.
-        n_jobs: int
-            Number of instances to solve in parallel at a time.
-
-        Returns
-        -------
-        List[LearningSolveStats]
-            List of solver statistics, with one entry for each provided instance.
-            The list is the same you would obtain by calling
-            `[solver.solve(p) for p in instances]`
-        """
-        if n_jobs == 1:
-            return [
-                self._solve(p)
-                for p in tqdm(
-                    instances,
-                    disable=not progress,
-                    desc=label,
-                )
-            ]
-        else:
-            self.internal_solver = None
-            self._silence_miplearn_logger()
-            _GLOBAL[0].solver = self
-            _GLOBAL[0].instances = instances
-            _GLOBAL[0].discard_outputs = discard_outputs
-            results = p_umap(
-                _parallel_solve,
-                list(range(len(instances))),
-                num_cpus=n_jobs,
-                desc=label,
-                disable=not progress,
-            )
-            results = [r for r in results if r[1]]
-            stats: List[LearningSolveStats] = [{} for _ in range(len(results))]
-            for (idx, s, instance) in results:
+        self.internal_solver = None
+        self._silence_miplearn_logger()
+        _GLOBAL[0].solver = self
+        _GLOBAL[0].build_model = build_model
+        _GLOBAL[0].filenames = filenames
+        _GLOBAL[0].skip = skip
+        results = p_umap(
+            _parallel_solve,
+            list(range(len(filenames))),
+            num_cpus=n_jobs,
+            disable=not progress,
+            desc=label,
+        )
+        stats: List[LearningSolveStats] = [{} for _ in range(len(filenames))]
+        for (idx, s) in results:
+            if s:
                 stats[idx] = s
-                instances[idx] = instance
-            self._restore_miplearn_logger()
-            return stats
+        self._restore_miplearn_logger()
+        return stats
 
     def _fit(
         self,
diff --git a/miplearn/solvers/pyomo/base.py b/miplearn/solvers/pyomo/base.py
index 6ad7d89..8c34533 100644
--- a/miplearn/solvers/pyomo/base.py
+++ b/miplearn/solvers/pyomo/base.py
@@ -60,6 +60,7 @@ class BasePyomoSolver(InternalSolver):
         self._obj_sense: str = "min"
         self._varname_to_var: Dict[bytes, pe.Var] = {}
         self._varname_to_idx: Dict[str, int] = {}
+        self._name_buffer = {}
         self._cname_to_constr: Dict[str, pe.Constraint] = {}
         self._termination_condition: str = ""
         self._has_lp_solution = False
@@ -203,12 +204,12 @@ class BasePyomoSolver(InternalSolver):
                     if isinstance(term, MonomialTermExpression):
                         lhs_row.append(row)
                         lhs_col.append(
-                            self._varname_to_idx[term._args_[1].name]
+                            self._varname_to_idx[self.name(term._args_[1])]
                        )
                         lhs_data.append(float(term._args_[0]))
                     elif isinstance(term, _GeneralVarData):
                         lhs_row.append(row)
-                        lhs_col.append(self._varname_to_idx[term.name])
+                        lhs_col.append(self._varname_to_idx[self.name(term)])
                         lhs_data.append(1.0)
                     else:
                         raise Exception(
@@ -216,7 +217,7 @@ class BasePyomoSolver(InternalSolver):
                 )
             elif isinstance(expr, _GeneralVarData):
                 lhs_row.append(row)
-                lhs_col.append(self._varname_to_idx[expr.name])
+                lhs_col.append(self._varname_to_idx[self.name(expr)])
                 lhs_data.append(1.0)
             else:
                 raise Exception(
@@ -235,11 +236,11 @@ class BasePyomoSolver(InternalSolver):
         for (i, constr) in enumerate(model.component_objects(pyomo.core.Constraint)):
             if isinstance(constr, pe.ConstraintList):
                 for idx in constr:
-                    names.append(constr[idx].name)
+                    names.append(self.name(constr[idx]))
                     _parse_constraint(constr[idx], curr_row)
                     curr_row += 1
             else:
-                names.append(constr.name)
+                names.append(self.name(constr))
                 _parse_constraint(constr, curr_row)
                 curr_row += 1
 
@@ -276,7 +277,7 @@ class BasePyomoSolver(InternalSolver):
             for index in var:
                 if var[index].fixed:
                     continue
-                solution[var[index].name.encode()] = var[index].value
+                solution[self.name(var[index]).encode()] = var[index].value
         return solution
 
     @overrides
@@ -301,9 +302,9 @@ class BasePyomoSolver(InternalSolver):
 
                 # Variable name
                 if idx is None:
-                    names.append(var.name)
+                    names.append(self.name(var))
                 else:
-                    names.append(var[idx].name)
+                    names.append(self.name(var[idx]))
 
                 if with_static:
                     # Variable type
@@ -332,8 +333,9 @@
                     lower_bounds.append(-float("inf"))
 
                 # Objective coefficient
-                if v.name in self._obj:
-                    obj_coeffs.append(self._obj[v.name])
+                name = self.name(v)
+                if name in self._obj:
+                    obj_coeffs.append(self._obj[name])
                 else:
                     obj_coeffs.append(0.0)
 
@@ -544,13 +546,13 @@ class BasePyomoSolver(InternalSolver):
         if isinstance(expr, SumExpression):
             for term in expr._args_:
                 if isinstance(term, MonomialTermExpression):
-                    lhs[term._args_[1].name] = float(term._args_[0])
+                    lhs[self.name(term._args_[1])] = float(term._args_[0])
                 elif isinstance(term, _GeneralVarData):
-                    lhs[term.name] = 1.0
+                    lhs[self.name(term)] = 1.0
                 else:
                     raise Exception(f"Unknown term type: {term.__class__.__name__}")
         elif isinstance(expr, _GeneralVarData):
-            lhs[expr.name] = 1.0
+            lhs[self.name(expr)] = 1.0
         else:
             raise Exception(f"Unknown expression type: {expr.__class__.__name__}")
         return lhs
@@ -581,9 +583,9 @@ class BasePyomoSolver(InternalSolver):
         self._varname_to_idx = {}
         for var in self.model.component_objects(Var):
             for idx in var:
-                varname = var.name
+                varname = self.name(var)
                 if idx is not None:
-                    varname = var[idx].name
+                    varname = self.name(var[idx])
                 self._varname_to_var[varname.encode()] = var[idx]
                 self._varname_to_idx[varname] = len(self._all_vars)
                 self._all_vars += [var[idx]]
@@ -599,9 +601,12 @@ class BasePyomoSolver(InternalSolver):
         for constr in self.model.component_objects(pyomo.core.Constraint):
             if isinstance(constr, pe.ConstraintList):
                 for idx in constr:
-                    self._cname_to_constr[constr[idx].name] = constr[idx]
+                    self._cname_to_constr[self.name(constr[idx])] = constr[idx]
             else:
-                self._cname_to_constr[constr.name] = constr
+                self._cname_to_constr[self.name(constr)] = constr
+
+    def name(self, comp):
+        return comp.getname(name_buffer=self._name_buffer)
 
 
 class PyomoTestInstanceInfeasible(Instance):
diff --git a/miplearn/solvers/pyomo/gurobi.py b/miplearn/solvers/pyomo/gurobi.py
index f350405..9487845 100644
--- a/miplearn/solvers/pyomo/gurobi.py
+++ b/miplearn/solvers/pyomo/gurobi.py
@@ -52,3 +52,10 @@ class GurobiPyomoSolver(BasePyomoSolver):
     @overrides
     def _get_node_count_regexp(self) -> Optional[str]:
         return None
+
+    def set_priorities(self, priorities):
+        for (var_name, priority) in priorities.items():
+            pvar = self._varname_to_var[var_name]
+            gvar = self._pyomo_solver._pyomo_var_to_solver_var_map[pvar]
+            gvar.branchPriority = priority
+        return None
diff --git a/setup.py b/setup.py
index f2af614..ca79628 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@ setup(
     python_requires=">=3.7",
     install_requires=[
         "decorator>=4,<5",
-        "h5py>=3,<4",
+        "h5py==3.5.0",
         "matplotlib>=3,<4",
         "mypy==0.790",
         "networkx>=2,<3",
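-- 
Usage sketch (not part of the patch; the paths and the build_model callable
are hypothetical). The file-based workflow that these changes converge on:

    from miplearn.solvers.learning import LearningSolver

    solver = LearningSolver()
    solver.solve(["train/00000.pkl.gz"], build_model)  # also writes train/00000.h5
    solver.fit(["train/00000.pkl.gz"], build_model)
    solver.parallel_solve(["train/00000.pkl.gz"], build_model, n_jobs=4, skip=True)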