From 522f3a7e180631d2cbf5ca2130b78991d93675b0 Mon Sep 17 00:00:00 2001 From: "Alinson S. Xavier" Date: Tue, 22 Feb 2022 15:21:56 -0600 Subject: [PATCH] Change LearningSolver.solve and fit --- miplearn/problems/stab.py | 14 ++ miplearn/solvers/learning.py | 156 ++++++++++++--------- tests/components/test_dynamic_user_cuts.py | 6 +- tests/components/test_objective.py | 6 +- tests/components/test_primal.py | 6 +- tests/instance/test_file.py | 2 +- tests/problems/test_knapsack.py | 2 +- tests/problems/test_stab.py | 2 +- tests/problems/test_tsp.py | 8 +- tests/solvers/test_learning_solver.py | 62 +++++--- 10 files changed, 157 insertions(+), 107 deletions(-) diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py index 97e5559..caa74c2 100644 --- a/miplearn/problems/stab.py +++ b/miplearn/problems/stab.py @@ -131,3 +131,17 @@ class MaxWeightStableSetGenerator: def _generate_graph(self) -> Graph: return nx.generators.random_graphs.binomial_graph(self.n.rvs(), self.p.rvs()) + + +def build_stab_model(data: MaxWeightStableSetData) -> pe.ConcreteModel: + model = pe.ConcreteModel() + nodes = list(data.graph.nodes) + model.x = pe.Var(nodes, domain=pe.Binary) + model.OBJ = pe.Objective( + expr=sum(model.x[v] * data.weights[v] for v in nodes), + sense=pe.maximize, + ) + model.clique_eqs = pe.ConstraintList() + for clique in nx.find_cliques(data.graph): + model.clique_eqs.add(sum(model.x[v] for v in clique) <= 1) + return model diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py index 753a228..1232a14 100644 --- a/miplearn/solvers/learning.py +++ b/miplearn/solvers/learning.py @@ -5,10 +5,12 @@ import logging import time import traceback -from typing import Optional, List, Any, cast, Dict, Tuple +from typing import Optional, List, Any, cast, Dict, Tuple, Callable, IO +from overrides import overrides from p_tqdm import p_map +from miplearn.features.sample import Hdf5Sample, Sample from miplearn.components.component import Component from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent from miplearn.components.dynamic_user_cuts import UserCutsComponent @@ -16,15 +18,44 @@ from miplearn.components.objective import ObjectiveValueComponent from miplearn.components.primal import PrimalSolutionComponent from miplearn.features.extractor import FeaturesExtractor from miplearn.instance.base import Instance -from miplearn.instance.picklegz import PickleGzInstance from miplearn.solvers import _RedirectOutput from miplearn.solvers.internal import InternalSolver from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver from miplearn.types import LearningSolveStats +import gzip +import pickle +from os.path import exists logger = logging.getLogger(__name__) +class InstanceWrapper(Instance): + def __init__(self, data_filename: Any, build_model: Callable): + super().__init__() + assert data_filename.endswith(".pkl.gz") + self.filename = data_filename + self.sample_filename = data_filename.replace(".pkl.gz", ".h5") + self.sample = Hdf5Sample( + self.sample_filename, + mode="r+" if exists(self.sample_filename) else "w", + ) + self.build_model = build_model + + @overrides + def to_model(self) -> Any: + with gzip.GzipFile(self.filename, "rb") as file: + data = pickle.load(cast(IO[bytes], file)) + return self.build_model(data) + + @overrides + def create_sample(self) -> Sample: + return self.sample + + @overrides + def get_samples(self) -> List[Sample]: + return [self.sample] + + class _GlobalVariables: def __init__(self) -> None: self.solver: Optional[LearningSolver] = None @@ -47,7 +78,7 @@ def _parallel_solve( assert solver is not None assert instances is not None try: - stats = solver.solve( + stats = solver._solve( instances[idx], discard_output=discard_outputs, ) @@ -86,11 +117,6 @@ class LearningSolver: option should be activated if the LP relaxation is not very expensive to solve and if it provides good hints for the integer solution. - simulate_perfect: bool - If true, each call to solve actually performs three actions: solve - the original problem, train the ML models on the data that was just - collected, and solve the problem again. This is useful for evaluating - the theoretical performance of perfect ML models. """ def __init__( @@ -100,7 +126,6 @@ class LearningSolver: solver: Optional[InternalSolver] = None, use_lazy_cb: bool = False, solve_lp: bool = True, - simulate_perfect: bool = False, extractor: Optional[FeaturesExtractor] = None, extract_lhs: bool = True, extract_sa: bool = True, @@ -117,7 +142,6 @@ class LearningSolver: self.internal_solver: Optional[InternalSolver] = None self.internal_solver_prototype: InternalSolver = solver self.mode: str = mode - self.simulate_perfect: bool = simulate_perfect self.solve_lp: bool = solve_lp self.tee = False self.use_lazy_cb: bool = use_lazy_cb @@ -139,6 +163,44 @@ class LearningSolver: discard_output: bool = False, tee: bool = False, ) -> LearningSolveStats: + """ + Solves the given instance. If trained machine-learning models are + available, they will be used to accelerate the solution process. + + The argument `instance` may be either an Instance object or a + filename pointing to a pickled Instance object. + + This method adds a new training sample to `instance.training_sample`. + If a filename is provided, then the file is modified in-place. That is, + the original file is overwritten. + + If `solver.solve_lp_first` is False, the properties lp_solution and + lp_value will be set to dummy values. + + Parameters + ---------- + instance: Instance + The instance to be solved. + model: Any + The corresponding Pyomo model. If not provided, it will be created. + discard_output: bool + If True, do not write the modified instances anywhere; simply discard + them. Useful during benchmarking. + tee: bool + If true, prints solver log to screen. + + Returns + ------- + LearningSolveStats + A dictionary of solver statistics containing at least the following + keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes", + "Sense", "Log", "Warm start value" and "LP value". + + Additional components may generate additional keys. For example, + ObjectiveValueComponent adds the keys "Predicted LB" and + "Predicted UB". See the documentation of each component for more + details. + """ # Generate model # ------------------------------------------------------- @@ -299,65 +361,19 @@ class LearningSolver: def solve( self, - instance: Instance, - model: Any = None, - discard_output: bool = False, - tee: bool = False, - ) -> LearningSolveStats: - """ - Solves the given instance. If trained machine-learning models are - available, they will be used to accelerate the solution process. - - The argument `instance` may be either an Instance object or a - filename pointing to a pickled Instance object. - - This method adds a new training sample to `instance.training_sample`. - If a filename is provided, then the file is modified in-place. That is, - the original file is overwritten. - - If `solver.solve_lp_first` is False, the properties lp_solution and - lp_value will be set to dummy values. - - Parameters - ---------- - instance: Instance - The instance to be solved. - model: Any - The corresponding Pyomo model. If not provided, it will be created. - discard_output: bool - If True, do not write the modified instances anywhere; simply discard - them. Useful during benchmarking. - tee: bool - If true, prints solver log to screen. - - Returns - ------- - LearningSolveStats - A dictionary of solver statistics containing at least the following - keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes", - "Sense", "Log", "Warm start value" and "LP value". + filenames: List[str], + build_model: Callable, + tee: bool = True, + ) -> List[LearningSolveStats]: + stats = [] + for f in filenames: + s = self._solve(InstanceWrapper(f, build_model), tee=tee) + stats.append(s) + return stats - Additional components may generate additional keys. For example, - ObjectiveValueComponent adds the keys "Predicted LB" and - "Predicted UB". See the documentation of each component for more - details. - """ - if self.simulate_perfect: - if not isinstance(instance, PickleGzInstance): - raise Exception("Not implemented") - self._solve( - instance=instance, - model=model, - tee=tee, - ) - self.fit([instance]) - instance.instance = None - return self._solve( - instance=instance, - model=model, - discard_output=discard_output, - tee=tee, - ) + def fit(self, filenames: List[str], build_model: Callable) -> None: + instances: List[Instance] = [InstanceWrapper(f, build_model) for f in filenames] + self._fit(instances) def parallel_solve( self, @@ -394,7 +410,7 @@ class LearningSolver: `[solver.solve(p) for p in instances]` """ if n_jobs == 1: - return [self.solve(p) for p in instances] + return [self._solve(p) for p in instances] else: self.internal_solver = None self._silence_miplearn_logger() @@ -415,7 +431,7 @@ class LearningSolver: self._restore_miplearn_logger() return stats - def fit( + def _fit( self, training_instances: List[Instance], n_jobs: int = 1, diff --git a/tests/components/test_dynamic_user_cuts.py b/tests/components/test_dynamic_user_cuts.py index 10e688d..f8b3a5f 100644 --- a/tests/components/test_dynamic_user_cuts.py +++ b/tests/components/test_dynamic_user_cuts.py @@ -87,7 +87,7 @@ def test_usage( stab_instance: Instance, solver: LearningSolver, ) -> None: - stats_before = solver.solve(stab_instance) + stats_before = solver._solve(stab_instance) sample = stab_instance.get_samples()[0] user_cuts_encoded = sample.get_scalar("mip_user_cuts") assert user_cuts_encoded is not None @@ -97,8 +97,8 @@ def test_usage( assert stats_before["UserCuts: Added ahead-of-time"] == 0 assert stats_before["UserCuts: Added in callback"] > 0 - solver.fit([stab_instance]) - stats_after = solver.solve(stab_instance) + solver._fit([stab_instance]) + stats_after = solver._solve(stab_instance) assert ( stats_after["UserCuts: Added ahead-of-time"] == stats_before["UserCuts: Added in callback"] diff --git a/tests/components/test_objective.py b/tests/components/test_objective.py index fc45083..f81eb8d 100644 --- a/tests/components/test_objective.py +++ b/tests/components/test_objective.py @@ -134,8 +134,8 @@ def test_sample_evaluate(sample: Sample) -> None: def test_usage() -> None: solver = LearningSolver(components=[ObjectiveValueComponent()]) instance = GurobiPyomoSolver().build_test_instance_knapsack() - solver.solve(instance) - solver.fit([instance]) - stats = solver.solve(instance) + solver._solve(instance) + solver._fit([instance]) + stats = solver._solve(instance) assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"] assert stats["mip_upper_bound"] == stats["Objective: Predicted upper bound"] diff --git a/tests/components/test_primal.py b/tests/components/test_primal.py index 83b1096..aa6074a 100644 --- a/tests/components/test_primal.py +++ b/tests/components/test_primal.py @@ -110,9 +110,9 @@ def test_usage() -> None: gen = TravelingSalesmanGenerator(n=randint(low=5, high=6)) data = gen.generate(1) instance = TravelingSalesmanInstance(data[0].n_cities, data[0].distances) - solver.solve(instance) - solver.fit([instance]) - stats = solver.solve(instance) + solver._solve(instance) + solver._fit([instance]) + stats = solver._solve(instance) assert stats["Primal: Free"] == 0 assert stats["Primal: One"] + stats["Primal: Zero"] == 10 assert stats["mip_lower_bound"] == stats["mip_warm_start_value"] diff --git a/tests/instance/test_file.py b/tests/instance/test_file.py index bad2fc5..446cb2e 100644 --- a/tests/instance/test_file.py +++ b/tests/instance/test_file.py @@ -22,7 +22,7 @@ def test_usage() -> None: # Solve instance from disk solver = LearningSolver(solver=GurobiSolver()) - solver.solve(FileInstance(filename)) + solver._solve(FileInstance(filename)) # Assert HDF5 contains training data sample = FileInstance(filename).get_samples()[0] diff --git a/tests/problems/test_knapsack.py b/tests/problems/test_knapsack.py index 59561d0..760b58c 100644 --- a/tests/problems/test_knapsack.py +++ b/tests/problems/test_knapsack.py @@ -36,4 +36,4 @@ def test_knapsack() -> None: weights=data[0].weights, ) solver = LearningSolver() - solver.solve(instance) + solver._solve(instance) diff --git a/tests/problems/test_stab.py b/tests/problems/test_stab.py index e04a5e0..27a2e78 100644 --- a/tests/problems/test_stab.py +++ b/tests/problems/test_stab.py @@ -15,7 +15,7 @@ def test_stab() -> None: weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0]) instance = MaxWeightStableSetInstance(graph, weights) solver = LearningSolver() - stats = solver.solve(instance) + stats = solver._solve(instance) assert stats["mip_lower_bound"] == 2.0 diff --git a/tests/problems/test_tsp.py b/tests/problems/test_tsp.py index 5c6fbc8..f3cc510 100644 --- a/tests/problems/test_tsp.py +++ b/tests/problems/test_tsp.py @@ -40,7 +40,7 @@ def test_instance() -> None: ) instance = TravelingSalesmanInstance(n_cities, distances) solver = LearningSolver() - solver.solve(instance) + solver._solve(instance) assert len(instance.get_samples()) == 1 sample = instance.get_samples()[0] assert_equals(sample.get_array("mip_var_values"), [1.0, 0.0, 1.0, 1.0, 0.0, 1.0]) @@ -63,7 +63,7 @@ def test_subtour() -> None: distances = squareform(pdist(cities)) instance = TravelingSalesmanInstance(n_cities, distances) solver = LearningSolver() - solver.solve(instance) + solver._solve(instance) samples = instance.get_samples() assert len(samples) == 1 sample = samples[0] @@ -96,5 +96,5 @@ def test_subtour() -> None: 1.0, ], ) - solver.fit([instance]) - solver.solve(instance) + solver._fit([instance]) + solver._solve(instance) diff --git a/tests/solvers/test_learning_solver.py b/tests/solvers/test_learning_solver.py index 97fcf47..02d08f6 100644 --- a/tests/solvers/test_learning_solver.py +++ b/tests/solvers/test_learning_solver.py @@ -5,19 +5,27 @@ import logging import os import tempfile +from os.path import exists from typing import List, cast import dill +from scipy.stats import randint +from miplearn.features.sample import Hdf5Sample from miplearn.instance.base import Instance -from miplearn.instance.picklegz import PickleGzInstance, write_pickle_gz, read_pickle_gz -from miplearn.solvers.gurobi import GurobiSolver +from miplearn.instance.picklegz import ( + PickleGzInstance, + write_pickle_gz, + read_pickle_gz, + save, +) +from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model from miplearn.solvers.internal import InternalSolver from miplearn.solvers.learning import LearningSolver +from miplearn.solvers.tests import assert_equals # noinspection PyUnresolvedReferences from tests.solvers.test_internal_solver import internal_solvers -from miplearn.solvers.tests import assert_equals logger = logging.getLogger(__name__) @@ -34,7 +42,7 @@ def test_learning_solver( mode=mode, ) - solver.solve(instance) + solver._solve(instance) assert len(instance.get_samples()) > 0 sample = instance.get_samples()[0] @@ -55,8 +63,8 @@ def test_learning_solver( assert lp_log is not None assert len(lp_log) > 100 - solver.fit([instance], n_jobs=4) - solver.solve(instance) + solver._fit([instance], n_jobs=4) + solver._solve(instance) # Assert solver is picklable with tempfile.TemporaryFile() as file: @@ -73,9 +81,9 @@ def test_solve_without_lp( solver=internal_solver, solve_lp=False, ) - solver.solve(instance) - solver.fit([instance]) - solver.solve(instance) + solver._solve(instance) + solver._fit([instance]) + solver._solve(instance) def test_parallel_solve( @@ -104,7 +112,7 @@ def test_solve_fit_from_disk( # Test: solve solver = LearningSolver(solver=internal_solver) - solver.solve(instances[0]) + solver._solve(instances[0]) instance_loaded = read_pickle_gz(cast(PickleGzInstance, instances[0]).filename) assert len(instance_loaded.get_samples()) > 0 @@ -119,17 +127,29 @@ def test_solve_fit_from_disk( os.remove(cast(PickleGzInstance, instance).filename) -def test_simulate_perfect() -> None: - internal_solver = GurobiSolver() - instance = internal_solver.build_test_instance_knapsack() - with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp: - write_pickle_gz(instance, tmp.name) - solver = LearningSolver( - solver=internal_solver, - simulate_perfect=True, - ) - stats = solver.solve(PickleGzInstance(tmp.name)) - assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"] +def test_basic_usage() -> None: + with tempfile.TemporaryDirectory() as dirname: + # Generate instances + data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4) + train_files = save(data[0:3], f"{dirname}/train") + test_files = save(data[3:4], f"{dirname}/test") + + # Solve training instances + solver = LearningSolver() + stats = solver.solve(train_files, build_stab_model) + assert len(stats) == 3 + for f in train_files: + sample_filename = f.replace(".pkl.gz", ".h5") + assert exists(sample_filename) + sample = Hdf5Sample(sample_filename) + assert sample.get_scalar("mip_lower_bound") > 0 + + # Fit + solver.fit(train_files, build_stab_model) + + # Solve test instances + stats = solver.solve(test_files, build_stab_model) + assert "Objective: Predicted lower bound" in stats[0].keys() def test_gap() -> None: