Change LearningSolver.solve and fit

master
Alinson S. Xavier 4 years ago
parent c98ff4eab4
commit 522f3a7e18

@@ -131,3 +131,17 @@ class MaxWeightStableSetGenerator:
     def _generate_graph(self) -> Graph:
         return nx.generators.random_graphs.binomial_graph(self.n.rvs(), self.p.rvs())
+
+
+def build_stab_model(data: MaxWeightStableSetData) -> pe.ConcreteModel:
+    model = pe.ConcreteModel()
+    nodes = list(data.graph.nodes)
+    model.x = pe.Var(nodes, domain=pe.Binary)
+    model.OBJ = pe.Objective(
+        expr=sum(model.x[v] * data.weights[v] for v in nodes),
+        sense=pe.maximize,
+    )
+    model.clique_eqs = pe.ConstraintList()
+    for clique in nx.find_cliques(data.graph):
+        model.clique_eqs.add(sum(model.x[v] for v in clique) <= 1)
+    return model
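For orientation, here is a minimal sketch of how build_stab_model could be exercised on its own, outside of LearningSolver. The choice of Gurobi through Pyomo's SolverFactory is an assumption; any MIP solver registered with Pyomo would work:

# Sketch: build and solve one random max-weight stable set instance directly.
from scipy.stats import randint
import pyomo.environ as pe
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model

data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(1)[0]
model = build_stab_model(data)
pe.SolverFactory("gurobi").solve(model)  # solver name is an assumption
print(pe.value(model.OBJ))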

@@ -5,10 +5,12 @@
 import logging
 import time
 import traceback
-from typing import Optional, List, Any, cast, Dict, Tuple
+from typing import Optional, List, Any, cast, Dict, Tuple, Callable, IO
 
+from overrides import overrides
 from p_tqdm import p_map
 
+from miplearn.features.sample import Hdf5Sample, Sample
 from miplearn.components.component import Component
 from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent
 from miplearn.components.dynamic_user_cuts import UserCutsComponent
@@ -16,15 +18,44 @@ from miplearn.components.objective import ObjectiveValueComponent
 from miplearn.components.primal import PrimalSolutionComponent
 from miplearn.features.extractor import FeaturesExtractor
 from miplearn.instance.base import Instance
-from miplearn.instance.picklegz import PickleGzInstance
 from miplearn.solvers import _RedirectOutput
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
 from miplearn.types import LearningSolveStats
+import gzip
+import pickle
+from os.path import exists
 
 logger = logging.getLogger(__name__)
 
 
+class InstanceWrapper(Instance):
+    def __init__(self, data_filename: Any, build_model: Callable):
+        super().__init__()
+        assert data_filename.endswith(".pkl.gz")
+        self.filename = data_filename
+        self.sample_filename = data_filename.replace(".pkl.gz", ".h5")
+        self.sample = Hdf5Sample(
+            self.sample_filename,
+            mode="r+" if exists(self.sample_filename) else "w",
+        )
+        self.build_model = build_model
+
+    @overrides
+    def to_model(self) -> Any:
+        with gzip.GzipFile(self.filename, "rb") as file:
+            data = pickle.load(cast(IO[bytes], file))
+        return self.build_model(data)
+
+    @overrides
+    def create_sample(self) -> Sample:
+        return self.sample
+
+    @overrides
+    def get_samples(self) -> List[Sample]:
+        return [self.sample]
+
+
 class _GlobalVariables:
     def __init__(self) -> None:
         self.solver: Optional[LearningSolver] = None
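The wrapper pairs each gzipped pickle with an HDF5 sample file stored next to it. A small illustration of that naming rule follows; the filename is hypothetical, and users normally never build the wrapper themselves, since the new solve and fit methods below construct it from the filenames they receive:

# Hypothetical filename, for illustration only.
data_filename = "train0001.pkl.gz"  # gzipped pickle holding the problem data
sample_filename = data_filename.replace(".pkl.gz", ".h5")  # same rule as in __init__ above
# InstanceWrapper("train0001.pkl.gz", build_model) reopens "train0001.h5" in
# "r+" mode when it already exists, and creates it with mode "w" otherwise.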
@@ -47,7 +78,7 @@ def _parallel_solve(
     assert solver is not None
     assert instances is not None
     try:
-        stats = solver.solve(
+        stats = solver._solve(
             instances[idx],
             discard_output=discard_outputs,
         )
@@ -86,11 +117,6 @@ class LearningSolver:
         option should be activated if the LP relaxation is not very
         expensive to solve and if it provides good hints for the integer
         solution.
-    simulate_perfect: bool
-        If true, each call to solve actually performs three actions: solve
-        the original problem, train the ML models on the data that was just
-        collected, and solve the problem again. This is useful for evaluating
-        the theoretical performance of perfect ML models.
     """
 
     def __init__(
@@ -100,7 +126,6 @@ class LearningSolver:
         solver: Optional[InternalSolver] = None,
         use_lazy_cb: bool = False,
         solve_lp: bool = True,
-        simulate_perfect: bool = False,
         extractor: Optional[FeaturesExtractor] = None,
         extract_lhs: bool = True,
         extract_sa: bool = True,
@@ -117,7 +142,6 @@ class LearningSolver:
         self.internal_solver: Optional[InternalSolver] = None
         self.internal_solver_prototype: InternalSolver = solver
         self.mode: str = mode
-        self.simulate_perfect: bool = simulate_perfect
        self.solve_lp: bool = solve_lp
        self.tee = False
        self.use_lazy_cb: bool = use_lazy_cb
@@ -139,6 +163,44 @@ class LearningSolver:
         discard_output: bool = False,
         tee: bool = False,
     ) -> LearningSolveStats:
+        """
+        Solves the given instance. If trained machine-learning models are
+        available, they will be used to accelerate the solution process.
+
+        The argument `instance` may be either an Instance object or a
+        filename pointing to a pickled Instance object.
+
+        This method adds a new training sample to `instance.training_sample`.
+        If a filename is provided, then the file is modified in-place. That is,
+        the original file is overwritten.
+
+        If `solver.solve_lp_first` is False, the properties lp_solution and
+        lp_value will be set to dummy values.
+
+        Parameters
+        ----------
+        instance: Instance
+            The instance to be solved.
+        model: Any
+            The corresponding Pyomo model. If not provided, it will be created.
+        discard_output: bool
+            If True, do not write the modified instances anywhere; simply discard
+            them. Useful during benchmarking.
+        tee: bool
+            If true, prints solver log to screen.
+
+        Returns
+        -------
+        LearningSolveStats
+            A dictionary of solver statistics containing at least the following
+            keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
+            "Sense", "Log", "Warm start value" and "LP value".
+
+            Additional components may generate additional keys. For example,
+            ObjectiveValueComponent adds the keys "Predicted LB" and
+            "Predicted UB". See the documentation of each component for more
+            details.
+        """
         # Generate model
         # -------------------------------------------------------
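As a rough illustration of how the returned LearningSolveStats dictionary is consumed, here is a sketch based on the keys exercised by the tests in this commit; it assumes a solver and an instance already exist, and the exact key set depends on which components are installed:

# Sketch only: stats is the LearningSolveStats dict returned by _solve.
stats = solver._solve(instance)
print(stats["mip_lower_bound"], stats["mip_upper_bound"])
# Components contribute extra keys, e.g. ObjectiveValueComponent:
if "Objective: Predicted lower bound" in stats:
    print(stats["Objective: Predicted lower bound"])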
@@ -299,65 +361,19 @@ class LearningSolver:
     def solve(
         self,
-        instance: Instance,
-        model: Any = None,
-        discard_output: bool = False,
-        tee: bool = False,
-    ) -> LearningSolveStats:
-        """
-        Solves the given instance. If trained machine-learning models are
-        available, they will be used to accelerate the solution process.
-
-        The argument `instance` may be either an Instance object or a
-        filename pointing to a pickled Instance object.
-
-        This method adds a new training sample to `instance.training_sample`.
-        If a filename is provided, then the file is modified in-place. That is,
-        the original file is overwritten.
-
-        If `solver.solve_lp_first` is False, the properties lp_solution and
-        lp_value will be set to dummy values.
-
-        Parameters
-        ----------
-        instance: Instance
-            The instance to be solved.
-        model: Any
-            The corresponding Pyomo model. If not provided, it will be created.
-        discard_output: bool
-            If True, do not write the modified instances anywhere; simply discard
-            them. Useful during benchmarking.
-        tee: bool
-            If true, prints solver log to screen.
-
-        Returns
-        -------
-        LearningSolveStats
-            A dictionary of solver statistics containing at least the following
-            keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
-            "Sense", "Log", "Warm start value" and "LP value".
-
-            Additional components may generate additional keys. For example,
-            ObjectiveValueComponent adds the keys "Predicted LB" and
-            "Predicted UB". See the documentation of each component for more
-            details.
-        """
-        if self.simulate_perfect:
-            if not isinstance(instance, PickleGzInstance):
-                raise Exception("Not implemented")
-            self._solve(
-                instance=instance,
-                model=model,
-                tee=tee,
-            )
-            self.fit([instance])
-            instance.instance = None
-        return self._solve(
-            instance=instance,
-            model=model,
-            discard_output=discard_output,
-            tee=tee,
-        )
+        filenames: List[str],
+        build_model: Callable,
+        tee: bool = True,
+    ) -> List[LearningSolveStats]:
+        stats = []
+        for f in filenames:
+            s = self._solve(InstanceWrapper(f, build_model), tee=tee)
+            stats.append(s)
+        return stats
+
+    def fit(self, filenames: List[str], build_model: Callable) -> None:
+        instances: List[Instance] = [InstanceWrapper(f, build_model) for f in filenames]
+        self._fit(instances)
 
     def parallel_solve(
         self,
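A condensed sketch of the new file-based workflow these two methods enable. It assumes, based on its use in test_basic_usage further down, that save writes one .pkl.gz file per data object and returns the list of paths; the output directory is hypothetical:

from scipy.stats import randint
from miplearn.instance.picklegz import save
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model
from miplearn.solvers.learning import LearningSolver

data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4)
train_files = save(data[0:3], "/tmp/stab/train")  # hypothetical prefix
test_files = save(data[3:4], "/tmp/stab/test")

solver = LearningSolver()
solver.solve(train_files, build_stab_model)  # writes one .h5 sample next to each .pkl.gz
solver.fit(train_files, build_stab_model)    # trains the ML components on those samples
stats = solver.solve(test_files, build_stab_model)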
@@ -394,7 +410,7 @@ class LearningSolver:
             `[solver.solve(p) for p in instances]`
         """
         if n_jobs == 1:
-            return [self.solve(p) for p in instances]
+            return [self._solve(p) for p in instances]
         else:
             self.internal_solver = None
             self._silence_miplearn_logger()
@@ -415,7 +431,7 @@ class LearningSolver:
         self._restore_miplearn_logger()
         return stats
 
-    def fit(
+    def _fit(
         self,
         training_instances: List[Instance],
         n_jobs: int = 1,

@@ -87,7 +87,7 @@ def test_usage(
     stab_instance: Instance,
     solver: LearningSolver,
 ) -> None:
-    stats_before = solver.solve(stab_instance)
+    stats_before = solver._solve(stab_instance)
     sample = stab_instance.get_samples()[0]
     user_cuts_encoded = sample.get_scalar("mip_user_cuts")
     assert user_cuts_encoded is not None
@@ -97,8 +97,8 @@ def test_usage(
     assert stats_before["UserCuts: Added ahead-of-time"] == 0
     assert stats_before["UserCuts: Added in callback"] > 0
 
-    solver.fit([stab_instance])
-    stats_after = solver.solve(stab_instance)
+    solver._fit([stab_instance])
+    stats_after = solver._solve(stab_instance)
     assert (
         stats_after["UserCuts: Added ahead-of-time"]
         == stats_before["UserCuts: Added in callback"]

@@ -134,8 +134,8 @@ def test_sample_evaluate(sample: Sample) -> None:
 def test_usage() -> None:
     solver = LearningSolver(components=[ObjectiveValueComponent()])
     instance = GurobiPyomoSolver().build_test_instance_knapsack()
-    solver.solve(instance)
-    solver.fit([instance])
-    stats = solver.solve(instance)
+    solver._solve(instance)
+    solver._fit([instance])
+    stats = solver._solve(instance)
     assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"]
     assert stats["mip_upper_bound"] == stats["Objective: Predicted upper bound"]

@@ -110,9 +110,9 @@ def test_usage() -> None:
     gen = TravelingSalesmanGenerator(n=randint(low=5, high=6))
     data = gen.generate(1)
     instance = TravelingSalesmanInstance(data[0].n_cities, data[0].distances)
-    solver.solve(instance)
-    solver.fit([instance])
-    stats = solver.solve(instance)
+    solver._solve(instance)
+    solver._fit([instance])
+    stats = solver._solve(instance)
     assert stats["Primal: Free"] == 0
     assert stats["Primal: One"] + stats["Primal: Zero"] == 10
     assert stats["mip_lower_bound"] == stats["mip_warm_start_value"]

@@ -22,7 +22,7 @@ def test_usage() -> None:
     # Solve instance from disk
     solver = LearningSolver(solver=GurobiSolver())
-    solver.solve(FileInstance(filename))
+    solver._solve(FileInstance(filename))
 
     # Assert HDF5 contains training data
     sample = FileInstance(filename).get_samples()[0]

@@ -36,4 +36,4 @@ def test_knapsack() -> None:
         weights=data[0].weights,
     )
     solver = LearningSolver()
-    solver.solve(instance)
+    solver._solve(instance)

@@ -15,7 +15,7 @@ def test_stab() -> None:
     weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
     instance = MaxWeightStableSetInstance(graph, weights)
     solver = LearningSolver()
-    stats = solver.solve(instance)
+    stats = solver._solve(instance)
     assert stats["mip_lower_bound"] == 2.0

@@ -40,7 +40,7 @@ def test_instance() -> None:
     )
     instance = TravelingSalesmanInstance(n_cities, distances)
     solver = LearningSolver()
-    solver.solve(instance)
+    solver._solve(instance)
     assert len(instance.get_samples()) == 1
     sample = instance.get_samples()[0]
     assert_equals(sample.get_array("mip_var_values"), [1.0, 0.0, 1.0, 1.0, 0.0, 1.0])
@@ -63,7 +63,7 @@ def test_subtour() -> None:
     distances = squareform(pdist(cities))
     instance = TravelingSalesmanInstance(n_cities, distances)
     solver = LearningSolver()
-    solver.solve(instance)
+    solver._solve(instance)
     samples = instance.get_samples()
     assert len(samples) == 1
     sample = samples[0]
@@ -96,5 +96,5 @@ def test_subtour() -> None:
             1.0,
         ],
     )
-    solver.fit([instance])
-    solver.solve(instance)
+    solver._fit([instance])
+    solver._solve(instance)

@@ -5,19 +5,27 @@
 import logging
 import os
 import tempfile
+from os.path import exists
 from typing import List, cast
 
 import dill
+from scipy.stats import randint
 
+from miplearn.features.sample import Hdf5Sample
 from miplearn.instance.base import Instance
-from miplearn.instance.picklegz import PickleGzInstance, write_pickle_gz, read_pickle_gz
-from miplearn.solvers.gurobi import GurobiSolver
+from miplearn.instance.picklegz import (
+    PickleGzInstance,
+    write_pickle_gz,
+    read_pickle_gz,
+    save,
+)
+from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.learning import LearningSolver
+from miplearn.solvers.tests import assert_equals
 
 # noinspection PyUnresolvedReferences
 from tests.solvers.test_internal_solver import internal_solvers
-from miplearn.solvers.tests import assert_equals
 
 logger = logging.getLogger(__name__)
@@ -34,7 +42,7 @@ def test_learning_solver(
         mode=mode,
     )
 
-    solver.solve(instance)
+    solver._solve(instance)
     assert len(instance.get_samples()) > 0
     sample = instance.get_samples()[0]
@@ -55,8 +63,8 @@ def test_learning_solver(
     assert lp_log is not None
     assert len(lp_log) > 100
 
-    solver.fit([instance], n_jobs=4)
-    solver.solve(instance)
+    solver._fit([instance], n_jobs=4)
+    solver._solve(instance)
 
     # Assert solver is picklable
     with tempfile.TemporaryFile() as file:
@@ -73,9 +81,9 @@ def test_solve_without_lp(
         solver=internal_solver,
         solve_lp=False,
     )
-    solver.solve(instance)
-    solver.fit([instance])
-    solver.solve(instance)
+    solver._solve(instance)
+    solver._fit([instance])
+    solver._solve(instance)
 
 
 def test_parallel_solve(
@@ -104,7 +112,7 @@ def test_solve_fit_from_disk(
     # Test: solve
     solver = LearningSolver(solver=internal_solver)
-    solver.solve(instances[0])
+    solver._solve(instances[0])
     instance_loaded = read_pickle_gz(cast(PickleGzInstance, instances[0]).filename)
     assert len(instance_loaded.get_samples()) > 0
@@ -119,17 +127,29 @@ def test_solve_fit_from_disk(
         os.remove(cast(PickleGzInstance, instance).filename)
 
 
-def test_simulate_perfect() -> None:
-    internal_solver = GurobiSolver()
-    instance = internal_solver.build_test_instance_knapsack()
-    with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp:
-        write_pickle_gz(instance, tmp.name)
-        solver = LearningSolver(
-            solver=internal_solver,
-            simulate_perfect=True,
-        )
-        stats = solver.solve(PickleGzInstance(tmp.name))
-        assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"]
+def test_basic_usage() -> None:
+    with tempfile.TemporaryDirectory() as dirname:
+        # Generate instances
+        data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4)
+        train_files = save(data[0:3], f"{dirname}/train")
+        test_files = save(data[3:4], f"{dirname}/test")
+
+        # Solve training instances
+        solver = LearningSolver()
+        stats = solver.solve(train_files, build_stab_model)
+        assert len(stats) == 3
+        for f in train_files:
+            sample_filename = f.replace(".pkl.gz", ".h5")
+            assert exists(sample_filename)
+            sample = Hdf5Sample(sample_filename)
+            assert sample.get_scalar("mip_lower_bound") > 0
+
+        # Fit
+        solver.fit(train_files, build_stab_model)
+
+        # Solve test instances
+        stats = solver.solve(test_files, build_stab_model)
+        assert "Objective: Predicted lower bound" in stats[0].keys()
 
 
 def test_gap() -> None:
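After the training solve, the collected samples can also be inspected directly; a short sketch reusing the train_files list from the test above, reading the same fields the tests already query:

from miplearn.features.sample import Hdf5Sample

for f in train_files:
    sample = Hdf5Sample(f.replace(".pkl.gz", ".h5"))
    print(sample.get_scalar("mip_lower_bound"))  # scalar field checked in test_basic_usage
    print(sample.get_array("mip_var_values"))    # array field read in the TSP tests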
