Change LearningSolver.solve and fit

master
Alinson S. Xavier 4 years ago
parent c98ff4eab4
commit 522f3a7e18

@@ -131,3 +131,17 @@ class MaxWeightStableSetGenerator:
    def _generate_graph(self) -> Graph:
        return nx.generators.random_graphs.binomial_graph(self.n.rvs(), self.p.rvs())


def build_stab_model(data: MaxWeightStableSetData) -> pe.ConcreteModel:
    model = pe.ConcreteModel()
    nodes = list(data.graph.nodes)
    model.x = pe.Var(nodes, domain=pe.Binary)
    model.OBJ = pe.Objective(
        expr=sum(model.x[v] * data.weights[v] for v in nodes),
        sense=pe.maximize,
    )
    model.clique_eqs = pe.ConstraintList()
    for clique in nx.find_cliques(data.graph):
        model.clique_eqs.add(sum(model.x[v] for v in clique) <= 1)
    return model
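For reference, a minimal sketch of how the new builder can be exercised on its own. This is illustrative only and not part of the commit; it assumes a Pyomo-compatible MIP solver (here Gurobi) is installed locally.

import pyomo.environ as pe
from scipy.stats import randint
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model

# Generate one random max-weight stable set instance and build its Pyomo model.
data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(1)[0]
model = build_stab_model(data)

# The result is a plain ConcreteModel and can be solved directly, bypassing MIPLearn.
pe.SolverFactory("gurobi").solve(model)
print(pe.value(model.OBJ))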

@@ -5,10 +5,12 @@
import logging
import time
import traceback
from typing import Optional, List, Any, cast, Dict, Tuple
from typing import Optional, List, Any, cast, Dict, Tuple, Callable, IO
from overrides import overrides
from p_tqdm import p_map
from miplearn.features.sample import Hdf5Sample, Sample
from miplearn.components.component import Component
from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent
from miplearn.components.dynamic_user_cuts import UserCutsComponent
@@ -16,15 +18,44 @@ from miplearn.components.objective import ObjectiveValueComponent
from miplearn.components.primal import PrimalSolutionComponent
from miplearn.features.extractor import FeaturesExtractor
from miplearn.instance.base import Instance
from miplearn.instance.picklegz import PickleGzInstance
from miplearn.solvers import _RedirectOutput
from miplearn.solvers.internal import InternalSolver
from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
from miplearn.types import LearningSolveStats
import gzip
import pickle
from os.path import exists
logger = logging.getLogger(__name__)
class InstanceWrapper(Instance):
    def __init__(self, data_filename: Any, build_model: Callable):
        super().__init__()
        assert data_filename.endswith(".pkl.gz")
        self.filename = data_filename
        self.sample_filename = data_filename.replace(".pkl.gz", ".h5")
        self.sample = Hdf5Sample(
            self.sample_filename,
            mode="r+" if exists(self.sample_filename) else "w",
        )
        self.build_model = build_model

    @overrides
    def to_model(self) -> Any:
        with gzip.GzipFile(self.filename, "rb") as file:
            data = pickle.load(cast(IO[bytes], file))
        return self.build_model(data)

    @overrides
    def create_sample(self) -> Sample:
        return self.sample

    @overrides
    def get_samples(self) -> List[Sample]:
        return [self.sample]
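In short, the wrapper keeps the problem data in a .pkl.gz file and stores all training data in a sibling .h5 file, which is reopened in append mode on subsequent solves. A rough sketch of the round trip follows; it is illustrative only (the wrapper is normally constructed internally by LearningSolver, the filename is made up, and it assumes write_pickle_gz simply gzip-pickles whatever object it is given).

from scipy.stats import randint
from miplearn.instance.picklegz import write_pickle_gz
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model

# Hypothetical standalone use of InstanceWrapper (defined above), mirroring
# what LearningSolver.solve does for each filename it receives.
data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(1)[0]
write_pickle_gz(data, "stab_00000.pkl.gz")            # problem data on disk
wrapper = InstanceWrapper("stab_00000.pkl.gz", build_stab_model)
model = wrapper.to_model()        # unpickles the data, calls build_stab_model
samples = wrapper.get_samples()   # single Hdf5Sample backed by stab_00000.h5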
class _GlobalVariables:
    def __init__(self) -> None:
        self.solver: Optional[LearningSolver] = None
@@ -47,7 +78,7 @@ def _parallel_solve(
assert solver is not None
assert instances is not None
try:
stats = solver.solve(
stats = solver._solve(
instances[idx],
discard_output=discard_outputs,
)
@@ -86,11 +117,6 @@ class LearningSolver:
option should be activated if the LP relaxation is not very
expensive to solve and if it provides good hints for the integer
solution.
simulate_perfect: bool
If true, each call to solve actually performs three actions: solve
the original problem, train the ML models on the data that was just
collected, and solve the problem again. This is useful for evaluating
the theoretical performance of perfect ML models.
"""
def __init__(
@@ -100,7 +126,6 @@ class LearningSolver:
solver: Optional[InternalSolver] = None,
use_lazy_cb: bool = False,
solve_lp: bool = True,
simulate_perfect: bool = False,
extractor: Optional[FeaturesExtractor] = None,
extract_lhs: bool = True,
extract_sa: bool = True,
@@ -117,7 +142,6 @@ class LearningSolver:
self.internal_solver: Optional[InternalSolver] = None
self.internal_solver_prototype: InternalSolver = solver
self.mode: str = mode
self.simulate_perfect: bool = simulate_perfect
self.solve_lp: bool = solve_lp
self.tee = False
self.use_lazy_cb: bool = use_lazy_cb
@@ -139,6 +163,44 @@ class LearningSolver:
        discard_output: bool = False,
        tee: bool = False,
    ) -> LearningSolveStats:
        """
        Solves the given instance. If trained machine-learning models are
        available, they will be used to accelerate the solution process.
        The argument `instance` may be either an Instance object or a
        filename pointing to a pickled Instance object.
        This method adds a new training sample to `instance.training_sample`.
        If a filename is provided, then the file is modified in-place. That is,
        the original file is overwritten.
        If `solver.solve_lp_first` is False, the properties lp_solution and
        lp_value will be set to dummy values.
        Parameters
        ----------
        instance: Instance
            The instance to be solved.
        model: Any
            The corresponding Pyomo model. If not provided, it will be created.
        discard_output: bool
            If True, do not write the modified instances anywhere; simply discard
            them. Useful during benchmarking.
        tee: bool
            If true, prints solver log to screen.
        Returns
        -------
        LearningSolveStats
            A dictionary of solver statistics containing at least the following
            keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
            "Sense", "Log", "Warm start value" and "LP value".
            Additional components may generate additional keys. For example,
            ObjectiveValueComponent adds the keys "Predicted LB" and
            "Predicted UB". See the documentation of each component for more
            details.
        """
        # Generate model
        # -------------------------------------------------------
@@ -299,65 +361,19 @@ class LearningSolver:
    def solve(
        self,
        instance: Instance,
        model: Any = None,
        discard_output: bool = False,
        tee: bool = False,
    ) -> LearningSolveStats:
        """
        Solves the given instance. If trained machine-learning models are
        available, they will be used to accelerate the solution process.
        The argument `instance` may be either an Instance object or a
        filename pointing to a pickled Instance object.
        This method adds a new training sample to `instance.training_sample`.
        If a filename is provided, then the file is modified in-place. That is,
        the original file is overwritten.
        If `solver.solve_lp_first` is False, the properties lp_solution and
        lp_value will be set to dummy values.
        Parameters
        ----------
        instance: Instance
            The instance to be solved.
        model: Any
            The corresponding Pyomo model. If not provided, it will be created.
        discard_output: bool
            If True, do not write the modified instances anywhere; simply discard
            them. Useful during benchmarking.
        tee: bool
            If true, prints solver log to screen.
        Returns
        -------
        LearningSolveStats
            A dictionary of solver statistics containing at least the following
            keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
            "Sense", "Log", "Warm start value" and "LP value".
        filenames: List[str],
        build_model: Callable,
        tee: bool = True,
    ) -> List[LearningSolveStats]:
        stats = []
        for f in filenames:
            s = self._solve(InstanceWrapper(f, build_model), tee=tee)
            stats.append(s)
        return stats
            Additional components may generate additional keys. For example,
            ObjectiveValueComponent adds the keys "Predicted LB" and
            "Predicted UB". See the documentation of each component for more
            details.
        """
        if self.simulate_perfect:
            if not isinstance(instance, PickleGzInstance):
                raise Exception("Not implemented")
            self._solve(
                instance=instance,
                model=model,
                tee=tee,
            )
            self.fit([instance])
            instance.instance = None
        return self._solve(
            instance=instance,
            model=model,
            discard_output=discard_output,
            tee=tee,
        )

    def fit(self, filenames: List[str], build_model: Callable) -> None:
        instances: List[Instance] = [InstanceWrapper(f, build_model) for f in filenames]
        self._fit(instances)
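Taken together, the public entry points now operate on lists of data files plus a model builder, while the old per-Instance logic moves into _solve/_fit. A condensed sketch of the intended workflow, essentially what test_basic_usage below exercises (the save helper presumably writes one .pkl.gz file per data object and returns the filenames; the prefixes and key names used here come from that test):

from scipy.stats import randint
from miplearn.instance.picklegz import save
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model
from miplearn.solvers.learning import LearningSolver

data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4)
train_files = save(data[0:3], "train")   # e.g. train_00000.pkl.gz, ... (assumed naming)
test_files = save(data[3:4], "test")

solver = LearningSolver()
solver.solve(train_files, build_stab_model)   # collects training samples into sibling .h5 files
solver.fit(train_files, build_stab_model)     # trains the ML components on those samples
stats = solver.solve(test_files, build_stab_model)
print(stats[0]["Objective: Predicted lower bound"])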
    def parallel_solve(
        self,
@@ -394,7 +410,7 @@ class LearningSolver:
`[solver.solve(p) for p in instances]`
"""
if n_jobs == 1:
return [self.solve(p) for p in instances]
return [self._solve(p) for p in instances]
else:
self.internal_solver = None
self._silence_miplearn_logger()
@@ -415,7 +431,7 @@ class LearningSolver:
self._restore_miplearn_logger()
return stats
def fit(
def _fit(
self,
training_instances: List[Instance],
n_jobs: int = 1,

@@ -87,7 +87,7 @@ def test_usage(
stab_instance: Instance,
solver: LearningSolver,
) -> None:
stats_before = solver.solve(stab_instance)
stats_before = solver._solve(stab_instance)
sample = stab_instance.get_samples()[0]
user_cuts_encoded = sample.get_scalar("mip_user_cuts")
assert user_cuts_encoded is not None
@@ -97,8 +97,8 @@ def test_usage(
assert stats_before["UserCuts: Added ahead-of-time"] == 0
assert stats_before["UserCuts: Added in callback"] > 0
solver.fit([stab_instance])
stats_after = solver.solve(stab_instance)
solver._fit([stab_instance])
stats_after = solver._solve(stab_instance)
assert (
stats_after["UserCuts: Added ahead-of-time"]
== stats_before["UserCuts: Added in callback"]

@@ -134,8 +134,8 @@ def test_sample_evaluate(sample: Sample) -> None:
def test_usage() -> None:
solver = LearningSolver(components=[ObjectiveValueComponent()])
instance = GurobiPyomoSolver().build_test_instance_knapsack()
solver.solve(instance)
solver.fit([instance])
stats = solver.solve(instance)
solver._solve(instance)
solver._fit([instance])
stats = solver._solve(instance)
assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"]
assert stats["mip_upper_bound"] == stats["Objective: Predicted upper bound"]

@@ -110,9 +110,9 @@ def test_usage() -> None:
gen = TravelingSalesmanGenerator(n=randint(low=5, high=6))
data = gen.generate(1)
instance = TravelingSalesmanInstance(data[0].n_cities, data[0].distances)
solver.solve(instance)
solver.fit([instance])
stats = solver.solve(instance)
solver._solve(instance)
solver._fit([instance])
stats = solver._solve(instance)
assert stats["Primal: Free"] == 0
assert stats["Primal: One"] + stats["Primal: Zero"] == 10
assert stats["mip_lower_bound"] == stats["mip_warm_start_value"]

@@ -22,7 +22,7 @@ def test_usage() -> None:
# Solve instance from disk
solver = LearningSolver(solver=GurobiSolver())
solver.solve(FileInstance(filename))
solver._solve(FileInstance(filename))
# Assert HDF5 contains training data
sample = FileInstance(filename).get_samples()[0]

@@ -36,4 +36,4 @@ def test_knapsack() -> None:
weights=data[0].weights,
)
solver = LearningSolver()
solver.solve(instance)
solver._solve(instance)

@@ -15,7 +15,7 @@ def test_stab() -> None:
weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
instance = MaxWeightStableSetInstance(graph, weights)
solver = LearningSolver()
stats = solver.solve(instance)
stats = solver._solve(instance)
assert stats["mip_lower_bound"] == 2.0

@@ -40,7 +40,7 @@ def test_instance() -> None:
)
instance = TravelingSalesmanInstance(n_cities, distances)
solver = LearningSolver()
solver.solve(instance)
solver._solve(instance)
assert len(instance.get_samples()) == 1
sample = instance.get_samples()[0]
assert_equals(sample.get_array("mip_var_values"), [1.0, 0.0, 1.0, 1.0, 0.0, 1.0])
@@ -63,7 +63,7 @@ def test_subtour() -> None:
distances = squareform(pdist(cities))
instance = TravelingSalesmanInstance(n_cities, distances)
solver = LearningSolver()
solver.solve(instance)
solver._solve(instance)
samples = instance.get_samples()
assert len(samples) == 1
sample = samples[0]
@@ -96,5 +96,5 @@ def test_subtour() -> None:
1.0,
],
)
solver.fit([instance])
solver.solve(instance)
solver._fit([instance])
solver._solve(instance)

@@ -5,19 +5,27 @@
import logging
import os
import tempfile
from os.path import exists
from typing import List, cast
import dill
from scipy.stats import randint
from miplearn.features.sample import Hdf5Sample
from miplearn.instance.base import Instance
from miplearn.instance.picklegz import PickleGzInstance, write_pickle_gz, read_pickle_gz
from miplearn.solvers.gurobi import GurobiSolver
from miplearn.instance.picklegz import (
PickleGzInstance,
write_pickle_gz,
read_pickle_gz,
save,
)
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model
from miplearn.solvers.internal import InternalSolver
from miplearn.solvers.learning import LearningSolver
from miplearn.solvers.tests import assert_equals
# noinspection PyUnresolvedReferences
from tests.solvers.test_internal_solver import internal_solvers
from miplearn.solvers.tests import assert_equals
logger = logging.getLogger(__name__)
@@ -34,7 +42,7 @@ def test_learning_solver(
mode=mode,
)
solver.solve(instance)
solver._solve(instance)
assert len(instance.get_samples()) > 0
sample = instance.get_samples()[0]
@@ -55,8 +63,8 @@ def test_learning_solver(
assert lp_log is not None
assert len(lp_log) > 100
solver.fit([instance], n_jobs=4)
solver.solve(instance)
solver._fit([instance], n_jobs=4)
solver._solve(instance)
# Assert solver is picklable
with tempfile.TemporaryFile() as file:
@@ -73,9 +81,9 @@ def test_solve_without_lp(
solver=internal_solver,
solve_lp=False,
)
solver.solve(instance)
solver.fit([instance])
solver.solve(instance)
solver._solve(instance)
solver._fit([instance])
solver._solve(instance)
def test_parallel_solve(
@@ -104,7 +112,7 @@ def test_solve_fit_from_disk(
# Test: solve
solver = LearningSolver(solver=internal_solver)
solver.solve(instances[0])
solver._solve(instances[0])
instance_loaded = read_pickle_gz(cast(PickleGzInstance, instances[0]).filename)
assert len(instance_loaded.get_samples()) > 0
@@ -119,17 +127,29 @@ def test_solve_fit_from_disk(
        os.remove(cast(PickleGzInstance, instance).filename)


def test_simulate_perfect() -> None:
    internal_solver = GurobiSolver()
    instance = internal_solver.build_test_instance_knapsack()
    with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp:
        write_pickle_gz(instance, tmp.name)
    solver = LearningSolver(
        solver=internal_solver,
        simulate_perfect=True,
    )
    stats = solver.solve(PickleGzInstance(tmp.name))
    assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"]


def test_basic_usage() -> None:
    with tempfile.TemporaryDirectory() as dirname:
        # Generate instances
        data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4)
        train_files = save(data[0:3], f"{dirname}/train")
        test_files = save(data[3:4], f"{dirname}/test")

        # Solve training instances
        solver = LearningSolver()
        stats = solver.solve(train_files, build_stab_model)
        assert len(stats) == 3
        for f in train_files:
            sample_filename = f.replace(".pkl.gz", ".h5")
            assert exists(sample_filename)
            sample = Hdf5Sample(sample_filename)
            assert sample.get_scalar("mip_lower_bound") > 0

        # Fit
        solver.fit(train_files, build_stab_model)

        # Solve test instances
        stats = solver.solve(test_files, build_stab_model)
        assert "Objective: Predicted lower bound" in stats[0].keys()

def test_gap() -> None:
