Change LearningSolver.solve and fit

master
Alinson S. Xavier 4 years ago
parent c98ff4eab4
commit 522f3a7e18

@@ -131,3 +131,17 @@ class MaxWeightStableSetGenerator:
     def _generate_graph(self) -> Graph:
         return nx.generators.random_graphs.binomial_graph(self.n.rvs(), self.p.rvs())
+
+
+def build_stab_model(data: MaxWeightStableSetData) -> pe.ConcreteModel:
+    model = pe.ConcreteModel()
+    nodes = list(data.graph.nodes)
+    model.x = pe.Var(nodes, domain=pe.Binary)
+    model.OBJ = pe.Objective(
+        expr=sum(model.x[v] * data.weights[v] for v in nodes),
+        sense=pe.maximize,
+    )
+    model.clique_eqs = pe.ConstraintList()
+    for clique in nx.find_cliques(data.graph):
+        model.clique_eqs.add(sum(model.x[v] for v in clique) <= 1)
+    return model
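For orientation, here is a minimal sketch of how build_stab_model could be exercised on its own, outside of LearningSolver. The choice of Gurobi through Pyomo's SolverFactory is an assumption; any MIP solver registered with Pyomo would work:

# Sketch: build and solve one random max-weight stable set instance directly.
from scipy.stats import randint
import pyomo.environ as pe
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model

data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(1)[0]
model = build_stab_model(data)
pe.SolverFactory("gurobi").solve(model)  # solver name is an assumption
print(pe.value(model.OBJ))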

@@ -5,10 +5,12 @@
 import logging
 import time
 import traceback
-from typing import Optional, List, Any, cast, Dict, Tuple
+from typing import Optional, List, Any, cast, Dict, Tuple, Callable, IO
 
+from overrides import overrides
 from p_tqdm import p_map
 
+from miplearn.features.sample import Hdf5Sample, Sample
 from miplearn.components.component import Component
 from miplearn.components.dynamic_lazy import DynamicLazyConstraintsComponent
 from miplearn.components.dynamic_user_cuts import UserCutsComponent
@@ -16,15 +18,44 @@ from miplearn.components.objective import ObjectiveValueComponent
 from miplearn.components.primal import PrimalSolutionComponent
 from miplearn.features.extractor import FeaturesExtractor
 from miplearn.instance.base import Instance
-from miplearn.instance.picklegz import PickleGzInstance
 from miplearn.solvers import _RedirectOutput
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
 from miplearn.types import LearningSolveStats
+import gzip
+import pickle
+from os.path import exists
 
 logger = logging.getLogger(__name__)
 
 
+class InstanceWrapper(Instance):
+    def __init__(self, data_filename: Any, build_model: Callable):
+        super().__init__()
+        assert data_filename.endswith(".pkl.gz")
+        self.filename = data_filename
+        self.sample_filename = data_filename.replace(".pkl.gz", ".h5")
+        self.sample = Hdf5Sample(
+            self.sample_filename,
+            mode="r+" if exists(self.sample_filename) else "w",
+        )
+        self.build_model = build_model
+
+    @overrides
+    def to_model(self) -> Any:
+        with gzip.GzipFile(self.filename, "rb") as file:
+            data = pickle.load(cast(IO[bytes], file))
+        return self.build_model(data)
+
+    @overrides
+    def create_sample(self) -> Sample:
+        return self.sample
+
+    @overrides
+    def get_samples(self) -> List[Sample]:
+        return [self.sample]
+
+
 class _GlobalVariables:
     def __init__(self) -> None:
         self.solver: Optional[LearningSolver] = None
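The wrapper pairs each gzipped pickle with an HDF5 sample file stored next to it. A small illustration of that naming rule follows; the filename is hypothetical, and users normally never build the wrapper themselves, since the new solve and fit methods below construct it from the filenames they receive:

# Hypothetical filename, for illustration only.
data_filename = "train0001.pkl.gz"  # gzipped pickle holding the problem data
sample_filename = data_filename.replace(".pkl.gz", ".h5")  # same rule as in __init__ above
# InstanceWrapper("train0001.pkl.gz", build_model) reopens "train0001.h5" in
# "r+" mode when it already exists, and creates it with mode "w" otherwise.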
@@ -47,7 +78,7 @@ def _parallel_solve(
     assert solver is not None
     assert instances is not None
     try:
-        stats = solver.solve(
+        stats = solver._solve(
             instances[idx],
             discard_output=discard_outputs,
         )
@@ -86,11 +117,6 @@ class LearningSolver:
         option should be activated if the LP relaxation is not very
         expensive to solve and if it provides good hints for the integer
         solution.
-    simulate_perfect: bool
-        If true, each call to solve actually performs three actions: solve
-        the original problem, train the ML models on the data that was just
-        collected, and solve the problem again. This is useful for evaluating
-        the theoretical performance of perfect ML models.
     """
 
     def __init__(
@@ -100,7 +126,6 @@ class LearningSolver:
         solver: Optional[InternalSolver] = None,
         use_lazy_cb: bool = False,
         solve_lp: bool = True,
-        simulate_perfect: bool = False,
         extractor: Optional[FeaturesExtractor] = None,
         extract_lhs: bool = True,
         extract_sa: bool = True,
@@ -117,7 +142,6 @@ class LearningSolver:
         self.internal_solver: Optional[InternalSolver] = None
         self.internal_solver_prototype: InternalSolver = solver
         self.mode: str = mode
-        self.simulate_perfect: bool = simulate_perfect
        self.solve_lp: bool = solve_lp
        self.tee = False
        self.use_lazy_cb: bool = use_lazy_cb
@@ -139,6 +163,44 @@ class LearningSolver:
         discard_output: bool = False,
         tee: bool = False,
     ) -> LearningSolveStats:
+        """
+        Solves the given instance. If trained machine-learning models are
+        available, they will be used to accelerate the solution process.
+
+        The argument `instance` may be either an Instance object or a
+        filename pointing to a pickled Instance object.
+
+        This method adds a new training sample to `instance.training_sample`.
+        If a filename is provided, then the file is modified in-place. That is,
+        the original file is overwritten.
+
+        If `solver.solve_lp_first` is False, the properties lp_solution and
+        lp_value will be set to dummy values.
+
+        Parameters
+        ----------
+        instance: Instance
+            The instance to be solved.
+        model: Any
+            The corresponding Pyomo model. If not provided, it will be created.
+        discard_output: bool
+            If True, do not write the modified instances anywhere; simply discard
+            them. Useful during benchmarking.
+        tee: bool
+            If true, prints solver log to screen.
+
+        Returns
+        -------
+        LearningSolveStats
+            A dictionary of solver statistics containing at least the following
+            keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
+            "Sense", "Log", "Warm start value" and "LP value".
+
+            Additional components may generate additional keys. For example,
+            ObjectiveValueComponent adds the keys "Predicted LB" and
+            "Predicted UB". See the documentation of each component for more
+            details.
+        """
         # Generate model
         # -------------------------------------------------------
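As a rough illustration of how the returned LearningSolveStats dictionary is consumed, here is a sketch based on the keys exercised by the tests in this commit; it assumes a solver and an instance already exist, and the exact key set depends on which components are installed:

# Sketch only: stats is the LearningSolveStats dict returned by _solve.
stats = solver._solve(instance)
print(stats["mip_lower_bound"], stats["mip_upper_bound"])
# Components contribute extra keys, e.g. ObjectiveValueComponent:
if "Objective: Predicted lower bound" in stats:
    print(stats["Objective: Predicted lower bound"])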
@@ -299,65 +361,19 @@ class LearningSolver:
     def solve(
         self,
-        instance: Instance,
-        model: Any = None,
-        discard_output: bool = False,
-        tee: bool = False,
-    ) -> LearningSolveStats:
-        """
-        Solves the given instance. If trained machine-learning models are
-        available, they will be used to accelerate the solution process.
-
-        The argument `instance` may be either an Instance object or a
-        filename pointing to a pickled Instance object.
-
-        This method adds a new training sample to `instance.training_sample`.
-        If a filename is provided, then the file is modified in-place. That is,
-        the original file is overwritten.
-
-        If `solver.solve_lp_first` is False, the properties lp_solution and
-        lp_value will be set to dummy values.
-
-        Parameters
-        ----------
-        instance: Instance
-            The instance to be solved.
-        model: Any
-            The corresponding Pyomo model. If not provided, it will be created.
-        discard_output: bool
-            If True, do not write the modified instances anywhere; simply discard
-            them. Useful during benchmarking.
-        tee: bool
-            If true, prints solver log to screen.
-
-        Returns
-        -------
-        LearningSolveStats
-            A dictionary of solver statistics containing at least the following
-            keys: "Lower bound", "Upper bound", "Wallclock time", "Nodes",
-            "Sense", "Log", "Warm start value" and "LP value".
-
-            Additional components may generate additional keys. For example,
-            ObjectiveValueComponent adds the keys "Predicted LB" and
-            "Predicted UB". See the documentation of each component for more
-            details.
-        """
-        if self.simulate_perfect:
-            if not isinstance(instance, PickleGzInstance):
-                raise Exception("Not implemented")
-            self._solve(
-                instance=instance,
-                model=model,
-                tee=tee,
-            )
-            self.fit([instance])
-            instance.instance = None
-        return self._solve(
-            instance=instance,
-            model=model,
-            discard_output=discard_output,
-            tee=tee,
-        )
+        filenames: List[str],
+        build_model: Callable,
+        tee: bool = True,
+    ) -> List[LearningSolveStats]:
+        stats = []
+        for f in filenames:
+            s = self._solve(InstanceWrapper(f, build_model), tee=tee)
+            stats.append(s)
+        return stats
+
+    def fit(self, filenames: List[str], build_model: Callable) -> None:
+        instances: List[Instance] = [InstanceWrapper(f, build_model) for f in filenames]
+        self._fit(instances)
 
     def parallel_solve(
         self,
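A condensed sketch of the new file-based workflow these two methods enable. It assumes, based on its use in test_basic_usage further down, that save writes one .pkl.gz file per data object and returns the list of paths; the output directory is hypothetical:

from scipy.stats import randint
from miplearn.instance.picklegz import save
from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model
from miplearn.solvers.learning import LearningSolver

data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4)
train_files = save(data[0:3], "/tmp/stab/train")  # hypothetical prefix
test_files = save(data[3:4], "/tmp/stab/test")

solver = LearningSolver()
solver.solve(train_files, build_stab_model)  # writes one .h5 sample next to each .pkl.gz
solver.fit(train_files, build_stab_model)    # trains the ML components on those samples
stats = solver.solve(test_files, build_stab_model)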
@@ -394,7 +410,7 @@ class LearningSolver:
             `[solver.solve(p) for p in instances]`
         """
         if n_jobs == 1:
-            return [self.solve(p) for p in instances]
+            return [self._solve(p) for p in instances]
         else:
             self.internal_solver = None
             self._silence_miplearn_logger()
@@ -415,7 +431,7 @@ class LearningSolver:
         self._restore_miplearn_logger()
         return stats
 
-    def fit(
+    def _fit(
         self,
         training_instances: List[Instance],
         n_jobs: int = 1,

@@ -87,7 +87,7 @@ def test_usage(
     stab_instance: Instance,
     solver: LearningSolver,
 ) -> None:
-    stats_before = solver.solve(stab_instance)
+    stats_before = solver._solve(stab_instance)
     sample = stab_instance.get_samples()[0]
     user_cuts_encoded = sample.get_scalar("mip_user_cuts")
     assert user_cuts_encoded is not None
@@ -97,8 +97,8 @@ def test_usage(
     assert stats_before["UserCuts: Added ahead-of-time"] == 0
     assert stats_before["UserCuts: Added in callback"] > 0
 
-    solver.fit([stab_instance])
-    stats_after = solver.solve(stab_instance)
+    solver._fit([stab_instance])
+    stats_after = solver._solve(stab_instance)
     assert (
         stats_after["UserCuts: Added ahead-of-time"]
         == stats_before["UserCuts: Added in callback"]

@@ -134,8 +134,8 @@ def test_sample_evaluate(sample: Sample) -> None:
 def test_usage() -> None:
     solver = LearningSolver(components=[ObjectiveValueComponent()])
     instance = GurobiPyomoSolver().build_test_instance_knapsack()
-    solver.solve(instance)
-    solver.fit([instance])
-    stats = solver.solve(instance)
+    solver._solve(instance)
+    solver._fit([instance])
+    stats = solver._solve(instance)
     assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"]
     assert stats["mip_upper_bound"] == stats["Objective: Predicted upper bound"]

@@ -110,9 +110,9 @@ def test_usage() -> None:
     gen = TravelingSalesmanGenerator(n=randint(low=5, high=6))
     data = gen.generate(1)
     instance = TravelingSalesmanInstance(data[0].n_cities, data[0].distances)
-    solver.solve(instance)
-    solver.fit([instance])
-    stats = solver.solve(instance)
+    solver._solve(instance)
+    solver._fit([instance])
+    stats = solver._solve(instance)
     assert stats["Primal: Free"] == 0
     assert stats["Primal: One"] + stats["Primal: Zero"] == 10
     assert stats["mip_lower_bound"] == stats["mip_warm_start_value"]

@@ -22,7 +22,7 @@ def test_usage() -> None:
     # Solve instance from disk
     solver = LearningSolver(solver=GurobiSolver())
-    solver.solve(FileInstance(filename))
+    solver._solve(FileInstance(filename))
 
     # Assert HDF5 contains training data
     sample = FileInstance(filename).get_samples()[0]

@@ -36,4 +36,4 @@ def test_knapsack() -> None:
         weights=data[0].weights,
     )
     solver = LearningSolver()
-    solver.solve(instance)
+    solver._solve(instance)

@@ -15,7 +15,7 @@ def test_stab() -> None:
     weights = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
     instance = MaxWeightStableSetInstance(graph, weights)
     solver = LearningSolver()
-    stats = solver.solve(instance)
+    stats = solver._solve(instance)
     assert stats["mip_lower_bound"] == 2.0

@@ -40,7 +40,7 @@ def test_instance() -> None:
     )
     instance = TravelingSalesmanInstance(n_cities, distances)
     solver = LearningSolver()
-    solver.solve(instance)
+    solver._solve(instance)
     assert len(instance.get_samples()) == 1
     sample = instance.get_samples()[0]
     assert_equals(sample.get_array("mip_var_values"), [1.0, 0.0, 1.0, 1.0, 0.0, 1.0])
@@ -63,7 +63,7 @@ def test_subtour() -> None:
     distances = squareform(pdist(cities))
     instance = TravelingSalesmanInstance(n_cities, distances)
     solver = LearningSolver()
-    solver.solve(instance)
+    solver._solve(instance)
     samples = instance.get_samples()
     assert len(samples) == 1
     sample = samples[0]
@@ -96,5 +96,5 @@ def test_subtour() -> None:
             1.0,
         ],
     )
-    solver.fit([instance])
-    solver.solve(instance)
+    solver._fit([instance])
+    solver._solve(instance)

@@ -5,19 +5,27 @@
 import logging
 import os
 import tempfile
+from os.path import exists
 from typing import List, cast
 
 import dill
+from scipy.stats import randint
 
+from miplearn.features.sample import Hdf5Sample
 from miplearn.instance.base import Instance
-from miplearn.instance.picklegz import PickleGzInstance, write_pickle_gz, read_pickle_gz
-from miplearn.solvers.gurobi import GurobiSolver
+from miplearn.instance.picklegz import (
+    PickleGzInstance,
+    write_pickle_gz,
+    read_pickle_gz,
+    save,
+)
+from miplearn.problems.stab import MaxWeightStableSetGenerator, build_stab_model
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.learning import LearningSolver
+from miplearn.solvers.tests import assert_equals
 
 # noinspection PyUnresolvedReferences
 from tests.solvers.test_internal_solver import internal_solvers
-from miplearn.solvers.tests import assert_equals
 
 logger = logging.getLogger(__name__)
@@ -34,7 +42,7 @@ def test_learning_solver(
         mode=mode,
     )
 
-    solver.solve(instance)
+    solver._solve(instance)
     assert len(instance.get_samples()) > 0
     sample = instance.get_samples()[0]
@@ -55,8 +63,8 @@ def test_learning_solver(
     assert lp_log is not None
     assert len(lp_log) > 100
 
-    solver.fit([instance], n_jobs=4)
-    solver.solve(instance)
+    solver._fit([instance], n_jobs=4)
+    solver._solve(instance)
 
     # Assert solver is picklable
     with tempfile.TemporaryFile() as file:
@@ -73,9 +81,9 @@ def test_solve_without_lp(
         solver=internal_solver,
         solve_lp=False,
     )
-    solver.solve(instance)
-    solver.fit([instance])
-    solver.solve(instance)
+    solver._solve(instance)
+    solver._fit([instance])
+    solver._solve(instance)
 
 
 def test_parallel_solve(
@@ -104,7 +112,7 @@ def test_solve_fit_from_disk(
     # Test: solve
     solver = LearningSolver(solver=internal_solver)
-    solver.solve(instances[0])
+    solver._solve(instances[0])
     instance_loaded = read_pickle_gz(cast(PickleGzInstance, instances[0]).filename)
     assert len(instance_loaded.get_samples()) > 0
@@ -119,17 +127,29 @@ def test_solve_fit_from_disk(
         os.remove(cast(PickleGzInstance, instance).filename)
 
 
-def test_simulate_perfect() -> None:
-    internal_solver = GurobiSolver()
-    instance = internal_solver.build_test_instance_knapsack()
-    with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp:
-        write_pickle_gz(instance, tmp.name)
-        solver = LearningSolver(
-            solver=internal_solver,
-            simulate_perfect=True,
-        )
-        stats = solver.solve(PickleGzInstance(tmp.name))
-        assert stats["mip_lower_bound"] == stats["Objective: Predicted lower bound"]
+def test_basic_usage() -> None:
+    with tempfile.TemporaryDirectory() as dirname:
+        # Generate instances
+        data = MaxWeightStableSetGenerator(n=randint(low=20, high=21)).generate(4)
+        train_files = save(data[0:3], f"{dirname}/train")
+        test_files = save(data[3:4], f"{dirname}/test")
+
+        # Solve training instances
+        solver = LearningSolver()
+        stats = solver.solve(train_files, build_stab_model)
+        assert len(stats) == 3
+        for f in train_files:
+            sample_filename = f.replace(".pkl.gz", ".h5")
+            assert exists(sample_filename)
+            sample = Hdf5Sample(sample_filename)
+            assert sample.get_scalar("mip_lower_bound") > 0
+
+        # Fit
+        solver.fit(train_files, build_stab_model)
+
+        # Solve test instances
+        stats = solver.solve(test_files, build_stab_model)
+        assert "Objective: Predicted lower bound" in stats[0].keys()
 
 
 def test_gap() -> None:
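After the training solve, the collected samples can also be inspected directly; a short sketch reusing the train_files list from the test above, reading the same fields the tests already query:

from miplearn.features.sample import Hdf5Sample

for f in train_files:
    sample = Hdf5Sample(f.replace(".pkl.gz", ".h5"))
    print(sample.get_scalar("mip_lower_bound"))  # scalar field checked in test_basic_usage
    print(sample.get_array("mip_var_values"))    # array field read in the TSP tests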
