Move collected data to instance.training_data

2025-12-06 09:28:51 -06:00 · 2021-01-21 08:21:40 -06:00
parent 23dd311d75
commit 06402516e6
11 changed files with 97 additions and 89 deletions
--- a/miplearn/components/objective.py
+++ b/miplearn/components/objective.py
@@ -78,7 +78,15 @@ class ObjectiveValueComponent(Component):

    def evaluate(self, instances):
        y_pred = self.predict(instances)
-        y_true = np.array([[inst.lower_bound, inst.upper_bound] for inst in instances])
+        y_true = np.array(
+            [
+                [
+                    inst.training_data[0]["Lower bound"],
+                    inst.training_data[0]["Upper bound"],
+                ]
+                for inst in instances
+            ]
+        )
        y_true_lb, y_true_ub = y_true[:, 0], y_true[:, 1]
        y_pred_lb, y_pred_ub = y_pred[:, 1], y_pred[:, 1]
        ev = {
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -68,7 +68,8 @@ class PrimalSolutionComponent(Component):
            for label in [0, 1]:
                y_train = solutions[category][:, label].astype(int)

-                # If all samples are either positive or negative, make constant predictions
+                # If all samples are either positive or negative, make constant
+                # predictions
                y_avg = np.average(y_train)
                if y_avg < 0.001 or y_avg >= 0.999:
                    self.classifiers[category, label] = round(y_avg)
@@ -130,7 +131,7 @@ class PrimalSolutionComponent(Component):
            desc="Evaluate (primal)",
        ):
            instance = instances[instance_idx]
-            solution_actual = instance.solution
+            solution_actual = instance.training_data[0]["Solution"]
            solution_pred = self.predict(instance)

            vars_all, vars_one, vars_zero = set(), set(), set()
--- a/miplearn/components/steps/tests/test_convert_tight.py
+++ b/miplearn/components/steps/tests/test_convert_tight.py
@@ -24,8 +24,8 @@ def test_convert_tight_usage():
    )

    # Solve original problem
-    solver.solve(instance)
-    original_upper_bound = instance.upper_bound
+    stats = solver.solve(instance)
+    original_upper_bound = stats["Upper bound"]

    # Should collect training data
    assert instance.training_data[0]["slacks"]["eq_capacity"] == 0.0
@@ -35,12 +35,12 @@ def test_convert_tight_usage():
    stats = solver.solve(instance)

    # Objective value should be the same
-    assert instance.upper_bound == original_upper_bound
+    assert stats["Upper bound"] == original_upper_bound
    assert stats["ConvertTight: Inf iterations"] == 0
    assert stats["ConvertTight: Subopt iterations"] == 0


-class TestInstance(Instance):
+class SampleInstance(Instance):
    def to_model(self):
        import gurobipy as grb

@@ -70,9 +70,9 @@ def test_convert_tight_infeasibility():
        components=[comp],
        solve_lp_first=False,
    )
-    instance = TestInstance()
+    instance = SampleInstance()
    stats = solver.solve(instance)
-    assert instance.lower_bound == 5.0
+    assert stats["Upper bound"] == 5.0
    assert stats["ConvertTight: Inf iterations"] == 1
    assert stats["ConvertTight: Subopt iterations"] == 0

@@ -93,9 +93,9 @@ def test_convert_tight_suboptimality():
        components=[comp],
        solve_lp_first=False,
    )
-    instance = TestInstance()
+    instance = SampleInstance()
    stats = solver.solve(instance)
-    assert instance.lower_bound == 5.0
+    assert stats["Upper bound"] == 5.0
    assert stats["ConvertTight: Inf iterations"] == 0
    assert stats["ConvertTight: Subopt iterations"] == 1

@@ -116,8 +116,8 @@ def test_convert_tight_optimal():
        components=[comp],
        solve_lp_first=False,
    )
-    instance = TestInstance()
+    instance = SampleInstance()
    stats = solver.solve(instance)
-    assert instance.lower_bound == 5.0
+    assert stats["Upper bound"] == 5.0
    assert stats["ConvertTight: Inf iterations"] == 0
    assert stats["ConvertTight: Subopt iterations"] == 0
--- a/miplearn/components/tests/test_objective.py
+++ b/miplearn/components/tests/test_objective.py
@@ -15,8 +15,8 @@ def test_usage():
    instances, models = get_test_pyomo_instances()
    comp = ObjectiveValueComponent()
    comp.fit(instances)
-    assert instances[0].lower_bound == 1183.0
-    assert instances[0].upper_bound == 1183.0
+    assert instances[0].training_data[0]["Lower bound"] == 1183.0
+    assert instances[0].training_data[0]["Upper bound"] == 1183.0
    assert np.round(comp.predict(instances), 2).tolist() == [
        [1183.0, 1183.0],
        [1070.0, 1070.0],
--- a/miplearn/components/tests/test_primal.py
+++ b/miplearn/components/tests/test_primal.py
@@ -50,7 +50,7 @@ def test_evaluate():
    comp = PrimalSolutionComponent(classifier=[clf_zero, clf_one], threshold=0.50)
    comp.fit(instances[:1])
    assert comp.predict(instances[0]) == {"x": {0: 0, 1: 0, 2: 1, 3: None}}
-    assert instances[0].solution == {"x": {0: 1, 1: 0, 2: 1, 3: 1}}
+    assert instances[0].training_data[0]["Solution"] == {"x": {0: 1, 1: 0, 2: 1, 3: 1}}
    ev = comp.evaluate(instances[:1])
    assert ev == {
        "Fix one": {
--- a/miplearn/extractors.py
+++ b/miplearn/extractors.py
@@ -2,14 +2,13 @@
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.

+import gzip
 import logging
 import pickle
-import gzip
+from abc import ABC, abstractmethod

 import numpy as np
-
 from tqdm.auto import tqdm
-from abc import ABC, abstractmethod

 logger = logging.getLogger(__name__)

@@ -48,10 +47,10 @@ class Extractor(ABC):

    @staticmethod
    def split_variables(instance):
-        assert hasattr(instance, "lp_solution")
        result = {}
-        for var_name in instance.lp_solution:
-            for index in instance.lp_solution[var_name]:
+        lp_solution = instance.training_data[0]["LP solution"]
+        for var_name in lp_solution:
+            for index in lp_solution[var_name]:
                category = instance.get_variable_category(var_name, index)
                if category is None:
                    continue
@@ -71,6 +70,7 @@ class VariableFeaturesExtractor(Extractor):
        ):
            instance_features = instance.get_instance_features()
            var_split = self.split_variables(instance)
+            lp_solution = instance.training_data[0]["LP solution"]
            for (category, var_index_pairs) in var_split.items():
                if category not in result:
                    result[category] = []
@@ -78,7 +78,7 @@ class VariableFeaturesExtractor(Extractor):
                    result[category] += [
                        instance_features.tolist()
                        + instance.get_variable_features(var_name, index).tolist()
-                        + [instance.lp_solution[var_name][index]]
+                        + [lp_solution[var_name][index]]
                    ]
        for category in result:
            result[category] = np.array(result[category])
@@ -97,14 +97,15 @@ class SolutionExtractor(Extractor):
            disable=len(instances) < 5,
        ):
            var_split = self.split_variables(instance)
+            if self.relaxation:
+                solution = instance.training_data[0]["LP solution"]
+            else:
+                solution = instance.training_data[0]["Solution"]
            for (category, var_index_pairs) in var_split.items():
                if category not in result:
                    result[category] = []
                for (var_name, index) in var_index_pairs:
-                    if self.relaxation:
-                        v = instance.lp_solution[var_name][index]
-                    else:
-                        v = instance.solution[var_name][index]
+                    v = solution[var_name][index]
                    if v is None:
                        result[category] += [[0, 0]]
                    else:
@@ -121,7 +122,7 @@ class InstanceFeaturesExtractor(Extractor):
                np.hstack(
                    [
                        instance.get_instance_features(),
-                        instance.lp_value,
+                        instance.training_data[0]["LP value"],
                    ]
                )
                for instance in InstanceIterator(instances)
@@ -137,13 +138,22 @@ class ObjectiveValueExtractor(Extractor):
    def extract(self, instances):
        if self.kind == "lower bound":
            return np.array(
-                [[instance.lower_bound] for instance in InstanceIterator(instances)]
+                [
+                    [instance.training_data[0]["Lower bound"]]
+                    for instance in InstanceIterator(instances)
+                ]
            )
        if self.kind == "upper bound":
            return np.array(
-                [[instance.upper_bound] for instance in InstanceIterator(instances)]
+                [
+                    [instance.training_data[0]["Upper bound"]]
+                    for instance in InstanceIterator(instances)
+                ]
            )
        if self.kind == "lp":
            return np.array(
-                [[instance.lp_value] for instance in InstanceIterator(instances)]
+                [
+                    [instance.training_data[0]["LP value"]]
+                    for instance in InstanceIterator(instances)
+                ]
            )
--- a/miplearn/problems/tests/test_stab.py
+++ b/miplearn/problems/tests/test_stab.py
@@ -15,8 +15,8 @@ def test_stab():
    weights = [1.0, 1.0, 1.0, 1.0, 1.0]
    instance = MaxWeightStableSetInstance(graph, weights)
    solver = LearningSolver()
-    solver.solve(instance)
-    assert instance.lower_bound == 2.0
+    stats = solver.solve(instance)
+    assert stats["Lower bound"] == 2.0


 def test_stab_generator_fixed_graph():
--- a/miplearn/problems/tests/test_tsp.py
+++ b/miplearn/problems/tests/test_tsp.py
@@ -38,16 +38,16 @@ def test_instance():
    )
    instance = TravelingSalesmanInstance(n_cities, distances)
    solver = LearningSolver()
-    solver.solve(instance)
-    x = instance.solution["x"]
+    stats = solver.solve(instance)
+    x = instance.training_data[0]["Solution"]["x"]
    assert x[0, 1] == 1.0
    assert x[0, 2] == 0.0
    assert x[0, 3] == 1.0
    assert x[1, 2] == 1.0
    assert x[1, 3] == 0.0
    assert x[2, 3] == 1.0
-    assert instance.lower_bound == 4.0
-    assert instance.upper_bound == 4.0
+    assert stats["Lower bound"] == 4.0
+    assert stats["Upper bound"] == 4.0


 def test_subtour():
@@ -68,7 +68,7 @@ def test_subtour():
    solver.solve(instance)
    assert hasattr(instance, "found_violated_lazy_constraints")
    assert hasattr(instance, "found_violated_user_cuts")
-    x = instance.solution["x"]
+    x = instance.training_data[0]["Solution"]["x"]
    assert x[0, 1] == 1.0
    assert x[0, 4] == 1.0
    assert x[1, 2] == 1.0
--- a/miplearn/solvers/learning.py
+++ b/miplearn/solvers/learning.py
@@ -115,11 +115,11 @@ class LearningSolver:

    def solve(
        self,
-        instance,
-        model=None,
-        output="",
-        tee=False,
-    ):
+        instance: Union[Instance, str],
+        model: Any = None,
+        output: str = "",
+        tee: bool = False,
+    ) -> MIPSolveStats:
        """
        Solves the given instance. If trained machine-learning models are
        available, they will be used to accelerate the solution process.
@@ -127,20 +127,9 @@ class LearningSolver:
        The argument `instance` may be either an Instance object or a
        filename pointing to a pickled Instance object.

-        This method modifies the instance object. Specifically, the following
-        properties are set:
-
-            - instance.lp_solution
-            - instance.lp_value
-            - instance.lower_bound
-            - instance.upper_bound
-            - instance.solution
-            - instance.solver_log
-
-        Additional solver components may set additional properties. Please
-        see their documentation for more details. If a filename is provided,
-        then the file is modified in-place. That is, the original file is
-        overwritten.
+        This method adds a new training sample to `instance.training_data`.
+        If a filename is provided, then the file is modified in-place. That is,
+        the original file is overwritten.

        If `solver.solve_lp_first` is False, the properties lp_solution and
        lp_value will be set to dummy values.
@@ -190,7 +179,7 @@ class LearningSolver:

    def _solve(
        self,
-        instance: Instance,
+        instance: Union[Instance, str],
        model: Any = None,
        output: str = "",
        tee: bool = False,
@@ -211,14 +200,18 @@ class LearningSolver:
                fileformat = "pickle"
                with open(filename, "rb") as file:
                    instance = pickle.load(cast(IO[bytes], file))
+        assert isinstance(instance, Instance)

        # Generate model
        if model is None:
            with RedirectOutput([]):
                model = instance.to_model()

-        # Initialize training data
+        # Initialize training sample
        training_sample: TrainingSample = {}
+        if not hasattr(instance, "training_data"):
+            instance.training_data = []
+        instance.training_data += [training_sample]

        # Initialize internal solver
        self.tee = tee
@@ -275,11 +268,6 @@ class LearningSolver:
        for component in self.components.values():
            component.after_solve(self, instance, model, stats, training_sample)

-        # Append training data
-        if not hasattr(instance, "training_data"):
-            instance.training_data = []
-        instance.training_data += [training_sample]
-
        # Write to file, if necessary
        if filename is not None and output is not None:
            output_filename = output
@@ -350,7 +338,7 @@ class LearningSolver:
        self._restore_miplearn_logger()
        return stats

-    def fit(self, training_instances):
+    def fit(self, training_instances: Union[List[str], List[Instance]]) -> None:
        if len(training_instances) == 0:
            return
        for component in self.components.values():
--- a/miplearn/solvers/tests/test_learning_solver.py
+++ b/miplearn/solvers/tests/test_learning_solver.py
@@ -25,20 +25,19 @@ def test_learning_solver():
            )

            solver.solve(instance)
-            assert instance.solution["x"][0] == 1.0
-            assert instance.solution["x"][1] == 0.0
-            assert instance.solution["x"][2] == 1.0
-            assert instance.solution["x"][3] == 1.0
-            assert instance.lower_bound == 1183.0
-            assert instance.upper_bound == 1183.0
-            assert round(instance.lp_solution["x"][0], 3) == 1.000
-            assert round(instance.lp_solution["x"][1], 3) == 0.923
-            assert round(instance.lp_solution["x"][2], 3) == 1.000
-            assert round(instance.lp_solution["x"][3], 3) == 0.000
-            assert round(instance.lp_value, 3) == 1287.923
-            assert instance.found_violated_lazy_constraints == []
-            assert instance.found_violated_user_cuts == []
-            assert len(instance.solver_log) > 100
+            data = instance.training_data[0]
+            assert data["Solution"]["x"][0] == 1.0
+            assert data["Solution"]["x"][1] == 0.0
+            assert data["Solution"]["x"][2] == 1.0
+            assert data["Solution"]["x"][3] == 1.0
+            assert data["Lower bound"] == 1183.0
+            assert data["Upper bound"] == 1183.0
+            assert round(data["LP solution"]["x"][0], 3) == 1.000
+            assert round(data["LP solution"]["x"][1], 3) == 0.923
+            assert round(data["LP solution"]["x"][2], 3) == 1.000
+            assert round(data["LP solution"]["x"][3], 3) == 0.000
+            assert round(data["LP value"], 3) == 1287.923
+            assert len(data["MIP log"]) > 100

            solver.fit([instance])
            solver.solve(instance)
@@ -55,7 +54,8 @@ def test_parallel_solve():
        results = solver.parallel_solve(instances, n_jobs=3)
        assert len(results) == 10
        for instance in instances:
-            assert len(instance.solution["x"].keys()) == 4
+            data = instance.training_data[0]
+            assert len(data["Solution"]["x"].keys()) == 4


 def test_solve_fit_from_disk():
@@ -73,14 +73,14 @@ def test_solve_fit_from_disk():
        solver.solve(filenames[0])
        with open(filenames[0], "rb") as file:
            instance = pickle.load(file)
-            assert hasattr(instance, "solution")
+            assert len(instance.training_data) > 0

        # Test: parallel_solve
        solver.parallel_solve(filenames)
        for filename in filenames:
            with open(filename, "rb") as file:
                instance = pickle.load(file)
-                assert hasattr(instance, "solution")
+                assert len(instance.training_data) > 0

        # Test: solve (with specified output)
        output = [f + ".out" for f in filenames]
--- a/miplearn/types.py
+++ b/miplearn/types.py
@@ -7,13 +7,14 @@ from typing import TypedDict, Optional, Dict, Callable, Any
 TrainingSample = TypedDict(
    "TrainingSample",
    {
-        "LP log": Optional[str],
-        "LP solution": Optional[Dict],
-        "LP value": Optional[float],
-        "Lower bound": Optional[float],
-        "MIP log": Optional[str],
-        "Solution": Optional[Dict],
-        "Upper bound": Optional[float],
+        "LP log": str,
+        "LP solution": Dict,
+        "LP value": float,
+        "Lower bound": float,
+        "MIP log": str,
+        "Solution": Dict,
+        "Upper bound": float,
+        "slacks": Dict,
    },
    total=False,
 )