From 06402516e689791f20dcf9b48891a720a8bdcc20 Mon Sep 17 00:00:00 2001
From: "Alinson S. Xavier"
Date: Thu, 21 Jan 2021 08:21:40 -0600
Subject: [PATCH] Move collected data to instance.training_data

---
 miplearn/components/objective.py              | 10 ++++-
 miplearn/components/primal.py                 |  5 ++-
 .../steps/tests/test_convert_tight.py         | 20 ++++-----
 miplearn/components/tests/test_objective.py   |  4 +-
 miplearn/components/tests/test_primal.py      |  2 +-
 miplearn/extractors.py                        | 40 +++++++++++-------
 miplearn/problems/tests/test_stab.py          |  4 +-
 miplearn/problems/tests/test_tsp.py           | 10 ++---
 miplearn/solvers/learning.py                  | 42 +++++++------------
 .../solvers/tests/test_learning_solver.py     | 34 +++++++--------
 miplearn/types.py                             | 15 +++----
 11 files changed, 97 insertions(+), 89 deletions(-)

diff --git a/miplearn/components/objective.py b/miplearn/components/objective.py
index da27b68..1fe3ac0 100644
--- a/miplearn/components/objective.py
+++ b/miplearn/components/objective.py
@@ -78,7 +78,15 @@ class ObjectiveValueComponent(Component):
 
     def evaluate(self, instances):
         y_pred = self.predict(instances)
-        y_true = np.array([[inst.lower_bound, inst.upper_bound] for inst in instances])
+        y_true = np.array(
+            [
+                [
+                    inst.training_data[0]["Lower bound"],
+                    inst.training_data[0]["Upper bound"],
+                ]
+                for inst in instances
+            ]
+        )
         y_true_lb, y_true_ub = y_true[:, 0], y_true[:, 1]
         y_pred_lb, y_pred_ub = y_pred[:, 1], y_pred[:, 1]
         ev = {
diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py
index 1b15517..d2a9f28 100644
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -68,7 +68,8 @@ class PrimalSolutionComponent(Component):
             for label in [0, 1]:
                 y_train = solutions[category][:, label].astype(int)
 
-                # If all samples are either positive or negative, make constant predictions
+                # If all samples are either positive or negative, make constant
+                # predictions
                 y_avg = np.average(y_train)
                 if y_avg < 0.001 or y_avg >= 0.999:
                     self.classifiers[category, label] = round(y_avg)
@@ -130,7 +131,7 @@ class PrimalSolutionComponent(Component):
             desc="Evaluate (primal)",
         ):
             instance = instances[instance_idx]
-            solution_actual = instance.solution
+            solution_actual = instance.training_data[0]["Solution"]
             solution_pred = self.predict(instance)
 
             vars_all, vars_one, vars_zero = set(), set(), set()
diff --git a/miplearn/components/steps/tests/test_convert_tight.py b/miplearn/components/steps/tests/test_convert_tight.py
index ae8e7dc..44557a5 100644
--- a/miplearn/components/steps/tests/test_convert_tight.py
+++ b/miplearn/components/steps/tests/test_convert_tight.py
@@ -24,8 +24,8 @@ def test_convert_tight_usage():
     )
 
     # Solve original problem
-    solver.solve(instance)
-    original_upper_bound = instance.upper_bound
+    stats = solver.solve(instance)
+    original_upper_bound = stats["Upper bound"]
 
     # Should collect training data
     assert instance.training_data[0]["slacks"]["eq_capacity"] == 0.0
@@ -35,12 +35,12 @@ def test_convert_tight_usage():
     stats = solver.solve(instance)
 
     # Objective value should be the same
-    assert instance.upper_bound == original_upper_bound
+    assert stats["Upper bound"] == original_upper_bound
     assert stats["ConvertTight: Inf iterations"] == 0
     assert stats["ConvertTight: Subopt iterations"] == 0
 
 
-class TestInstance(Instance):
+class SampleInstance(Instance):
     def to_model(self):
         import gurobipy as grb
 
@@ -70,9 +70,9 @@ def test_convert_tight_infeasibility():
         components=[comp],
         solve_lp_first=False,
     )
-    instance = TestInstance()
+    instance = SampleInstance()
     stats = solver.solve(instance)
-    assert instance.lower_bound == 5.0
+    assert stats["Upper bound"] == 5.0
     assert stats["ConvertTight: Inf iterations"] == 1
     assert stats["ConvertTight: Subopt iterations"] == 0
 
@@ -93,9 +93,9 @@ def test_convert_tight_suboptimality():
         components=[comp],
         solve_lp_first=False,
     )
-    instance = TestInstance()
+    instance = SampleInstance()
     stats = solver.solve(instance)
-    assert instance.lower_bound == 5.0
+    assert stats["Upper bound"] == 5.0
     assert stats["ConvertTight: Inf iterations"] == 0
     assert stats["ConvertTight: Subopt iterations"] == 1
 
@@ -116,8 +116,8 @@ def test_convert_tight_optimal():
         components=[comp],
         solve_lp_first=False,
     )
-    instance = TestInstance()
+    instance = SampleInstance()
     stats = solver.solve(instance)
-    assert instance.lower_bound == 5.0
+    assert stats["Upper bound"] == 5.0
     assert stats["ConvertTight: Inf iterations"] == 0
     assert stats["ConvertTight: Subopt iterations"] == 0
diff --git a/miplearn/components/tests/test_objective.py b/miplearn/components/tests/test_objective.py
index 02879ac..009ad8b 100644
--- a/miplearn/components/tests/test_objective.py
+++ b/miplearn/components/tests/test_objective.py
@@ -15,8 +15,8 @@ def test_usage():
     instances, models = get_test_pyomo_instances()
     comp = ObjectiveValueComponent()
     comp.fit(instances)
-    assert instances[0].lower_bound == 1183.0
-    assert instances[0].upper_bound == 1183.0
+    assert instances[0].training_data[0]["Lower bound"] == 1183.0
+    assert instances[0].training_data[0]["Upper bound"] == 1183.0
     assert np.round(comp.predict(instances), 2).tolist() == [
         [1183.0, 1183.0],
         [1070.0, 1070.0],
diff --git a/miplearn/components/tests/test_primal.py b/miplearn/components/tests/test_primal.py
index 9882e5b..6585d29 100644
--- a/miplearn/components/tests/test_primal.py
+++ b/miplearn/components/tests/test_primal.py
@@ -50,7 +50,7 @@ def test_evaluate():
     comp = PrimalSolutionComponent(classifier=[clf_zero, clf_one], threshold=0.50)
     comp.fit(instances[:1])
     assert comp.predict(instances[0]) == {"x": {0: 0, 1: 0, 2: 1, 3: None}}
-    assert instances[0].solution == {"x": {0: 1, 1: 0, 2: 1, 3: 1}}
+    assert instances[0].training_data[0]["Solution"] == {"x": {0: 1, 1: 0, 2: 1, 3: 1}}
     ev = comp.evaluate(instances[:1])
     assert ev == {
         "Fix one": {
diff --git a/miplearn/extractors.py b/miplearn/extractors.py
index a113c70..a285405 100644
--- a/miplearn/extractors.py
+++ b/miplearn/extractors.py
@@ -2,14 +2,13 @@
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
 
+import gzip
 import logging
 import pickle
-import gzip
+from abc import ABC, abstractmethod
 
 import numpy as np
-
 from tqdm.auto import tqdm
-from abc import ABC, abstractmethod
 
 
 logger = logging.getLogger(__name__)
@@ -48,10 +47,10 @@ class Extractor(ABC):
 
     @staticmethod
    def split_variables(instance):
-        assert hasattr(instance, "lp_solution")
         result = {}
-        for var_name in instance.lp_solution:
-            for index in instance.lp_solution[var_name]:
+        lp_solution = instance.training_data[0]["LP solution"]
+        for var_name in lp_solution:
+            for index in lp_solution[var_name]:
                 category = instance.get_variable_category(var_name, index)
                 if category is None:
                     continue
@@ -71,6 +70,7 @@ class VariableFeaturesExtractor(Extractor):
         ):
             instance_features = instance.get_instance_features()
             var_split = self.split_variables(instance)
+            lp_solution = instance.training_data[0]["LP solution"]
             for (category, var_index_pairs) in var_split.items():
                 if category not in result:
                     result[category] = []
@@ -78,7 +78,7 @@ class VariableFeaturesExtractor(Extractor):
                     result[category] += [
                         instance_features.tolist()
                         + instance.get_variable_features(var_name, index).tolist()
-                        + [instance.lp_solution[var_name][index]]
+                        + [lp_solution[var_name][index]]
                     ]
         for category in result:
             result[category] = np.array(result[category])
@@ -97,14 +97,15 @@ class SolutionExtractor(Extractor):
             disable=len(instances) < 5,
         ):
             var_split = self.split_variables(instance)
+            if self.relaxation:
+                solution = instance.training_data[0]["LP solution"]
+            else:
+                solution = instance.training_data[0]["Solution"]
             for (category, var_index_pairs) in var_split.items():
                 if category not in result:
                     result[category] = []
                 for (var_name, index) in var_index_pairs:
-                    if self.relaxation:
-                        v = instance.lp_solution[var_name][index]
-                    else:
-                        v = instance.solution[var_name][index]
+                    v = solution[var_name][index]
                     if v is None:
                         result[category] += [[0, 0]]
                     else:
@@ -121,7 +122,7 @@ class InstanceFeaturesExtractor(Extractor):
                 np.hstack(
                     [
                         instance.get_instance_features(),
-                        instance.lp_value,
+                        instance.training_data[0]["LP value"],
                     ]
                 )
                 for instance in InstanceIterator(instances)
@@ -137,13 +138,22 @@ class ObjectiveValueExtractor(Extractor):
     def extract(self, instances):
         if self.kind == "lower bound":
             return np.array(
-                [[instance.lower_bound] for instance in InstanceIterator(instances)]
+                [
+                    [instance.training_data[0]["Lower bound"]]
+                    for instance in InstanceIterator(instances)
+                ]
             )
         if self.kind == "upper bound":
             return np.array(
-                [[instance.upper_bound] for instance in InstanceIterator(instances)]
+                [
+                    [instance.training_data[0]["Upper bound"]]
+                    for instance in InstanceIterator(instances)
+                ]
             )
         if self.kind == "lp":
             return np.array(
-                [[instance.lp_value] for instance in InstanceIterator(instances)]
+                [
+                    [instance.training_data[0]["LP value"]]
+                    for instance in InstanceIterator(instances)
+                ]
             )
diff --git a/miplearn/problems/tests/test_stab.py b/miplearn/problems/tests/test_stab.py
index dc44eb8..f0e03e4 100644
--- a/miplearn/problems/tests/test_stab.py
+++ b/miplearn/problems/tests/test_stab.py
@@ -15,8 +15,8 @@ def test_stab():
     weights = [1.0, 1.0, 1.0, 1.0, 1.0]
     instance = MaxWeightStableSetInstance(graph, weights)
     solver = LearningSolver()
-    solver.solve(instance)
-    assert instance.lower_bound == 2.0
+    stats = solver.solve(instance)
+    assert stats["Lower bound"] == 2.0
 
 
 def test_stab_generator_fixed_graph():
diff --git a/miplearn/problems/tests/test_tsp.py b/miplearn/problems/tests/test_tsp.py
index e43bc75..071c9ef 100644
--- a/miplearn/problems/tests/test_tsp.py
+++ b/miplearn/problems/tests/test_tsp.py
@@ -38,16 +38,16 @@ def test_instance():
     )
     instance = TravelingSalesmanInstance(n_cities, distances)
     solver = LearningSolver()
-    solver.solve(instance)
-    x = instance.solution["x"]
+    stats = solver.solve(instance)
+    x = instance.training_data[0]["Solution"]["x"]
     assert x[0, 1] == 1.0
     assert x[0, 2] == 0.0
     assert x[0, 3] == 1.0
     assert x[1, 2] == 1.0
     assert x[1, 3] == 0.0
     assert x[2, 3] == 1.0
-    assert instance.lower_bound == 4.0
-    assert instance.upper_bound == 4.0
+    assert stats["Lower bound"] == 4.0
+    assert stats["Upper bound"] == 4.0
 
 
 def test_subtour():
@@ -68,7 +68,7 @@ def test_subtour():
     solver.solve(instance)
     assert hasattr(instance, "found_violated_lazy_constraints")
     assert hasattr(instance, "found_violated_user_cuts")
-    x = instance.solution["x"]
+    x = instance.training_data[0]["Solution"]["x"]
     assert x[0, 1] == 1.0
     assert x[0, 4] == 1.0
     assert x[1, 2] == 1.0
diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py
index 4667f94..de47108 100644
--- a/miplearn/solvers/learning.py
+++ b/miplearn/solvers/learning.py
@@ -115,11 +115,11 @@ class LearningSolver:
 
     def solve(
         self,
-        instance,
-        model=None,
-        output="",
-        tee=False,
-    ):
+        instance: Union[Instance, str],
+        model: Any = None,
+        output: str = "",
+        tee: bool = False,
+    ) -> MIPSolveStats:
         """
         Solves the given instance. If trained machine-learning models are
         available, they will be used to accelerate the solution process.
@@ -127,20 +127,9 @@ class LearningSolver:
         The argument `instance` may be either an Instance object or a
         filename pointing to a pickled Instance object.
 
-        This method modifies the instance object. Specifically, the following
-        properties are set:
-
-            - instance.lp_solution
-            - instance.lp_value
-            - instance.lower_bound
-            - instance.upper_bound
-            - instance.solution
-            - instance.solver_log
-
-        Additional solver components may set additional properties. Please
-        see their documentation for more details. If a filename is provided,
-        then the file is modified in-place. That is, the original file is
-        overwritten.
+        This method adds a new training sample to `instance.training_data`.
+        If a filename is provided, then the file is modified in-place. That is,
+        the original file is overwritten.
 
         If `solver.solve_lp_first` is False, the properties lp_solution and
         lp_value will be set to dummy values.
@@ -190,7 +179,7 @@ class LearningSolver:
 
     def _solve(
         self,
-        instance: Instance,
+        instance: Union[Instance, str],
         model: Any = None,
         output: str = "",
         tee: bool = False,
@@ -211,14 +200,18 @@ class LearningSolver:
                 fileformat = "pickle"
                 with open(filename, "rb") as file:
                     instance = pickle.load(cast(IO[bytes], file))
+        assert isinstance(instance, Instance)
 
         # Generate model
         if model is None:
             with RedirectOutput([]):
                 model = instance.to_model()
 
-        # Initialize training data
+        # Initialize training sample
         training_sample: TrainingSample = {}
+        if not hasattr(instance, "training_data"):
+            instance.training_data = []
+        instance.training_data += [training_sample]
 
         # Initialize internal solver
         self.tee = tee
@@ -275,11 +268,6 @@ class LearningSolver:
         for component in self.components.values():
             component.after_solve(self, instance, model, stats, training_sample)
 
-        # Append training data
-        if not hasattr(instance, "training_data"):
-            instance.training_data = []
-        instance.training_data += [training_sample]
-
         # Write to file, if necessary
         if filename is not None and output is not None:
             output_filename = output
@@ -350,7 +338,7 @@ class LearningSolver:
         self._restore_miplearn_logger()
         return stats
 
-    def fit(self, training_instances):
+    def fit(self, training_instances: Union[List[str], List[Instance]]) -> None:
         if len(training_instances) == 0:
             return
         for component in self.components.values():
diff --git a/miplearn/solvers/tests/test_learning_solver.py b/miplearn/solvers/tests/test_learning_solver.py
index 41b7317..a6c4c4d 100644
--- a/miplearn/solvers/tests/test_learning_solver.py
+++ b/miplearn/solvers/tests/test_learning_solver.py
@@ -25,20 +25,19 @@ def test_learning_solver():
             )
             solver.solve(instance)
 
-            assert instance.solution["x"][0] == 1.0
-            assert instance.solution["x"][1] == 0.0
-            assert instance.solution["x"][2] == 1.0
-            assert instance.solution["x"][3] == 1.0
-            assert instance.lower_bound == 1183.0
-            assert instance.upper_bound == 1183.0
-            assert round(instance.lp_solution["x"][0], 3) == 1.000
-            assert round(instance.lp_solution["x"][1], 3) == 0.923
-            assert round(instance.lp_solution["x"][2], 3) == 1.000
-            assert round(instance.lp_solution["x"][3], 3) == 0.000
-            assert round(instance.lp_value, 3) == 1287.923
-            assert instance.found_violated_lazy_constraints == []
-            assert instance.found_violated_user_cuts == []
-            assert len(instance.solver_log) > 100
+            data = instance.training_data[0]
+            assert data["Solution"]["x"][0] == 1.0
+            assert data["Solution"]["x"][1] == 0.0
+            assert data["Solution"]["x"][2] == 1.0
+            assert data["Solution"]["x"][3] == 1.0
+            assert data["Lower bound"] == 1183.0
+            assert data["Upper bound"] == 1183.0
+            assert round(data["LP solution"]["x"][0], 3) == 1.000
+            assert round(data["LP solution"]["x"][1], 3) == 0.923
+            assert round(data["LP solution"]["x"][2], 3) == 1.000
+            assert round(data["LP solution"]["x"][3], 3) == 0.000
+            assert round(data["LP value"], 3) == 1287.923
+            assert len(data["MIP log"]) > 100
 
             solver.fit([instance])
             solver.solve(instance)
@@ -55,7 +54,8 @@ def test_parallel_solve():
     results = solver.parallel_solve(instances, n_jobs=3)
     assert len(results) == 10
     for instance in instances:
-        assert len(instance.solution["x"].keys()) == 4
+        data = instance.training_data[0]
+        assert len(data["Solution"]["x"].keys()) == 4
 
 
 def test_solve_fit_from_disk():
@@ -73,14 +73,14 @@ def test_solve_fit_from_disk():
     solver.solve(filenames[0])
     with open(filenames[0], "rb") as file:
         instance = pickle.load(file)
-    assert hasattr(instance, "solution")
+    assert len(instance.training_data) > 0
 
     # Test: parallel_solve
     solver.parallel_solve(filenames)
     for filename in filenames:
         with open(filename, "rb") as file:
             instance = pickle.load(file)
-        assert hasattr(instance, "solution")
+        assert len(instance.training_data) > 0
 
     # Test: solve (with specified output)
     output = [f + ".out" for f in filenames]
diff --git a/miplearn/types.py b/miplearn/types.py
index c8aeb2b..6cd701f 100644
--- a/miplearn/types.py
+++ b/miplearn/types.py
@@ -7,13 +7,14 @@ from typing import TypedDict, Optional, Dict, Callable, Any
 TrainingSample = TypedDict(
     "TrainingSample",
     {
-        "LP log": Optional[str],
-        "LP solution": Optional[Dict],
-        "LP value": Optional[float],
-        "Lower bound": Optional[float],
-        "MIP log": Optional[str],
-        "Solution": Optional[Dict],
-        "Upper bound": Optional[float],
+        "LP log": str,
+        "LP solution": Dict,
+        "LP value": float,
+        "Lower bound": float,
+        "MIP log": str,
+        "Solution": Dict,
+        "Upper bound": float,
+        "slacks": Dict,
     },
     total=False,
 )