From 1224613b1add1b54a4e05e3e7a5452ee15f396a0 Mon Sep 17 00:00:00 2001
From: "Alinson S. Xavier" <git@axavier.org>
Date: Tue, 30 Mar 2021 21:18:40 -0500
Subject: [PATCH] Implement component.fit, component.fit_xy

---
 miplearn/components/component.py   |  45 ++++++--
 miplearn/components/primal.py      |  19 ++--
 miplearn/problems/knapsack.py      |  20 ++--
 miplearn/problems/stab.py          |   2 +-
 miplearn/problems/tsp.py           |   4 +-
 tests/components/test_component.py |  97 ++++++++++++++++
 tests/components/test_primal.py    | 171 -----------------------------
 7 files changed, 152 insertions(+), 206 deletions(-)
 create mode 100644 tests/components/test_component.py

diff --git a/miplearn/components/component.py b/miplearn/components/component.py
index eaef687..4845caa 100644
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@@ -2,9 +2,10 @@
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
 
-from abc import ABC, abstractmethod
+import numpy as np
 from typing import Any, List, Union, TYPE_CHECKING, Tuple, Dict
 
+from miplearn.extractors import InstanceIterator
 from miplearn.instance import Instance
 from miplearn.types import LearningSolveStats, TrainingSample
 
@@ -13,7 +14,7 @@ if TYPE_CHECKING:
 
 
 # noinspection PyMethodMayBeStatic
-class Component(ABC):
+class Component:
     """
     A Component is an object which adds functionality to a LearningSolver.
 
@@ -130,12 +131,6 @@ class Component(ABC):
         """
         return
 
-    def fit(
-        self,
-        training_instances: Union[List[str], List[Instance]],
-    ) -> None:
-        return
-
     @staticmethod
     def xy_sample(
         instance: Any,
@@ -147,6 +142,40 @@ class Component(ABC):
         """
         return {}, {}
 
+    def xy_instances(
+        self,
+        instances: Union[List[str], List[Instance]],
+    ) -> Tuple[Dict, Dict]:
+        """
+        Merges the xy_sample output of every training sample, per category.
+        """
+        x_combined: Dict = {}
+        y_combined: Dict = {}
+        for instance in InstanceIterator(instances):
+            for sample in instance.training_data:
+                x_sample, y_sample = self.xy_sample(instance, sample)
+                for cat in x_sample:
+                    x_combined.setdefault(cat, []).extend(x_sample[cat])
+                    y_combined.setdefault(cat, []).extend(y_sample[cat])
+        return x_combined, y_combined
+
+    def fit(self, training_instances: Union[List[str], List[Instance]]) -> None:
+        """
+        Converts the xy_instances output to arrays and passes it to fit_xy.
+        """
+        x, y = self.xy_instances(training_instances)
+        for cat in x:
+            x[cat] = np.array(x[cat])
+            y[cat] = np.array(y[cat])
+        self.fit_xy(x, y)
+
+    def fit_xy(self, x: Dict[str, np.ndarray], y: Dict[str, np.ndarray]) -> None:
+        """
+        Hook that fit calls with the per-category x and y arrays.
+        This base implementation does nothing.
+        """
+        return
+
     def iteration_cb(
         self,
         solver: "LearningSolver",
diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py
index c6c556c..24948d2 100644
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -105,19 +105,11 @@ class PrimalSolutionComponent(Component):
     ) -> Dict[Hashable, np.ndarray]:
         return self._build_x_y_dict(instances, self._extract_variable_features)
 
-    def y(
+    def fit_xy(
         self,
-        instances: Union[List[str], List[Instance]],
-    ) -> Dict[Hashable, np.ndarray]:
-        return self._build_x_y_dict(instances, self._extract_variable_labels)
-
-    def fit(
-        self,
-        training_instances: Union[List[str], List[Instance]],
-        n_jobs: int = 1,
+        x: Dict[str, np.ndarray],
+        y: Dict[str, np.ndarray],
     ) -> None:
-        x = self.x(training_instances)
-        y = self.y(training_instances)
         for category in x.keys():
             clf = self.classifier_factory()
             thr = self.threshold_factory()
@@ -322,8 +314,11 @@ class PrimalSolutionComponent(Component):
                     x[category] = []
                     y[category] = []
                 features: Any = instance.get_variable_features(var, idx)
+                assert isinstance(features, list)
                 if "LP solution" in sample and sample["LP solution"] is not None:
-                    features += [sample["LP solution"][var][idx]]
+                    lp_value = sample["LP solution"][var][idx]
+                    if lp_value is not None:  # copy; don't mutate the instance's list
+                        features = features + [lp_value]
                 x[category] += [features]
                 y[category] += [[opt_value < 0.5, opt_value >= 0.5]]
         return x, y
diff --git a/miplearn/problems/knapsack.py b/miplearn/problems/knapsack.py
index e251322..425d0ad 100644
--- a/miplearn/problems/knapsack.py
+++ b/miplearn/problems/knapsack.py
@@ -265,20 +265,16 @@ class KnapsackInstance(Instance):
         return model
 
     def get_instance_features(self):
-        return np.array(
-            [
-                self.capacity,
-                np.average(self.weights),
-            ]
-        )
+        return [
+            self.capacity,
+            np.average(self.weights),
+        ]
 
     def get_variable_features(self, var, index):
-        return np.array(
-            [
-                self.weights[index],
-                self.prices[index],
-            ]
-        )
+        return [
+            self.weights[index],
+            self.prices[index],
+        ]
 
 
 class GurobiKnapsackInstance(KnapsackInstance):
diff --git a/miplearn/problems/stab.py b/miplearn/problems/stab.py
index 306752d..50b14a4 100644
--- a/miplearn/problems/stab.py
+++ b/miplearn/problems/stab.py
@@ -129,7 +129,7 @@ class MaxWeightStableSetInstance(Instance):
         features += neighbor_weights[:5]
         features += neighbor_degrees[:5]
         features += [self.graph.degree(index)]
-        return np.array(features)
+        return features
 
     def get_variable_category(self, var, index):
         return "default"
diff --git a/miplearn/problems/tsp.py b/miplearn/problems/tsp.py
index ea0f40f..6b1032f 100644
--- a/miplearn/problems/tsp.py
+++ b/miplearn/problems/tsp.py
@@ -157,10 +157,10 @@ class TravelingSalesmanInstance(Instance):
         return model
 
     def get_instance_features(self):
-        return np.array([1])
+        return [1]
 
     def get_variable_features(self, var_name, index):
-        return np.array([1])
+        return [1]
 
     def get_variable_category(self, var_name, index):
         return index
diff --git a/tests/components/test_component.py b/tests/components/test_component.py
new file mode 100644
index 0000000..50fb8ed
--- /dev/null
+++ b/tests/components/test_component.py
@@ -0,0 +1,97 @@
+#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+#  Released under the modified BSD license. See COPYING.md for more details.
+from unittest.mock import Mock
+
+from miplearn import Component, Instance
+
+
+def test_xy_instance():
+    # xy_instances must concatenate the xy_sample results category by category.
+    def _xy_sample(instance, sample):
+        # Canned features (x) and labels (y), keyed by the sample id.
+        x = {
+            "s1": {
+                "category_a": [
+                    [1, 2, 3],
+                    [3, 4, 6],
+                ],
+                "category_b": [
+                    [7, 8, 9],
+                ],
+            },
+            "s2": {
+                "category_a": [
+                    [0, 0, 0],
+                    [0, 5, 3],
+                    [2, 2, 0],
+                ],
+                "category_c": [
+                    [0, 0, 0],
+                    [0, 0, 1],
+                ],
+            },
+            "s3": {
+                "category_c": [
+                    [1, 1, 1],
+                ],
+            },
+        }
+        y = {
+            "s1": {
+                "category_a": [[1], [2]],
+                "category_b": [[3]],
+            },
+            "s2": {
+                "category_a": [[4], [5], [6]],
+                "category_c": [[8], [9]],
+            },
+            "s3": {
+                "category_c": [[11]],
+            },
+        }
+        return x[sample], y[sample]
+
+    comp = Component()
+    instance_1 = Mock(spec=Instance)
+    instance_1.training_data = ["s1", "s2"]
+    instance_2 = Mock(spec=Instance)
+    instance_2.training_data = ["s3"]
+    comp.xy_sample = _xy_sample
+    x_expected = {
+        "category_a": [
+            [1, 2, 3],
+            [3, 4, 6],
+            [0, 0, 0],
+            [0, 5, 3],
+            [2, 2, 0],
+        ],
+        "category_b": [
+            [7, 8, 9],
+        ],
+        "category_c": [
+            [0, 0, 0],
+            [0, 0, 1],
+            [1, 1, 1],
+        ],
+    }
+    y_expected = {
+        "category_a": [
+            [1],
+            [2],
+            [4],
+            [5],
+            [6],
+        ],
+        "category_b": [
+            [3],
+        ],
+        "category_c": [
+            [8],
+            [9],
+            [11],
+        ],
+    }
+    x_actual, y_actual = comp.xy_instances([instance_1, instance_2])
+    assert x_actual == x_expected
+    assert y_actual == y_expected
diff --git a/tests/components/test_primal.py b/tests/components/test_primal.py
index 0aca1e5..a768d41 100644
--- a/tests/components/test_primal.py
+++ b/tests/components/test_primal.py
@@ -130,177 +130,6 @@ def test_xy_sample_without_lp_solution() -> None:
     assert_array_equal(y_actual["default"], y_expected["default"])
 
 
-def test_x_y_fit() -> None:
-    comp = PrimalSolutionComponent()
-    training_instances = cast(
-        List[Instance],
-        [
-            Mock(spec=Instance),
-            Mock(spec=Instance),
-        ],
-    )
-
-    # Construct first instance
-    training_instances[0].get_variable_category = Mock(  # type: ignore
-        side_effect=lambda var_name, index: {
-            0: "default",
-            1: None,
-            2: "default",
-            3: "default",
-        }[index]
-    )
-    training_instances[0].get_variable_features = Mock(  # type: ignore
-        side_effect=lambda var, index: {
-            0: [0.0, 0.0],
-            1: [0.0, 1.0],
-            2: [1.0, 0.0],
-            3: [1.0, 1.0],
-        }[index]
-    )
-    training_instances[0].training_data = [
-        {
-            "Solution": {
-                "x": {
-                    0: 0.0,
-                    1: 1.0,
-                    2: 0.0,
-                    3: 0.0,
-                }
-            },
-            "LP solution": {
-                "x": {
-                    0: 0.1,
-                    1: 0.1,
-                    2: 0.1,
-                    3: 0.1,
-                }
-            },
-        },
-        {
-            "Solution": {
-                "x": {
-                    0: 0.0,
-                    1: 1.0,
-                    2: 1.0,
-                    3: 0.0,
-                }
-            },
-            "LP solution": {
-                "x": {
-                    0: 0.2,
-                    1: 0.2,
-                    2: 0.2,
-                    3: 0.2,
-                }
-            },
-        },
-    ]
-
-    # Construct second instance
-    training_instances[1].get_variable_category = Mock(  # type: ignore
-        side_effect=lambda var_name, index: {
-            0: "default",
-            1: None,
-            2: "default",
-            3: "default",
-        }[index]
-    )
-    training_instances[1].get_variable_features = Mock(  # type: ignore
-        side_effect=lambda var, index: {
-            0: [0.0, 0.0],
-            1: [0.0, 2.0],
-            2: [2.0, 0.0],
-            3: [2.0, 2.0],
-        }[index]
-    )
-    training_instances[1].training_data = [
-        {
-            "Solution": {
-                "x": {
-                    0: 1.0,
-                    1: 1.0,
-                    2: 1.0,
-                    3: 1.0,
-                }
-            },
-            "LP solution": {
-                "x": {
-                    0: 0.3,
-                    1: 0.3,
-                    2: 0.3,
-                    3: 0.3,
-                }
-            },
-        },
-        {
-            "Solution": None,
-            "LP solution": None,
-        },
-    ]
-
-    # Test x
-    x_expected = {
-        "default": np.array(
-            [
-                [0.0, 0.0, 0.1],
-                [1.0, 0.0, 0.1],
-                [1.0, 1.0, 0.1],
-                [0.0, 0.0, 0.2],
-                [1.0, 0.0, 0.2],
-                [1.0, 1.0, 0.2],
-                [0.0, 0.0, 0.3],
-                [2.0, 0.0, 0.3],
-                [2.0, 2.0, 0.3],
-            ]
-        )
-    }
-    x_actual = comp.x(training_instances)
-    assert len(x_actual.keys()) == 1
-    assert_array_equal(x_actual["default"], x_expected["default"])
-
-    # Test y
-    y_expected = {
-        "default": np.array(
-            [
-                [True, False],
-                [True, False],
-                [True, False],
-                [True, False],
-                [False, True],
-                [True, False],
-                [False, True],
-                [False, True],
-                [False, True],
-            ]
-        )
-    }
-    y_actual = comp.y(training_instances)
-    assert len(y_actual.keys()) == 1
-    assert_array_equal(y_actual["default"], y_expected["default"])
-
-    # Test fit
-    classifier = Mock(spec=Classifier)
-    threshold = Mock(spec=Threshold)
-    classifier_factory = Mock(return_value=classifier)
-    threshold_factory = Mock(return_value=threshold)
-    comp = PrimalSolutionComponent(
-        classifier=classifier_factory,
-        threshold=threshold_factory,
-    )
-    comp.fit(training_instances)
-
-    # Should build and train classifier for "default" category
-    classifier_factory.assert_called_once()
-    assert_array_equal(x_actual["default"], classifier.fit.call_args[0][0])
-    assert_array_equal(y_actual["default"], classifier.fit.call_args[0][1])
-
-    # Should build and train threshold for "default" category
-    threshold_factory.assert_called_once()
-    assert classifier == threshold.fit.call_args[0][0]
-    assert_array_equal(x_actual["default"], threshold.fit.call_args[0][1])
-    assert_array_equal(y_actual["default"], threshold.fit.call_args[0][2])
-
-
 def test_predict() -> None:
     comp = PrimalSolutionComponent()