From 7af22bd16b4b0f03ff2870cfe90ef8841698dd91 Mon Sep 17 00:00:00 2001
From: "Alinson S. Xavier" <git@axavier.org>
Date: Sat, 3 Apr 2021 10:24:05 -0500
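Subject: [PATCH] Refactor ObjectiveValueComponent

ObjectiveValueComponent now takes a single `regressor` prototype instead of
separate `lb_regressor` and `ub_regressor` arguments; the prototype is cloned
once for each bound found in the training targets. The instance-based `fit`,
`predict`, `x` and `y` methods are replaced by `fit_xy` and `sample_predict`,
and `sample_xy` now skips targets that are missing or None.

Also make the assertion messages in Regressor.predict more informative and
document Component.fit_xy.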
---
 miplearn/classifiers/__init__.py   |  10 +-
 miplearn/components/component.py   |   6 +
 miplearn/components/objective.py   | 130 ++++++--------
 tests/components/test_objective.py | 277 ++++++++++++++++++++---------
 4 files changed, 264 insertions(+), 159 deletions(-)

diff --git a/miplearn/classifiers/__init__.py b/miplearn/classifiers/__init__.py
index 8544b50..2fdd117 100644
--- a/miplearn/classifiers/__init__.py
+++ b/miplearn/classifiers/__init__.py
@@ -143,10 +143,16 @@ class Regressor(ABC):
             `n_outputs` is the number of columns in `y_train` provided to `fit`.
         """
         assert self.n_inputs is not None
-        assert isinstance(x_test, np.ndarray)
+        assert isinstance(x_test, np.ndarray), (
+            f"Parameter x_test must be np.ndarray. "
+            f"Found {x_test.__class__.__name__} instead."
+        )
         assert len(x_test.shape) == 2
         (n_samples, n_inputs_x) = x_test.shape
-        assert n_inputs_x == self.n_inputs
+        assert n_inputs_x == self.n_inputs, (
+            f"Test and training data have different number of "
+            f"inputs: {n_inputs_x} != {self.n_inputs}"
+        )
         return np.ndarray([])
 
     @abstractmethod
diff --git a/miplearn/components/component.py b/miplearn/components/component.py
index b4b4fcc..220aed2 100644
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@@ -154,6 +154,12 @@ class Component:
         x: Dict[str, np.ndarray],
         y: Dict[str, np.ndarray],
     ) -> None:
+        """
+        Given two dictionaries x and y, mapping the name of the category to matrices
+        of features and targets, subclasses are expected to do two things. First, for
+        each category, create a clone of the prototype regressor/classifier. Second,
+        pass (x[category], y[category]) to the clone's fit method. No-op by default.
+        """
         return
 
     def iteration_cb(
diff --git a/miplearn/components/objective.py b/miplearn/components/objective.py
index 8de77bc..d1f2925 100644
--- a/miplearn/components/objective.py
+++ b/miplearn/components/objective.py
@@ -35,15 +35,12 @@ class ObjectiveValueComponent(Component):
 
     def __init__(
         self,
-        lb_regressor: Regressor = ScikitLearnRegressor(LinearRegression()),
-        ub_regressor: Regressor = ScikitLearnRegressor(LinearRegression()),
+        regressor: Regressor = ScikitLearnRegressor(LinearRegression()),
     ) -> None:
-        assert isinstance(lb_regressor, Regressor)
-        assert isinstance(ub_regressor, Regressor)
+        assert isinstance(regressor, Regressor)
         self.ub_regressor: Optional[Regressor] = None
         self.lb_regressor: Optional[Regressor] = None
-        self.lb_regressor_prototype = lb_regressor
-        self.ub_regressor_prototype = ub_regressor
+        self.regressor_prototype = regressor
         self._predicted_ub: Optional[float] = None
         self._predicted_lb: Optional[float] = None
 
@@ -56,65 +53,28 @@ class ObjectiveValueComponent(Component):
         features: Features,
         training_data: TrainingSample,
     ) -> None:
-        if self.ub_regressor is not None:
-            logger.info("Predicting optimal value...")
-            pred = self.predict([instance])
-            predicted_lb = pred["Upper bound"][0]
-            predicted_ub = pred["Lower bound"][0]
-            logger.info("Predicted LB=%.2f, UB=%.2f" % (predicted_lb, predicted_ub))
-            if predicted_ub is not None:
-                stats["Objective: Predicted UB"] = predicted_ub
-            if predicted_lb is not None:
-                stats["Objective: Predicted LB"] = predicted_lb
-
-    def fit(self, training_instances: Union[List[str], List[Instance]]) -> None:
-        self.lb_regressor = self.lb_regressor_prototype.clone()
-        self.ub_regressor = self.ub_regressor_prototype.clone()
-        logger.debug("Extracting features...")
-        x_train = self.x(training_instances)
-        y_train = self.y(training_instances)
-        logger.debug("Fitting lb_regressor...")
-        self.lb_regressor.fit(x_train, y_train["Lower bound"])
-        logger.debug("Fitting ub_regressor...")
-        self.ub_regressor.fit(x_train, y_train["Upper bound"])
+        logger.info("Predicting optimal value...")
+        pred = self.sample_predict(features, training_data)
+        if "Upper bound" in pred:
+            ub = pred["Upper bound"]
+            logger.info("Predicted upper bound: %.6e" % ub)
+            stats["Objective: Predicted UB"] = ub
+        if "Lower bound" in pred:
+            lb = pred["Lower bound"]
+            logger.info("Predicted lower bound: %.6e" % lb)
+            stats["Objective: Predicted LB"] = lb
 
-    def predict(
+    def fit_xy(
         self,
-        instances: Union[List[str], List[Instance]],
-    ) -> Dict[str, List[float]]:
-        assert self.lb_regressor is not None
-        assert self.ub_regressor is not None
-        x_test = self.x(instances)
-        (n_samples, n_features) = x_test.shape
-        lb = self.lb_regressor.predict(x_test)
-        ub = self.ub_regressor.predict(x_test)
-        assert lb.shape == (n_samples, 1)
-        assert ub.shape == (n_samples, 1)
-        return {
-            "Lower bound": lb.ravel().tolist(),
-            "Upper bound": ub.ravel().tolist(),
-        }
-
-    @staticmethod
-    def x(instances: Union[List[str], List[Instance]]) -> np.ndarray:
-        result = []
-        for instance in InstanceIterator(instances):
-            for sample in instance.training_data:
-                result.append(instance.get_instance_features() + [sample["LP value"]])
-        return np.array(result)
-
-    @staticmethod
-    def y(instances: Union[List[str], List[Instance]]) -> Dict[str, np.ndarray]:
-        ub: List[List[float]] = []
-        lb: List[List[float]] = []
-        for instance in InstanceIterator(instances):
-            for sample in instance.training_data:
-                lb.append([sample["Lower bound"]])
-                ub.append([sample["Upper bound"]])
-        return {
-            "Lower bound": np.array(lb),
-            "Upper bound": np.array(ub),
-        }
+        x: Dict[str, np.ndarray],
+        y: Dict[str, np.ndarray],
+    ) -> None:
+        if "Lower bound" in y:
+            self.lb_regressor = self.regressor_prototype.clone()
+            self.lb_regressor.fit(x["Lower bound"], y["Lower bound"])
+        if "Upper bound" in y:
+            self.ub_regressor = self.regressor_prototype.clone()
+            self.ub_regressor.fit(x["Upper bound"], y["Upper bound"])
 
     # def evaluate(
     #     self,
@@ -153,23 +113,56 @@ class ObjectiveValueComponent(Component):
     #     }
     #     return ev
 
+    def sample_predict(
+        self,
+        features: Features,
+        sample: TrainingSample,
+    ) -> Dict[str, float]:
+        """
+        Predicts the objective bounds for a single sample.
+
+        Builds the feature rows with `sample_xy` and queries each regressor that
+        has been fitted. The returned dictionary has the key "Lower bound" and/or
+        "Upper bound" only for fitted regressors, and is empty if neither one has
+        been fitted.
+        """
+        pred: Dict[str, float] = {}
+        x, _ = self.sample_xy(features, sample)
+        if self.lb_regressor is not None:
+            lb_pred = self.lb_regressor.predict(np.array(x["Lower bound"]))
+            pred["Lower bound"] = lb_pred[0, 0]
+        else:
+            logger.info("Lower bound regressor not fitted. Skipping.")
+        if self.ub_regressor is not None:
+            ub_pred = self.ub_regressor.predict(np.array(x["Upper bound"]))
+            pred["Upper bound"] = ub_pred[0, 0]
+        else:
+            logger.info("Upper bound regressor not fitted. Skipping.")
+        return pred
+
     @staticmethod
     def sample_xy(
         features: Features,
         sample: TrainingSample,
-    ) -> Tuple[Dict, Dict]:
-        f = features["Instance"]["User features"]
+    ) -> Tuple[Dict[str, List[List[float]]], Dict[str, List[List[float]]]]:
+        """
+        Builds the feature and target rows for a single sample.
+
+        Both categories get the instance's user features, followed by the LP value
+        when the sample has one. Targets are included only for the bounds that are
+        present in the sample and not None.
+        """
+        x: Dict[str, List[List[float]]] = {}
+        y: Dict[str, List[List[float]]] = {}
+        # Copy, so that appending the LP value does not modify `features`.
+        f = list(features["Instance"]["User features"])
         if "LP value" in sample and sample["LP value"] is not None:
             f += [sample["LP value"]]
-        x = {
-            "Lower bound": [f],
-            "Upper bound": [f],
-        }
-        if "Lower bound" in sample:
-            y = {
-                "Lower bound": [[sample["Lower bound"]]],
-                "Upper bound": [[sample["Upper bound"]]],
-            }
-            return x, y
-        else:
-            return x, {}
+        # Each category gets its own row, so that they can be modified separately.
+        x["Lower bound"] = [f]
+        x["Upper bound"] = [list(f)]
+        if "Lower bound" in sample and sample["Lower bound"] is not None:
+            y["Lower bound"] = [[sample["Lower bound"]]]
+        if "Upper bound" in sample and sample["Upper bound"] is not None:
+            y["Upper bound"] = [[sample["Upper bound"]]]
+        return x, y
diff --git a/tests/components/test_objective.py b/tests/components/test_objective.py
index fb19c4e..253a00a 100644
--- a/tests/components/test_objective.py
+++ b/tests/components/test_objective.py
@@ -1,78 +1,76 @@
 #  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
-
-from typing import cast
 from unittest.mock import Mock
 
-import numpy as np
+import pytest
 from numpy.testing import assert_array_equal
 
-from miplearn import GurobiPyomoSolver, LearningSolver
-from miplearn.instance import Instance
-from miplearn.classifiers import Regressor
+from miplearn import GurobiPyomoSolver, LearningSolver, Regressor
 from miplearn.components.objective import ObjectiveValueComponent
 from miplearn.types import TrainingSample, Features
-from tests.fixtures.knapsack import get_test_pyomo_instances, get_knapsack_instance
-
-
-def test_x_y_predict() -> None:
-    # Construct instance
-    instance = cast(Instance, Mock(spec=Instance))
-    instance.get_instance_features = Mock(  # type: ignore
-        return_value=[1.0, 2.0],
-    )
-    instance.training_data = [
-        {
-            "Lower bound": 1.0,
-            "Upper bound": 2.0,
-            "LP value": 3.0,
-        },
-        {
-            "Lower bound": 1.5,
-            "Upper bound": 2.2,
-            "LP value": 3.4,
-        },
-    ]
-
-    # Construct mock regressors
-    lb_regressor = Mock(spec=Regressor)
-    lb_regressor.predict = Mock(return_value=np.array([[5.0], [6.0]]))
-    lb_regressor.clone = lambda: lb_regressor
-    ub_regressor = Mock(spec=Regressor)
-    ub_regressor.predict = Mock(return_value=np.array([[3.0], [3.0]]))
-    ub_regressor.clone = lambda: ub_regressor
-    comp = ObjectiveValueComponent(
-        lb_regressor=lb_regressor,
-        ub_regressor=ub_regressor,
-    )
-
-    # Should build x correctly
-    x_expected = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.4]])
-    assert_array_equal(comp.x([instance]), x_expected)
-
-    # Should build y correctly
-    y_actual = comp.y([instance])
-    y_expected_lb = np.array([[1.0], [1.5]])
-    y_expected_ub = np.array([[2.0], [2.2]])
-    assert_array_equal(y_actual["Lower bound"], y_expected_lb)
-    assert_array_equal(y_actual["Upper bound"], y_expected_ub)
-
-    # Should pass arrays to regressors
-    comp.fit([instance])
-    assert_array_equal(lb_regressor.fit.call_args[0][0], x_expected)
-    assert_array_equal(lb_regressor.fit.call_args[0][1], y_expected_lb)
-    assert_array_equal(ub_regressor.fit.call_args[0][0], x_expected)
-    assert_array_equal(ub_regressor.fit.call_args[0][1], y_expected_ub)
-
-    # Should return predictions
-    pred = comp.predict([instance])
-    assert_array_equal(lb_regressor.predict.call_args[0][0], x_expected)
-    assert_array_equal(ub_regressor.predict.call_args[0][0], x_expected)
-    assert pred == {
-        "Lower bound": [5.0, 6.0],
-        "Upper bound": [3.0, 3.0],
-    }
+from tests.fixtures.knapsack import get_knapsack_instance
+
+import numpy as np
+
+
+# def test_x_y_predict() -> None:
+#     # Construct instance
+#     instance = cast(Instance, Mock(spec=Instance))
+#     instance.get_instance_features = Mock(  # type: ignore
+#         return_value=[1.0, 2.0],
+#     )
+#     instance.training_data = [
+#         {
+#             "Lower bound": 1.0,
+#             "Upper bound": 2.0,
+#             "LP value": 3.0,
+#         },
+#         {
+#             "Lower bound": 1.5,
+#             "Upper bound": 2.2,
+#             "LP value": 3.4,
+#         },
+#     ]
+#
+#     # Construct mock regressors
+#     lb_regressor = Mock(spec=Regressor)
+#     lb_regressor.predict = Mock(return_value=np.array([[5.0], [6.0]]))
+#     lb_regressor.clone = lambda: lb_regressor
+#     ub_regressor = Mock(spec=Regressor)
+#     ub_regressor.predict = Mock(return_value=np.array([[3.0], [3.0]]))
+#     ub_regressor.clone = lambda: ub_regressor
+#     comp = ObjectiveValueComponent(
+#         lb_regressor=lb_regressor,
+#         ub_regressor=ub_regressor,
+#     )
+#
+#     # Should build x correctly
+#     x_expected = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.4]])
+#     assert_array_equal(comp.x([instance]), x_expected)
+#
+#     # Should build y correctly
+#     y_actual = comp.y([instance])
+#     y_expected_lb = np.array([[1.0], [1.5]])
+#     y_expected_ub = np.array([[2.0], [2.2]])
+#     assert_array_equal(y_actual["Lower bound"], y_expected_lb)
+#     assert_array_equal(y_actual["Upper bound"], y_expected_ub)
+#
+#     # Should pass arrays to regressors
+#     comp.fit([instance])
+#     assert_array_equal(lb_regressor.fit.call_args[0][0], x_expected)
+#     assert_array_equal(lb_regressor.fit.call_args[0][1], y_expected_lb)
+#     assert_array_equal(ub_regressor.fit.call_args[0][0], x_expected)
+#     assert_array_equal(ub_regressor.fit.call_args[0][1], y_expected_ub)
+#
+#     # Should return predictions
+#     pred = comp.predict([instance])
+#     assert_array_equal(lb_regressor.predict.call_args[0][0], x_expected)
+#     assert_array_equal(ub_regressor.predict.call_args[0][0], x_expected)
+#     assert pred == {
+#         "Lower bound": [5.0, 6.0],
+#         "Upper bound": [3.0, 3.0],
+#     }
 
 
 # def test_obj_evaluate():
@@ -106,17 +104,44 @@ def test_x_y_predict() -> None:
 #     }
 
 
-def test_xy_sample_with_lp() -> None:
-    features: Features = {
+@pytest.fixture
+def features() -> Features:
+    return {
         "Instance": {
             "User features": [1.0, 2.0],
         }
     }
-    sample: TrainingSample = {
+
+
+@pytest.fixture
+def sample() -> TrainingSample:
+    return {
         "Lower bound": 1.0,
         "Upper bound": 2.0,
         "LP value": 3.0,
     }
+
+
+@pytest.fixture
+def sample_without_lp() -> TrainingSample:
+    return {
+        "Lower bound": 1.0,
+        "Upper bound": 2.0,
+    }
+
+
+@pytest.fixture
+def sample_without_ub() -> TrainingSample:
+    return {
+        "Lower bound": 1.0,
+        "LP value": 3.0,
+    }
+
+
+def test_sample_xy(
+    features: Features,
+    sample: TrainingSample,
+) -> None:
     x_expected = {
         "Lower bound": [[1.0, 2.0, 3.0]],
         "Upper bound": [[1.0, 2.0, 3.0]],
@@ -132,16 +157,10 @@ def test_xy_sample_with_lp() -> None:
     assert y_actual == y_expected
 
 
-def test_xy_sample_without_lp() -> None:
-    features: Features = {
-        "Instance": {
-            "User features": [1.0, 2.0],
-        }
-    }
-    sample: TrainingSample = {
-        "Lower bound": 1.0,
-        "Upper bound": 2.0,
-    }
+def test_sample_xy_without_lp(
+    features: Features,
+    sample_without_lp: TrainingSample,
+) -> None:
     x_expected = {
         "Lower bound": [[1.0, 2.0]],
         "Upper bound": [[1.0, 2.0]],
@@ -150,13 +169,111 @@ def test_xy_sample_without_lp() -> None:
         "Lower bound": [[1.0]],
         "Upper bound": [[2.0]],
     }
-    xy = ObjectiveValueComponent.sample_xy(features, sample)
+    xy = ObjectiveValueComponent.sample_xy(features, sample_without_lp)
+    assert xy is not None
+    x_actual, y_actual = xy
+    assert x_actual == x_expected
+    assert y_actual == y_expected
+
+
+def test_sample_xy_without_ub(
+    features: Features,
+    sample_without_ub: TrainingSample,
+) -> None:
+    x_expected = {
+        "Lower bound": [[1.0, 2.0, 3.0]],
+        "Upper bound": [[1.0, 2.0, 3.0]],
+    }
+    y_expected = {"Lower bound": [[1.0]]}
+    xy = ObjectiveValueComponent.sample_xy(features, sample_without_ub)
     assert xy is not None
     x_actual, y_actual = xy
     assert x_actual == x_expected
     assert y_actual == y_expected
 
 
+def test_fit_xy() -> None:
+    # Each bound present in y must get its own clone of the prototype, fitted once.
+    x = {
+        "Lower bound": np.array([[0.0, 0.0], [1.0, 2.0]]),
+        "Upper bound": np.array([[0.0, 0.0], [1.0, 2.0]]),
+    }
+    y = {  # one target row per feature row
+        "Lower bound": np.array([[100.0], [110.0]]),
+        "Upper bound": np.array([[200.0], [210.0]]),
+    }
+    reg = Mock(spec=Regressor)
+    reg.clone = Mock(side_effect=lambda: Mock(spec=Regressor))
+    comp = ObjectiveValueComponent(regressor=reg)
+    assert comp.ub_regressor is None and comp.lb_regressor is None
+    comp.fit_xy(x, y)
+    assert reg.clone.call_count == 2
+    assert comp.ub_regressor is not None
+    assert comp.lb_regressor is not None
+    assert comp.ub_regressor.fit.call_count == 1
+    assert comp.lb_regressor.fit.call_count == 1
+    assert_array_equal(comp.ub_regressor.fit.call_args[0][0], x["Upper bound"])
+    assert_array_equal(comp.lb_regressor.fit.call_args[0][0], x["Lower bound"])
+    assert_array_equal(comp.ub_regressor.fit.call_args[0][1], y["Upper bound"])
+    assert_array_equal(comp.lb_regressor.fit.call_args[0][1], y["Lower bound"])
+
+
+def test_fit_xy_without_ub() -> None:
+    # Without upper bound targets, only the lower bound regressor is cloned/fitted.
+    x = {
+        "Lower bound": np.array([[0.0, 0.0], [1.0, 2.0]]),
+        "Upper bound": np.array([[0.0, 0.0], [1.0, 2.0]]),
+    }
+    y = {  # one target row per feature row
+        "Lower bound": np.array([[100.0], [110.0]]),
+    }
+    reg = Mock(spec=Regressor)
+    reg.clone = Mock(side_effect=lambda: Mock(spec=Regressor))
+    comp = ObjectiveValueComponent(regressor=reg)
+    assert comp.ub_regressor is None and comp.lb_regressor is None
+    comp.fit_xy(x, y)
+    assert reg.clone.call_count == 1
+    assert comp.ub_regressor is None
+    assert comp.lb_regressor is not None
+    assert comp.lb_regressor.fit.call_count == 1
+    assert_array_equal(comp.lb_regressor.fit.call_args[0][0], x["Lower bound"])
+    assert_array_equal(comp.lb_regressor.fit.call_args[0][1], y["Lower bound"])
+
+
+def test_sample_predict(
+    features: Features,
+    sample: TrainingSample,
+) -> None:
+    # Both regressors are fitted: expect both bounds, from the sample_xy features.
+    x_expected, _ = ObjectiveValueComponent.sample_xy(features, sample)
+    lb_reg = Mock(spec=Regressor)
+    ub_reg = Mock(spec=Regressor)
+    lb_reg.predict = Mock(return_value=np.array([[50.0]]))
+    ub_reg.predict = Mock(return_value=np.array([[60.0]]))
+    comp = ObjectiveValueComponent()
+    comp.lb_regressor = lb_reg
+    comp.ub_regressor = ub_reg
+    actual = comp.sample_predict(features, sample)
+    assert actual == {"Lower bound": 50.0, "Upper bound": 60.0}
+    assert_array_equal(ub_reg.predict.call_args[0][0], x_expected["Upper bound"])
+    assert_array_equal(lb_reg.predict.call_args[0][0], x_expected["Lower bound"])
+
+
+def test_sample_predict_without_ub(
+    features: Features,
+    sample_without_ub: TrainingSample,
+) -> None:
+    # Only the lower bound regressor is fitted, so only that bound is predicted.
+    x_expected, _ = ObjectiveValueComponent.sample_xy(features, sample_without_ub)
+    lb_reg = Mock(spec=Regressor)
+    lb_reg.predict = Mock(return_value=np.array([[50.0]]))
+    comp = ObjectiveValueComponent()
+    comp.lb_regressor = lb_reg
+    actual = comp.sample_predict(features, sample_without_ub)
+    assert actual == {"Lower bound": 50.0}
+    assert_array_equal(lb_reg.predict.call_args[0][0], x_expected["Lower bound"])
+
+
 def test_usage() -> None:
     solver = LearningSolver(components=[ObjectiveValueComponent()])
     instance = get_knapsack_instance(GurobiPyomoSolver())