From 60b9a6775fbc68d8214886ea20a71001b50a8262 Mon Sep 17 00:00:00 2001
From: Alinson S Xavier <axavier@anl.gov>
Date: Tue, 10 Aug 2021 10:28:30 -0500
Subject: [PATCH] Use NumPy to compute AlvLouWeh2017 features

---
 miplearn/features/extractor.py   | 148 +++++++++++++++++--------------
 tests/features/test_extractor.py |  60 +++++--------
 2 files changed, 105 insertions(+), 103 deletions(-)

diff --git a/miplearn/features/extractor.py b/miplearn/features/extractor.py
index 14fa673..155c153 100644
--- a/miplearn/features/extractor.py
+++ b/miplearn/features/extractor.py
@@ -127,7 +127,9 @@ class FeaturesExtractor:
         ]:
             if f is not None:
                 lp_var_features_list.append(f.reshape(-1, 1))
-        sample.put_array("lp_var_features", np.hstack(lp_var_features_list))
+        lp_var_features = np.hstack(lp_var_features_list)
+        _fix_infinity(lp_var_features)
+        sample.put_array("lp_var_features", lp_var_features)
 
         # Constraint features
         lp_constr_features_list = []
@@ -142,7 +144,9 @@ class FeaturesExtractor:
         ]:
             if f is not None:
                 lp_constr_features_list.append(f.reshape(-1, 1))
-        sample.put_array("lp_constr_features", np.hstack(lp_constr_features_list))
+        lp_constr_features = np.hstack(lp_constr_features_list)
+        _fix_infinity(lp_constr_features)
+        sample.put_array("lp_constr_features", lp_constr_features)
 
         # Build lp_instance_features
         static_instance_features = sample.get_array("static_instance_features")
@@ -311,73 +315,83 @@ class FeaturesExtractor:
         obj_sa_down = sample.get_array("lp_var_sa_obj_down")
         obj_sa_up = sample.get_array("lp_var_sa_obj_up")
         values = sample.get_array("lp_var_values")
-        assert obj_coeffs is not None
-
-        pos_obj_coeff_sum = 0.0
-        neg_obj_coeff_sum = 0.0
-        for coeff in obj_coeffs:
-            if coeff > 0:
-                pos_obj_coeff_sum += coeff
-            if coeff < 0:
-                neg_obj_coeff_sum += -coeff
-
-        features = []
-        for i in range(len(obj_coeffs)):
-            f: List[float] = []
-            if obj_coeffs is not None:
-                # Feature 1
-                f.append(np.sign(obj_coeffs[i]))
-
-                # Feature 2
-                if pos_obj_coeff_sum > 0:
-                    f.append(abs(obj_coeffs[i]) / pos_obj_coeff_sum)
-                else:
-                    f.append(0.0)
-
-                # Feature 3
-                if neg_obj_coeff_sum > 0:
-                    f.append(abs(obj_coeffs[i]) / neg_obj_coeff_sum)
-                else:
-                    f.append(0.0)
 
+        assert obj_coeffs is not None
+        obj_coeffs = obj_coeffs.astype(float)
+        _fix_infinity(obj_coeffs)
+        nvars = len(obj_coeffs)
+
+        if obj_sa_down is not None:
+            obj_sa_down = obj_sa_down.astype(float)
+            _fix_infinity(obj_sa_down)
+
+        if obj_sa_up is not None:
+            obj_sa_up = obj_sa_up.astype(float)
+            _fix_infinity(obj_sa_up)
+
+        if values is not None:
+            values = values.astype(float)
+            _fix_infinity(values)
+
+        pos_obj_coeffs_sum = obj_coeffs[obj_coeffs > 0].sum()
+        neg_obj_coeffs_sum = -obj_coeffs[obj_coeffs < 0].sum()
+
+        curr = 0
+        max_n_features = 8
+        features = np.zeros((nvars, max_n_features))
+        with np.errstate(divide="ignore", invalid="ignore"):
+            # Feature 1
+            features[:, curr] = np.sign(obj_coeffs)
+            curr += 1
+
+            # Feature 2
+            if abs(pos_obj_coeffs_sum) > 0:
+                features[:, curr] = np.abs(obj_coeffs) / pos_obj_coeffs_sum
+                curr += 1
+
+            # Feature 3
+            if abs(neg_obj_coeffs_sum) > 0:
+                features[:, curr] = np.abs(obj_coeffs) / neg_obj_coeffs_sum
+                curr += 1
+
+            # Feature 37
             if values is not None:
-                # Feature 37
-                f.append(
-                    min(
-                        values[i] - np.floor(values[i]),
-                        np.ceil(values[i]) - values[i],
-                    )
+                features[:, curr] = np.minimum(
+                    values - np.floor(values),
+                    np.ceil(values) - values,
                 )
+                curr += 1
 
+            # Feature 44
             if obj_sa_up is not None:
-                assert obj_sa_down is not None
-                assert obj_coeffs is not None
-
-                # Convert inf into large finite numbers
-                sd = max(-1e20, obj_sa_down[i])
-                su = min(1e20, obj_sa_up[i])
-                obj = obj_coeffs[i]
-
-                # Features 44 and 46
-                f.append(np.sign(obj_sa_up[i]))
-                f.append(np.sign(obj_sa_down[i]))
-
-                # Feature 47
-                csign = np.sign(obj)
-                if csign != 0 and ((obj - sd) / csign) > 0.001:
-                    f.append(log((obj - sd) / csign))
-                else:
-                    f.append(0.0)
-
-                # Feature 48
-                if csign != 0 and ((su - obj) / csign) > 0.001:
-                    f.append(log((su - obj) / csign))
-                else:
-                    f.append(0.0)
-
-            for (i, v) in enumerate(f):
-                if not isfinite(v):
-                    f[i] = 0.0
-
-            features.append(f)
-        return np.array(features, dtype=float)
+                features[:, curr] = np.sign(obj_sa_up)
+                curr += 1
+
+            # Feature 46
+            if obj_sa_down is not None:
+                features[:, curr] = np.sign(obj_sa_down)
+                curr += 1
+
+            # Feature 47
+            if obj_sa_down is not None:
+                features[:, curr] = np.log(
+                    obj_coeffs - obj_sa_down / np.sign(obj_coeffs)
+                )
+                curr += 1
+
+            # Feature 48
+            if obj_sa_up is not None:
+                features[:, curr] = np.log(obj_coeffs - obj_sa_up / np.sign(obj_coeffs))
+                curr += 1
+
+        features = features[:, 0:curr]
+        _fix_infinity(features)
+        return features
+
+
+def _fix_infinity(m: np.ndarray) -> None:
+    """In place: clamp +/-inf to the finite min/max along axis 0; NaN -> 0."""
+    finite = np.ma.masked_invalid(m)  # hides inf/NaN from the reductions below
+    lo, hi = np.min(finite, axis=0), np.max(finite, axis=0)
+    m[:] = np.maximum(np.minimum(m, hi), lo)
+    m[np.isnan(m)] = 0.0
diff --git a/tests/features/test_extractor.py b/tests/features/test_extractor.py
index 732aaa1..2c6ac37 100644
--- a/tests/features/test_extractor.py
+++ b/tests/features/test_extractor.py
@@ -2,6 +2,7 @@
 #  Copyright (C) 2020-2021, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
 
+import os
 import sys
 import time
 from typing import Any
@@ -63,11 +64,11 @@ def test_knapsack() -> None:
         sample.get_vector_list("static_var_features"),
         np.array(
             [
-                [23.0, 505.0, 1.0, 0.32899, 0.0, 0.0, 505.0, 1.0],
-                [26.0, 352.0, 1.0, 0.229316, 0.0, 0.0, 352.0, 1.0],
-                [20.0, 458.0, 1.0, 0.298371, 0.0, 0.0, 458.0, 1.0],
-                [18.0, 220.0, 1.0, 0.143322, 0.0, 0.0, 220.0, 1.0],
-                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 67.0],
+                [23.0, 505.0, 1.0, 0.32899, 0.0, 505.0, 1.0],
+                [26.0, 352.0, 1.0, 0.229316, 0.0, 352.0, 1.0],
+                [20.0, 458.0, 1.0, 0.298371, 0.0, 458.0, 1.0],
+                [18.0, 220.0, 1.0, 0.143322, 0.0, 220.0, 1.0],
+                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 67.0],
             ]
         ),
     )
@@ -163,22 +164,20 @@ def test_knapsack() -> None:
                     1.0,
                     0.32899,
                     0.0,
-                    0.0,
                     505.0,
                     1.0,
                     1.0,
                     0.32899,
                     0.0,
-                    0.0,
                     1.0,
                     1.0,
                     5.265874,
-                    46.051702,
+                    0.0,
                     193.615385,
-                    -inf,
+                    -0.111111,
                     1.0,
                     311.384615,
-                    inf,
+                    570.869565,
                     0.913043,
                     2.043478,
                     1.0,
@@ -189,24 +188,22 @@ def test_knapsack() -> None:
                     1.0,
                     0.229316,
                     0.0,
-                    0.0,
                     352.0,
                     1.0,
                     1.0,
                     0.229316,
-                    0.0,
                     0.076923,
                     1.0,
                     1.0,
                     3.532875,
-                    5.388476,
                     0.0,
-                    -inf,
+                    0.0,
+                    -0.111111,
                     0.923077,
                     317.777778,
                     570.869565,
                     0.923077,
-                    inf,
+                    69.0,
                     0.923077,
                 ],
                 [
@@ -215,22 +212,20 @@ def test_knapsack() -> None:
                     1.0,
                     0.298371,
                     0.0,
-                    0.0,
                     458.0,
                     1.0,
                     1.0,
                     0.298371,
                     0.0,
-                    0.0,
                     1.0,
                     1.0,
                     5.232342,
-                    46.051702,
+                    0.0,
                     187.230769,
-                    -inf,
+                    -0.111111,
                     1.0,
                     270.769231,
-                    inf,
+                    570.869565,
                     0.9,
                     2.2,
                     1.0,
@@ -241,24 +236,22 @@ def test_knapsack() -> None:
                     1.0,
                     0.143322,
                     0.0,
-                    0.0,
                     220.0,
                     1.0,
                     1.0,
                     0.143322,
                     0.0,
-                    0.0,
                     1.0,
                     -1.0,
-                    46.051702,
-                    3.16515,
+                    5.453347,
+                    0.0,
                     -23.692308,
                     -0.111111,
                     1.0,
-                    -inf,
+                    -13.538462,
                     243.692308,
                     0.0,
-                    inf,
+                    69.0,
                     0.0,
                 ],
                 [
@@ -268,21 +261,19 @@ def test_knapsack() -> None:
                     0.0,
                     0.0,
                     0.0,
-                    0.0,
                     67.0,
                     0.0,
                     0.0,
                     0.0,
-                    0.0,
                     1.0,
                     -1.0,
-                    0.0,
+                    5.453347,
                     0.0,
                     13.538462,
-                    -inf,
+                    -0.111111,
                     67.0,
                     -13.538462,
-                    inf,
+                    570.869565,
                     43.0,
                     69.0,
                     67.0,
@@ -391,7 +382,7 @@ class MpsInstance(Instance):
         return gp.read(self.filename)
 
 
-def main() -> None:
+if __name__ == "__main__":
     solver = GurobiSolver()
     instance = MpsInstance(sys.argv[1])
     solver.set_instance(instance)
@@ -404,7 +395,4 @@ def main() -> None:
         extractor.extract_after_lp_features(solver, sample, lp_stats)
 
     cProfile.run("run()", filename="tmp/prof")
-
-
-if __name__ == "__main__":
-    main()
+    os.system("flameprof tmp/prof > tmp/prof.svg")