From fa847f76ece7c6f06e129b2b1f7ddeb83e543b80 Mon Sep 17 00:00:00 2001
From: Alinson S Xavier <axavier@anl.gov>
Date: Thu, 23 Jan 2020 13:39:51 -0600
Subject: [PATCH] LearningSolver: add method for solving training instances in
 parallel

---
 README.md                     | 20 ++++++++++++++++++++
 miplearn/solvers.py           | 29 ++++++++++++++++++++++++++++-
 miplearn/tests/test_solver.py | 12 +++++++++++-
 requirements.txt              |  3 ++-
 setup.py                      | 20 +++++++++++++-------
 5 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 4f442e5..6b17c0b 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,26 @@ for instance in more_instances:
 In addition to storing the training data, `solver.save` also serializes and stores all trained ML models themselves, so it is not necessary to call `solver.fit`.
 
 
+### Solving training instances in parallel
+
+In many situations, training instances can be solved in parallel to accelerate the training process. `LearningSolver` provides the method `parallel_solve(instances, n_jobs=4)` for this purpose, where `n_jobs` sets the maximum number of instances that are solved concurrently. After all instances have been solved, the ML models can be trained and saved to disk as usual, as the next example illustrates:
+
+```python
+from miplearn import LearningSolver
+
+# Training phase...
+solver = LearningSolver(...) # training solver parameters
+solver.parallel_solve(training_instances, n_jobs=4)
+solver.fit()
+solver.save("/tmp/data.bin")
+
+# Test phase...
+solver = LearningSolver(...) # test solver parameters
+solver.load("/tmp/data.bin")
+solver.solve(test_instance)
+```
+
+
 Current Limitations
 -------------------
 
diff --git a/miplearn/solvers.py b/miplearn/solvers.py
index 6b79560..c616041 100644
--- a/miplearn/solvers.py
+++ b/miplearn/solvers.py
@@ -6,8 +6,12 @@ from .transformers import PerVariableTransformer
 from .warmstart import KnnWarmStartPredictor
 import pyomo.environ as pe
 import numpy as np
-from copy import deepcopy
+from copy import copy, deepcopy
 import pickle
+from tqdm import tqdm
+from joblib import Parallel, delayed
+import multiprocessing
+
 
 
 class LearningSolver:
@@ -78,6 +82,52 @@ class LearningSolver:
                 self.y_train[category] = y
             else:
                 self.y_train[category] = np.vstack([self.y_train[category], y])
+
+    def parallel_solve(self, instances, n_jobs=4):
+        """Solve the given instances in parallel and collect training data.
+
+        Each instance is solved by a shallow copy of this solver, and the
+        training samples produced by all copies are appended to
+        `self.x_train` and `self.y_train`.
+
+        Parameters
+        ----------
+        instances
+            Iterable of instances accepted by `solve`.
+        n_jobs : int
+            Maximum number of concurrently running jobs; passed to
+            `joblib.Parallel`.
+        """
+        def _process(instance):
+            solver = copy(self)
+            # Start each job with empty training data, so that it returns
+            # only the samples of its own instance; otherwise samples already
+            # stored in this solver would be duplicated once per instance.
+            solver.x_train = {}
+            solver.y_train = {}
+            solver.solve(instance)
+            return solver.x_train, solver.y_train
+
+        def _merge(current, new_entries):
+            # Group the arrays by category and stack each category once.
+            # Categories are gathered from every result, not only the first.
+            parts = {c: [rows] for c, rows in current.items()}
+            for entries in new_entries:
+                for c, rows in entries.items():
+                    parts.setdefault(c, []).append(rows)
+            return {c: np.vstack(p) for c, p in parts.items()}
+
+        results = Parallel(n_jobs=n_jobs)(
+            delayed(_process)(i)
+            for i in tqdm(instances)
+        )
+
+        # Nothing was solved: keep the existing training data untouched.
+        if not results:
+            return
+
+        self.x_train = _merge(self.x_train, [r[0] for r in results])
+        self.y_train = _merge(self.y_train, [r[1] for r in results])
 
     def fit(self, x_train_dict=None, y_train_dict=None):
         if x_train_dict is None:
diff --git a/miplearn/tests/test_solver.py b/miplearn/tests/test_solver.py
index 7eb0711..d14358a 100644
--- a/miplearn/tests/test_solver.py
+++ b/miplearn/tests/test_solver.py
@@ -4,6 +4,7 @@
 
 from miplearn import LearningSolver
 from miplearn.problems.knapsack import KnapsackInstance2
+import numpy as np
 
 
 def test_solver():
@@ -30,4 +31,17 @@ def test_solve_save_load():
     solver.load("/tmp/knapsack_train.bin")
     assert len(solver.x_train) == prev_x_train_len
     assert len(solver.y_train) == prev_y_train_len
-    
\ No newline at end of file
+
+
+def test_parallel_solve():
+    """parallel_solve should gather training data from every instance."""
+    # A seeded local generator keeps the random instances reproducible.
+    rng = np.random.RandomState(42)
+    instances = [KnapsackInstance2(weights=rng.rand(5),
+                                   prices=rng.rand(5),
+                                   capacity=3.0)
+                 for _ in range(10)]
+    solver = LearningSolver()
+    solver.parallel_solve(instances, n_jobs=2)
+    assert len(solver.x_train[0]) == 10
+    assert len(solver.y_train[0]) == 10
diff --git a/requirements.txt b/requirements.txt
index 7a573c5..67c30d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,6 @@ pyomo
 numpy
 pytest
 sklearn
-networkx
\ No newline at end of file
+networkx
+tqdm
+joblib
diff --git a/setup.py b/setup.py
index 9dc10a2..dabfb5e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,18 @@
 from setuptools import setup
 
 setup(
-   name='miplearn',
-   version='0.1',
-   description='A Machine-Learning Framework for Mixed-Integer Optimization',
-   author='Alinson S. Xavier',
-   author_email='axavier@anl.gov',
-   packages=['miplearn'],
-   install_requires=['pyomo', 'numpy', 'sklearn', 'networkx'],
+    name='miplearn',
+    version='0.1',
+    description='A Machine-Learning Framework for Mixed-Integer Optimization',
+    author='Alinson S. Xavier',
+    author_email='axavier@anl.gov',
+    packages=['miplearn'],
+    install_requires=[
+        'pyomo',
+        'numpy',
+        'sklearn',
+        'networkx',
+        'tqdm',
+        'joblib',
+    ],
 )
\ No newline at end of file