From fa847f76ece7c6f06e129b2b1f7ddeb83e543b80 Mon Sep 17 00:00:00 2001 From: Alinson S Xavier Date: Thu, 23 Jan 2020 13:39:51 -0600 Subject: [PATCH] LearningSolver: add method for solving training instances in parallel --- README.md | 20 ++++++++++++++++++++ miplearn/solvers.py | 29 ++++++++++++++++++++++++++++- miplearn/tests/test_solver.py | 12 +++++++++++- requirements.txt | 3 ++- setup.py | 20 +++++++++++++------- 5 files changed, 74 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4f442e5..6b17c0b 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,26 @@ for instance in more_instances: In addition to storing the training data, `solver.save` also serializes and stores all trained ML models themselves, so it is not necessary to call `solver.fit`. +### Solving training instances in parallel + +In many situations, training instances can be solved in parallel to accelerate the training process. `LearningSolver` provides the method `parallel_solve(instances)` to easily achieve this. After all instances have been solved, the ML models can be trained and saved to disk as usual, as the next example illustrates: + +```python +from miplearn import LearningSolver + +# Training phase... +solver = LearningSolver(...) # training solver parameters +solver.parallel_solve(training_instances, n_jobs=4) +solver.fit() +solver.save("/tmp/data.bin") + +# Test phase... +solver = LearningSolver(...) # test solver parameters +solver.load("/tmp/data.bin") +solver.solve(test_instance) +``` + + Current Limitations ------------------- diff --git a/miplearn/solvers.py b/miplearn/solvers.py index 6b79560..c616041 100644 --- a/miplearn/solvers.py +++ b/miplearn/solvers.py @@ -6,8 +6,12 @@ from .transformers import PerVariableTransformer from .warmstart import KnnWarmStartPredictor import pyomo.environ as pe import numpy as np -from copy import deepcopy +from copy import copy, deepcopy import pickle +from tqdm import tqdm +from joblib import Parallel, delayed +import multiprocessing + class LearningSolver: @@ -78,6 +82,29 @@ class LearningSolver: self.y_train[category] = y else: self.y_train[category] = np.vstack([self.y_train[category], y]) + + def parallel_solve(self, instances, n_jobs=4): + def _process(instance): + solver = copy(self) + solver.solve(instance) + return solver.x_train, solver.y_train + + def _merge(results): + categories = results[0][0].keys() + x_entries = [np.vstack([r[0][c] for r in results]) for c in categories] + y_entries = [np.vstack([r[1][c] for r in results]) for c in categories] + x_train = dict(zip(categories, x_entries)) + y_train = dict(zip(categories, y_entries)) + return x_train, y_train + + results = Parallel(n_jobs=n_jobs)( + delayed(_process)(i) + for i in tqdm(instances) + ) + + x_train, y_train = _merge(results) + self.x_train = x_train + self.y_train = y_train def fit(self, x_train_dict=None, y_train_dict=None): if x_train_dict is None: diff --git a/miplearn/tests/test_solver.py b/miplearn/tests/test_solver.py index 7eb0711..d14358a 100644 --- a/miplearn/tests/test_solver.py +++ b/miplearn/tests/test_solver.py @@ -4,6 +4,7 @@ from miplearn import LearningSolver from miplearn.problems.knapsack import KnapsackInstance2 +import numpy as np def test_solver(): @@ -30,4 +31,13 @@ def test_solve_save_load(): solver.load("/tmp/knapsack_train.bin") assert len(solver.x_train) == prev_x_train_len assert len(solver.y_train) == prev_y_train_len - \ No newline at end of file + +def test_parallel_solve(): + instances = [KnapsackInstance2(weights=np.random.rand(5), + prices=np.random.rand(5), + capacity=3.0) + for _ in range(10)] + solver = LearningSolver() + solver.parallel_solve(instances, n_jobs=2) + assert len(solver.x_train[0]) == 10 + assert len(solver.y_train[0]) == 10 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7a573c5..67c30d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ pyomo numpy pytest sklearn -networkx \ No newline at end of file +networkx +tqdm \ No newline at end of file diff --git a/setup.py b/setup.py index 9dc10a2..dabfb5e 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,17 @@ from setuptools import setup setup( - name='miplearn', - version='0.1', - description='A Machine-Learning Framework for Mixed-Integer Optimization', - author='Alinson S. Xavier', - author_email='axavier@anl.gov', - packages=['miplearn'], - install_requires=['pyomo', 'numpy', 'sklearn', 'networkx'], + name='miplearn', + version='0.1', + description='A Machine-Learning Framework for Mixed-Integer Optimization', + author='Alinson S. Xavier', + author_email='axavier@anl.gov', + packages=['miplearn'], + install_requires=[ + 'pyomo', + 'numpy', + 'sklearn', + 'networkx', + 'tqdm', + ], ) \ No newline at end of file