LearningSolver: add method for solving training instances in parallel

pull/1/head
Alinson S. Xavier 6 years ago
parent 5817f273e9
commit fa847f76ec

@ -101,6 +101,26 @@ for instance in more_instances:
In addition to storing the training data, `solver.save` also serializes and stores the trained ML models themselves, so it is not necessary to call `solver.fit` again after loading the data with `solver.load`.
### Solving training instances in parallel
In many situations, training instances can be solved in parallel to accelerate the training process. `LearningSolver` provides the method `parallel_solve(instances, n_jobs=4)` for this purpose, where `n_jobs` is the number of parallel jobs to run. After all instances have been solved, the ML models can be trained and saved to disk as usual, as the next example illustrates:
```python
from miplearn import LearningSolver
# Training phase...
solver = LearningSolver(...) # training solver parameters
solver.parallel_solve(training_instances, n_jobs=4)
solver.fit()
solver.save("/tmp/data.bin")
# Test phase...
solver = LearningSolver(...) # test solver parameters
solver.load("/tmp/data.bin")
solver.solve(test_instance)
```
Current Limitations
-------------------

@ -6,8 +6,12 @@ from .transformers import PerVariableTransformer
from .warmstart import KnnWarmStartPredictor
import pyomo.environ as pe
import numpy as np
from copy import deepcopy
from copy import copy, deepcopy
import pickle
from tqdm import tqdm
from joblib import Parallel, delayed
import multiprocessing
class LearningSolver:
@ -79,6 +83,29 @@ class LearningSolver:
else:
self.y_train[category] = np.vstack([self.y_train[category], y])
def parallel_solve(self, instances, n_jobs=4):
    """Solve several instances in parallel and gather their training data.

    Every instance is solved by a shallow clone of this solver, dispatched
    through joblib. The samples collected by the clones are then stacked,
    category by category, after whatever training data this solver already
    holds. The ML models are not retrained here; call ``fit`` afterwards.

    Args:
        instances: Iterable of instances accepted by ``solve``.
        n_jobs: Number of parallel jobs, forwarded to ``joblib.Parallel``.

    Returns:
        None. ``self.x_train`` and ``self.y_train`` are updated in place
        of the previous dictionaries; they are left untouched when
        ``instances`` is empty.
    """
    def _process(instance):
        # copy() is shallow: without fresh dicts the clone would share
        # x_train / y_train with its source, and every job executed in the
        # same process would return the same, ever-growing dictionaries.
        # NOTE(review): assumes solve() can start from empty training
        # dicts (creating each category on first use) -- confirm.
        solver = copy(self)
        solver.x_train = {}
        solver.y_train = {}
        solver.solve(instance)
        return solver.x_train, solver.y_train

    def _merge(previous, new_entries):
        # Group the arrays per category, existing data first, so that a
        # category missing from some results is neither lost nor a KeyError.
        blocks = {category: [data] for category, data in previous.items()}
        for entry in new_entries:
            for category, data in entry.items():
                blocks.setdefault(category, []).append(data)
        return {
            category: np.vstack(arrays)
            for category, arrays in blocks.items()
        }

    results = Parallel(n_jobs=n_jobs)(
        delayed(_process)(instance)
        for instance in tqdm(instances)
    )
    if not results:
        # Nothing was solved; keep the current training data as it is.
        return
    self.x_train = _merge(self.x_train, [x for x, _ in results])
    self.y_train = _merge(self.y_train, [y for _, y in results])
def fit(self, x_train_dict=None, y_train_dict=None):
if x_train_dict is None:
x_train_dict = self.x_train

@ -4,6 +4,7 @@
from miplearn import LearningSolver
from miplearn.problems.knapsack import KnapsackInstance2
import numpy as np
def test_solver():
@ -31,3 +32,12 @@ def test_solve_save_load():
assert len(solver.x_train) == prev_x_train_len
assert len(solver.y_train) == prev_y_train_len
def test_parallel_solve():
    """Check that parallel_solve collects training data for every instance.

    Ten random knapsack instances are solved with two parallel jobs; the
    solver must end up with ten entries of x and y data under category 0.
    """
    # A locally seeded generator keeps the instances reproducible between
    # runs without touching numpy's global random state.
    rng = np.random.RandomState(42)
    instances = [
        KnapsackInstance2(
            weights=rng.rand(5),
            prices=rng.rand(5),
            capacity=3.0,
        )
        for _ in range(10)
    ]
    solver = LearningSolver()
    solver.parallel_solve(instances, n_jobs=2)
    assert len(solver.x_train[0]) == 10
    assert len(solver.y_train[0]) == 10

@ -3,3 +3,4 @@ numpy
pytest
sklearn
networkx
tqdm

@ -1,11 +1,17 @@
from setuptools import setup
# Package metadata for miplearn.
# NOTE(review): every keyword below appears twice -- presumably the removed
# and the added side of the diff hunk shown without +/- markers. The second
# copy differs only in layout and in listing 'tqdm' under install_requires;
# a runnable setup.py can keep only one copy of each keyword.
# NOTE(review): the solver module also imports joblib, which is not listed
# here -- presumably it arrives transitively through sklearn; confirm.
setup(
name='miplearn',
version='0.1',
description='A Machine-Learning Framework for Mixed-Integer Optimization',
author='Alinson S. Xavier',
author_email='axavier@anl.gov',
packages=['miplearn'],
install_requires=['pyomo', 'numpy', 'sklearn', 'networkx'],
name='miplearn',
version='0.1',
description='A Machine-Learning Framework for Mixed-Integer Optimization',
author='Alinson S. Xavier',
author_email='axavier@anl.gov',
packages=['miplearn'],
install_requires=[
'pyomo',
'numpy',
'sklearn',
'networkx',
'tqdm',
],
)
Loading…
Cancel
Save