Replace InstanceIterator by PickleGzInstance

master
Alinson S. Xavier 5 years ago
parent b4770c6c0a
commit 08e808690e
No known key found for this signature in database
GPG Key ID: DCA0DAD4D2F58624

@ -40,6 +40,8 @@ from miplearn import (
BenchmarkRunner, BenchmarkRunner,
GurobiPyomoSolver, GurobiPyomoSolver,
setup_logger, setup_logger,
PickleGzInstance,
write_pickle_gz_multiple,
) )
setup_logger() setup_logger()
@ -48,24 +50,6 @@ logging.getLogger("pyomo.core").setLevel(logging.ERROR)
logger = logging.getLogger("benchmark") logger = logging.getLogger("benchmark")
def write_pickle_gz(obj, filename):
logger.info(f"Writing: {filename}")
os.makedirs(os.path.dirname(filename), exist_ok=True)
with gzip.GzipFile(filename, "wb") as file:
pickle.dump(obj, file)
def read_pickle_gz(filename):
logger.info(f"Reading: {filename}")
with gzip.GzipFile(filename, "rb") as file:
return pickle.load(file)
def write_pickle_gz_multiple(objs, dirname):
for (i, obj) in enumerate(objs):
write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")
def train(args): def train(args):
basepath = args["<challenge>"] basepath = args["<challenge>"]
problem_name, challenge_name = args["<challenge>"].split("/") problem_name, challenge_name = args["<challenge>"].split("/")
@ -78,7 +62,9 @@ def train(args):
done_filename = f"{basepath}/train/done" done_filename = f"{basepath}/train/done"
if not os.path.isfile(done_filename): if not os.path.isfile(done_filename):
train_instances = glob.glob(f"{basepath}/train/*.gz") train_instances = [
PickleGzInstance(f) for f in glob.glob(f"{basepath}/train/*.gz")
]
solver = LearningSolver( solver = LearningSolver(
solver=lambda: GurobiPyomoSolver( solver=lambda: GurobiPyomoSolver(
params={ params={
@ -96,7 +82,7 @@ def train(args):
def test_baseline(args): def test_baseline(args):
basepath = args["<challenge>"] basepath = args["<challenge>"]
test_instances = glob.glob(f"{basepath}/test/*.gz") test_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/test/*.gz")]
csv_filename = f"{basepath}/benchmark_baseline.csv" csv_filename = f"{basepath}/benchmark_baseline.csv"
if not os.path.isfile(csv_filename): if not os.path.isfile(csv_filename):
solvers = { solvers = {
@ -119,8 +105,8 @@ def test_baseline(args):
def test_ml(args): def test_ml(args):
basepath = args["<challenge>"] basepath = args["<challenge>"]
test_instances = glob.glob(f"{basepath}/test/*.gz") test_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/test/*.gz")]
train_instances = glob.glob(f"{basepath}/train/*.gz") train_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/train/*.gz")]
csv_filename = f"{basepath}/benchmark_ml.csv" csv_filename = f"{basepath}/benchmark_ml.csv"
if not os.path.isfile(csv_filename): if not os.path.isfile(csv_filename):
solvers = { solvers = {

@ -119,11 +119,10 @@ For more significant performance benefits, `LearningSolver` can also be configur
### 6.1 Saving and loading solver state ### 6.1 Saving and loading solver state
After solving a large number of training instances, it may be desirable to save the current state of `LearningSolver` to disk, so that the solver can still use the acquired knowledge after the application restarts. This can be accomplished by using the standard `pickle` module, as the following example illustrates: After solving a large number of training instances, it may be desirable to save the current state of `LearningSolver` to disk, so that the solver can still use the acquired knowledge after the application restarts. This can be accomplished by using the utility functions `write_pickle_gz` and `read_pickle_gz`, as the following example illustrates:
```python ```python
from miplearn import LearningSolver from miplearn import LearningSolver, write_pickle_gz, read_pickle_gz
import pickle
# Solve training instances # Solve training instances
training_instances = [...] training_instances = [...]
@ -135,14 +134,12 @@ for instance in training_instances:
solver.fit(training_instances) solver.fit(training_instances)
# Save trained solver to disk # Save trained solver to disk
with open("solver.pickle", "wb") as file: write_pickle_gz(solver, "solver.pkl.gz")
pickle.dump(solver, file)
# Application restarts... # Application restarts...
# Load trained solver from disk # Load trained solver from disk
with open("solver.pickle", "rb") as file: solver = read_pickle_gz("solver.pkl.gz")
solver = pickle.load(file)
# Solve additional instances # Solve additional instances
test_instances = [...] test_instances = [...]
@ -171,23 +168,24 @@ solver.parallel_solve(test_instances)
### 6.3 Solving instances from the disk ### 6.3 Solving instances from the disk
In all examples above, we have assumed that instances are available as Python objects, stored in memory. When problem instances are very large, or when there is a large number of problem instances, this approach may require an excessive amount of memory. To reduce memory requirements, MIPLearn can also operate on instances that are stored on disk. More precisely, the methods `fit`, `solve` and `parallel_solve` in `LearningSolver` can operate on filenames (or lists of filenames) instead of instance objects, as the next example illustrates. In all examples above, we have assumed that instances are available as Python objects, stored in memory. When problem instances are very large, or when there is a large number of problem instances, this approach may require an excessive amount of memory. To reduce memory requirements, MIPLearn can also operate on instances that are stored on disk, through the `PickleGzInstance` class, as the next example illustrates.
Instance files must be pickled instance objects. The method `solve` loads at most one instance to memory at a time, while `parallel_solve` loads at most `n_jobs` instances.
```python ```python
import pickle import pickle
from miplearn import LearningSolver from miplearn import (
LearningSolver,
PickleGzInstance,
write_pickle_gz,
)
# Construct and pickle 600 problem instances # Construct and pickle 600 problem instances
for i in range(600): for i in range(600):
instance = MyProblemInstance([...]) instance = MyProblemInstance([...])
with open("instance_%03d.pkl" % i, "w") as file: write_pickle_gz(instance, "instance_%03d.pkl" % i)
pickle.dump(instance, obj)
# Split instances into training and test # Split instances into training and test
test_instances = ["instance_%03d.pkl" % i for i in range(500)] test_instances = [PickleGzInstance("instance_%03d.pkl" % i) for i in range(500)]
train_instances = ["instance_%03d.pkl" % i for i in range(500, 600)] train_instances = [PickleGzInstance("instance_%03d.pkl" % i) for i in range(500, 600)]
# Create solver # Create solver
solver = LearningSolver([...]) solver = LearningSolver([...])
@ -203,20 +201,7 @@ solver.parallel_solve(test_instances, n_jobs=4)
``` ```
By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To write to an alternative file instead, use the arguments `output_filename` (in `solve`) and `output_filenames` (in `parallel_solve`). To discard the modifications instead, use `discard_outputs=True`. This can be useful, for example, during benchmarks. By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To discard the modifications instead, use `LearningSolver(..., discard_outputs=True)`. This can be useful, for example, during benchmarks.
```python
# Solve a single instance file and write the output to another file
solver.solve("knapsack_1.orig.pkl", output_filename="knapsack_1.solved.pkl")
# Solve a list of instance files
instances = ["knapsack_%03d.orig.pkl" % i for i in range(100)]
output = ["knapsack_%03d.solved.pkl" % i for i in range(100)]
solver.parallel_solve(instances, output_filenames=output)
# Solve instances and discard solutions and training data
solver.parallel_solve(instances, discard_outputs=True)
```
## 7. Running benchmarks ## 7. Running benchmarks

@ -23,7 +23,13 @@ from .components.steps.convert_tight import ConvertTightIneqsIntoEqsStep
from .components.steps.drop_redundant import DropRedundantInequalitiesStep from .components.steps.drop_redundant import DropRedundantInequalitiesStep
from .components.steps.relax_integrality import RelaxIntegralityStep from .components.steps.relax_integrality import RelaxIntegralityStep
from .extractors import InstanceFeaturesExtractor from .extractors import InstanceFeaturesExtractor
from .instance import Instance from .instance import (
Instance,
PickleGzInstance,
write_pickle_gz,
write_pickle_gz_multiple,
read_pickle_gz,
)
from .log import setup_logger from .log import setup_logger
from .solvers.gurobi import GurobiSolver from .solvers.gurobi import GurobiSolver
from .solvers.internal import InternalSolver from .solvers.internal import InternalSolver

@ -52,7 +52,7 @@ class BenchmarkRunner:
def parallel_solve( def parallel_solve(
self, self,
instances: Union[List[str], List[Instance]], instances: List[Instance],
n_jobs: int = 1, n_jobs: int = 1,
n_trials: int = 3, n_trials: int = 3,
) -> None: ) -> None:
@ -61,7 +61,7 @@ class BenchmarkRunner:
Parameters Parameters
---------- ----------
instances: Union[List[str], List[Instance]] instances: List[Instance]
List of instances to solve. This can either be a list of instances List of instances to solve. This can either be a list of instances
already loaded in memory, or a list of filenames pointing to pickled (and already loaded in memory, or a list of filenames pointing to pickled (and
optionally gzipped) files. optionally gzipped) files.
@ -98,17 +98,14 @@ class BenchmarkRunner:
os.makedirs(os.path.dirname(filename), exist_ok=True) os.makedirs(os.path.dirname(filename), exist_ok=True)
self.results.to_csv(filename) self.results.to_csv(filename)
def fit(self, instances: Union[List[str], List[Instance]]) -> None: def fit(self, instances: List[Instance]) -> None:
""" """
Trains all solvers with the provided training instances. Trains all solvers with the provided training instances.
Parameters Parameters
---------- ----------
instances: Union[List[str], List[Instance]] instances: List[Instance]
List of training instances. This can either be a list of instances List of training instances.
already loaded in memory, or a list of filenames pointing to pickled (and
optionally gzipped) files.
""" """
for (solver_name, solver) in self.solvers.items(): for (solver_name, solver) in self.solvers.items():
logger.debug(f"Fitting {solver_name}...") logger.debug(f"Fitting {solver_name}...")

@ -2,10 +2,10 @@
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details. # Released under the modified BSD license. See COPYING.md for more details.
from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Hashable
import numpy as np import numpy as np
from typing import Any, List, Union, TYPE_CHECKING, Tuple, Dict, Optional, Hashable
from miplearn.extractors import InstanceIterator
from miplearn.instance import Instance from miplearn.instance import Instance
from miplearn.types import LearningSolveStats, TrainingSample, Features from miplearn.types import LearningSolveStats, TrainingSample, Features
@ -120,11 +120,11 @@ class Component:
def xy_instances( def xy_instances(
self, self,
instances: Union[List[str], List[Instance]], instances: List[Instance],
) -> Tuple[Dict, Dict]: ) -> Tuple[Dict, Dict]:
x_combined: Dict = {} x_combined: Dict = {}
y_combined: Dict = {} y_combined: Dict = {}
for instance in InstanceIterator(instances): for instance in instances:
assert isinstance(instance, Instance) assert isinstance(instance, Instance)
for sample in instance.training_data: for sample in instance.training_data:
xy = self.sample_xy(instance.features, sample) xy = self.sample_xy(instance.features, sample)
@ -141,7 +141,7 @@ class Component:
def fit( def fit(
self, self,
training_instances: Union[List[str], List[Instance]], training_instances: List[Instance],
) -> None: ) -> None:
x, y = self.xy_instances(training_instances) x, y = self.xy_instances(training_instances)
for cat in x.keys(): for cat in x.keys():
@ -198,9 +198,9 @@ class Component:
) -> None: ) -> None:
return return
def evaluate(self, instances: Union[List[str], List[Instance]]) -> List: def evaluate(self, instances: List[Instance]) -> List:
ev = [] ev = []
for instance in InstanceIterator(instances): for instance in instances:
for sample in instance.training_data: for sample in instance.training_data:
ev += [self.sample_evaluate(instance.features, sample)] ev += [self.sample_evaluate(instance.features, sample)]
return ev return ev

@ -13,7 +13,7 @@ from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier from miplearn.classifiers.counting import CountingClassifier
from miplearn.components import classifier_evaluation_dict from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component from miplearn.components.component import Component
from miplearn.extractors import InstanceFeaturesExtractor, InstanceIterator from miplearn.extractors import InstanceFeaturesExtractor
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -68,7 +68,7 @@ class DynamicLazyConstraintsComponent(Component):
self.classifiers = {} self.classifiers = {}
violation_to_instance_idx = {} violation_to_instance_idx = {}
for (idx, instance) in enumerate(InstanceIterator(training_instances)): for (idx, instance) in enumerate(training_instances):
for v in instance.found_violated_lazy_constraints: for v in instance.found_violated_lazy_constraints:
if isinstance(v, list): if isinstance(v, list):
v = tuple(v) v = tuple(v)

@ -3,22 +3,14 @@
# Released under the modified BSD license. See COPYING.md for more details. # Released under the modified BSD license. See COPYING.md for more details.
import logging import logging
from typing import List, Dict, Union, Optional, Any, TYPE_CHECKING, Tuple, Hashable from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Hashable
import numpy as np import numpy as np
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
mean_squared_error,
explained_variance_score,
max_error,
mean_absolute_error,
r2_score,
)
from miplearn.classifiers import Regressor from miplearn.classifiers import Regressor
from miplearn.classifiers.sklearn import ScikitLearnRegressor from miplearn.classifiers.sklearn import ScikitLearnRegressor
from miplearn.components.component import Component from miplearn.components.component import Component
from miplearn.extractors import InstanceIterator
from miplearn.instance import Instance from miplearn.instance import Instance
from miplearn.types import TrainingSample, LearningSolveStats, Features from miplearn.types import TrainingSample, LearningSolveStats, Features

@ -13,7 +13,6 @@ from miplearn.classifiers.counting import CountingClassifier
from miplearn.components import classifier_evaluation_dict from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component from miplearn.components.component import Component
from miplearn.components.steps.drop_redundant import DropRedundantInequalitiesStep from miplearn.components.steps.drop_redundant import DropRedundantInequalitiesStep
from miplearn.extractors import InstanceIterator
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -116,7 +115,7 @@ class ConvertTightIneqsIntoEqsStep(Component):
def _x_train(instances): def _x_train(instances):
x = {} x = {}
for instance in tqdm( for instance in tqdm(
InstanceIterator(instances), instances,
desc="Extract (drop:x)", desc="Extract (drop:x)",
disable=len(instances) < 5, disable=len(instances) < 5,
): ):
@ -139,7 +138,7 @@ class ConvertTightIneqsIntoEqsStep(Component):
def y(self, instances): def y(self, instances):
y = {} y = {}
for instance in tqdm( for instance in tqdm(
InstanceIterator(instances), instances,
desc="Extract (rlx:conv_ineqs:y)", desc="Extract (rlx:conv_ineqs:y)",
disable=len(instances) < 5, disable=len(instances) < 5,
): ):

@ -6,14 +6,13 @@ import logging
from copy import deepcopy from copy import deepcopy
import numpy as np import numpy as np
from tqdm import tqdm
from p_tqdm import p_umap from p_tqdm import p_umap
from tqdm import tqdm
from miplearn.classifiers.counting import CountingClassifier from miplearn.classifiers.counting import CountingClassifier
from miplearn.components import classifier_evaluation_dict from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component from miplearn.components.component import Component
from miplearn.components.lazy_static import LazyConstraint from miplearn.components.lazy_static import LazyConstraint
from miplearn.extractors import InstanceIterator
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -131,7 +130,6 @@ class DropRedundantInequalitiesStep(Component):
def _extract(instance): def _extract(instance):
x = {} x = {}
y = {} y = {}
for instance in InstanceIterator([instance]):
for training_data in instance.training_data: for training_data in instance.training_data:
for (cid, slack) in training_data["slacks"].items(): for (cid, slack) in training_data["slacks"].items():
category = instance.get_constraint_category(cid) category = instance.get_constraint_category(cid)
@ -149,13 +147,7 @@ class DropRedundantInequalitiesStep(Component):
return x, y return x, y
if n_jobs == 1: if n_jobs == 1:
results = [ results = [_extract(i) for i in tqdm(instances, desc="Extract (drop 1/3)")]
_extract(i)
for i in tqdm(
instances,
desc="Extract (drop 1/3)",
)
]
else: else:
results = p_umap( results = p_umap(
_extract, _extract,

@ -2,51 +2,14 @@
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details. # Released under the modified BSD license. See COPYING.md for more details.
import gzip
import logging import logging
import pickle
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import List, Union, cast, IO
import numpy as np import numpy as np
from tqdm.auto import tqdm
from miplearn.instance import Instance
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class InstanceIterator:
def __init__(
self,
instances: Union[List[str], List[Instance]],
) -> None:
self.instances = instances
self.current = 0
def __iter__(self):
return self
def __next__(self) -> Instance:
if self.current >= len(self.instances):
raise StopIteration
result = self.instances[self.current]
self.current += 1
if isinstance(result, str):
logger.debug("Read: %s" % result)
try:
if result.endswith(".gz"):
with gzip.GzipFile(result, "rb") as gzfile:
result = pickle.load(cast(IO[bytes], gzfile))
else:
with open(result, "rb") as file:
result = pickle.load(cast(IO[bytes], file))
except pickle.UnpicklingError:
raise Exception(f"Invalid instance file: {result}")
assert isinstance(result, Instance)
return result
class Extractor(ABC): class Extractor(ABC):
@abstractmethod @abstractmethod
def extract(self, instances): def extract(self, instances):
@ -77,6 +40,6 @@ class InstanceFeaturesExtractor(Extractor):
instance.training_data[0]["LP value"], instance.training_data[0]["LP value"],
] ]
) )
for instance in InstanceIterator(instances) for instance in instances
] ]
) )

@ -3,14 +3,34 @@
# Released under the modified BSD license. See COPYING.md for more details. # Released under the modified BSD license. See COPYING.md for more details.
import gzip import gzip
import json import logging
import os
import pickle
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any, List, Optional, Hashable from typing import Any, List, Optional, Hashable, IO, cast
import numpy as np
from miplearn.types import TrainingSample, VarIndex, Features from miplearn.types import TrainingSample, VarIndex, Features
logger = logging.getLogger(__name__)
def write_pickle_gz(obj: Any, filename: str) -> None:
    """
    Pickle an object and write it to a gzip-compressed file.

    Missing parent directories of `filename` are created. A bare filename with
    no directory component (for example, "solver.pkl.gz") is written to the
    current working directory.

    Parameters
    ----------
    obj: Any
        The object to be pickled.
    filename: str
        Path of the gzipped pickle file to write. An existing file is
        overwritten.
    """
    logger.info(f"Writing: {filename}")
    dirname = os.path.dirname(filename)
    # os.path.dirname returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with gzip.GzipFile(filename, "wb") as file:
        pickle.dump(obj, cast(IO[bytes], file))
def read_pickle_gz(filename: str) -> Any:
    """
    Load and return the object stored in a gzip-compressed pickle file.

    Parameters
    ----------
    filename: str
        Path of the gzipped pickle file to read.

    Returns
    -------
    Any
        The unpickled object.
    """
    logger.info(f"Reading: {filename}")
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    with gzip.GzipFile(filename=filename, mode="rb") as stream:
        loaded = pickle.load(cast(IO[bytes], stream))
    return loaded
def write_pickle_gz_multiple(objs: List[Any], dirname: str) -> None:
    """
    Write each object to its own gzipped pickle file inside `dirname`.

    Files are named after the position of the object in the list, zero-padded
    to five digits: 00000.pkl.gz, 00001.pkl.gz, and so on.

    Parameters
    ----------
    objs: List[Any]
        The objects to be pickled.
    dirname: str
        Directory that receives the files.
    """
    for position, item in enumerate(objs):
        target = f"{dirname}/{position:05d}.pkl.gz"
        write_pickle_gz(item, target)
# noinspection PyMethodMayBeStatic # noinspection PyMethodMayBeStatic
class Instance(ABC): class Instance(ABC):
@ -155,12 +175,116 @@ class Instance(ABC):
def build_user_cut(self, model, violation): def build_user_cut(self, model, violation):
pass pass
def load(self, filename): def flush(self) -> None:
with gzip.GzipFile(filename, "r") as f: """
data = json.loads(f.read().decode("utf-8")) Save any pending changes made to the instance to the underlying data store.
self.__dict__ = data """
pass
def lazy_load(func):
    """
    Decorator for methods of file-backed instances that loads the wrapped
    instance on first use.

    Before calling `func`, the wrapper checks `self.instance`. If it is None,
    the instance is obtained from `self._load()`, and its `features` and
    `training_data` attributes are also exposed on `self`. Positional and
    keyword arguments are forwarded to `func` unchanged, and the name and
    docstring of `func` are preserved on the wrapper.
    """
    # Imported locally so this block does not depend on a module-level import.
    import functools

    @functools.wraps(func)
    def inner(self, *args, **kwargs):
        if self.instance is None:
            self.instance = self._load()
            # Share the same objects so that in-place changes made through the
            # wrapper are visible on the loaded instance.
            self.features = self.instance.features
            self.training_data = self.instance.training_data
        return func(self, *args, **kwargs)

    return inner
class PickleGzInstance(Instance):
"""
An instance backed by a gzipped pickle file.
The instance is only loaded to memory after an operation is called (for example,
`to_model`). Every method decorated with `lazy_load` triggers that load on its
first call and then forwards the call to the wrapped instance.
Parameters
----------
filename: str
Path of the gzipped pickle file that should be loaded.
"""
def __init__(self, filename: str) -> None:
super().__init__()
# Fail early if the file is missing. NOTE: this is an `assert`, so the check is
# skipped when Python runs with assertions disabled (-O).
assert os.path.exists(filename), f"File not found: {filename}"
# Wrapped instance; stays None until a `lazy_load` method is first called.
self.instance: Optional[Instance] = None
self.filename: str = filename
# The methods below delegate to the wrapped instance. The `assert` in each one
# checks that `lazy_load` has populated `self.instance`.
@lazy_load
def to_model(self) -> Any:
assert self.instance is not None
return self.instance.to_model()
@lazy_load
def get_instance_features(self) -> List[float]:
assert self.instance is not None
return self.instance.get_instance_features()
@lazy_load
def get_variable_features(self, var_name: str, index: VarIndex) -> List[float]:
assert self.instance is not None
return self.instance.get_variable_features(var_name, index)
@lazy_load
def get_variable_category(
self,
var_name: str,
index: VarIndex,
) -> Optional[Hashable]:
assert self.instance is not None
return self.instance.get_variable_category(var_name, index)
@lazy_load
def get_constraint_features(self, cid: str) -> Optional[List[float]]:
assert self.instance is not None
return self.instance.get_constraint_features(cid)
@lazy_load
def get_constraint_category(self, cid: str) -> Optional[str]:
assert self.instance is not None
return self.instance.get_constraint_category(cid)
@lazy_load
def has_static_lazy_constraints(self) -> bool:
assert self.instance is not None
return self.instance.has_static_lazy_constraints()
@lazy_load
def has_dynamic_lazy_constraints(self):
assert self.instance is not None
return self.instance.has_dynamic_lazy_constraints()
@lazy_load
def is_constraint_lazy(self, cid: str) -> bool:
assert self.instance is not None
return self.instance.is_constraint_lazy(cid)
@lazy_load
def find_violated_lazy_constraints(self, model):
assert self.instance is not None
return self.instance.find_violated_lazy_constraints(model)
@lazy_load
def build_lazy_constraint(self, model, violation):
assert self.instance is not None
return self.instance.build_lazy_constraint(model, violation)
@lazy_load
def find_violated_user_cuts(self, model):
assert self.instance is not None
return self.instance.find_violated_user_cuts(model)
@lazy_load
def build_user_cut(self, model, violation):
assert self.instance is not None
return self.instance.build_user_cut(model, violation)
# Reads the object stored in `self.filename` and checks that it is an Instance
# before returning it.
def _load(self) -> Instance:
obj = read_pickle_gz(self.filename)
assert isinstance(obj, Instance)
return obj
def dump(self, filename): def flush(self) -> None:
data = json.dumps(self.__dict__, indent=2).encode("utf-8") write_pickle_gz(self.instance, self.filename)
with gzip.GzipFile(filename, "w") as f:
f.write(data)

@ -18,7 +18,7 @@ from miplearn.components.lazy_dynamic import DynamicLazyConstraintsComponent
from miplearn.components.objective import ObjectiveValueComponent from miplearn.components.objective import ObjectiveValueComponent
from miplearn.components.primal import PrimalSolutionComponent from miplearn.components.primal import PrimalSolutionComponent
from miplearn.features import FeaturesExtractor from miplearn.features import FeaturesExtractor
from miplearn.instance import Instance from miplearn.instance import Instance, PickleGzInstance
from miplearn.solvers import _RedirectOutput from miplearn.solvers import _RedirectOutput
from miplearn.solvers.internal import InternalSolver from miplearn.solvers.internal import InternalSolver
from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
@ -30,8 +30,7 @@ logger = logging.getLogger(__name__)
class _GlobalVariables: class _GlobalVariables:
def __init__(self) -> None: def __init__(self) -> None:
self.solver: Optional[LearningSolver] = None self.solver: Optional[LearningSolver] = None
self.instances: Optional[Union[List[str], List[Instance]]] = None self.instances: Optional[List[Instance]] = None
self.output_filenames: Optional[List[str]] = None
self.discard_outputs: bool = False self.discard_outputs: bool = False
@ -44,16 +43,10 @@ _GLOBAL = [_GlobalVariables()]
def _parallel_solve(idx): def _parallel_solve(idx):
solver = _GLOBAL[0].solver solver = _GLOBAL[0].solver
instances = _GLOBAL[0].instances instances = _GLOBAL[0].instances
output_filenames = _GLOBAL[0].output_filenames
discard_outputs = _GLOBAL[0].discard_outputs discard_outputs = _GLOBAL[0].discard_outputs
if output_filenames is None:
output_filename = None
else:
output_filename = output_filenames[idx]
try: try:
stats = solver.solve( stats = solver.solve(
instances[idx], instances[idx],
output_filename=output_filename,
discard_output=discard_outputs, discard_output=discard_outputs,
) )
return stats, instances[idx] return stats, instances[idx]
@ -129,30 +122,12 @@ class LearningSolver:
def _solve( def _solve(
self, self,
instance: Union[Instance, str], instance: Instance,
model: Any = None, model: Any = None,
output_filename: Optional[str] = None,
discard_output: bool = False, discard_output: bool = False,
tee: bool = False, tee: bool = False,
) -> LearningSolveStats: ) -> LearningSolveStats:
# Load instance from file, if necessary
filename = None
fileformat = None
file: Union[BinaryIO, gzip.GzipFile]
if isinstance(instance, str):
filename = instance
logger.info("Reading: %s" % filename)
if filename.endswith(".gz"):
fileformat = "pickle-gz"
with gzip.GzipFile(filename, "rb") as file:
instance = pickle.load(cast(IO[bytes], file))
else:
fileformat = "pickle"
with open(filename, "rb") as file:
instance = pickle.load(cast(IO[bytes], file))
assert isinstance(instance, Instance)
# Generate model # Generate model
if model is None: if model is None:
with _RedirectOutput([]): with _RedirectOutput([]):
@ -262,23 +237,15 @@ class LearningSolver:
component.after_solve_mip(*callback_args) component.after_solve_mip(*callback_args)
# Write to file, if necessary # Write to file, if necessary
if not discard_output and filename is not None: if not discard_output:
if output_filename is None: instance.flush()
output_filename = filename
logger.info("Writing: %s" % output_filename)
if fileformat == "pickle":
with open(output_filename, "wb") as file:
pickle.dump(instance, cast(IO[bytes], file))
else:
with gzip.GzipFile(output_filename, "wb") as file:
pickle.dump(instance, cast(IO[bytes], file))
return stats return stats
def solve( def solve(
self, self,
instance: Union[Instance, str], instance: Instance,
model: Any = None, model: Any = None,
output_filename: Optional[str] = None,
discard_output: bool = False, discard_output: bool = False,
tee: bool = False, tee: bool = False,
) -> LearningSolveStats: ) -> LearningSolveStats:
@ -298,14 +265,10 @@ class LearningSolver:
Parameters Parameters
---------- ----------
instance: Union[Instance, str] instance: Instance
The instance to be solved, or a filename. The instance to be solved.
model: Any model: Any
The corresponding Pyomo model. If not provided, it will be created. The corresponding Pyomo model. If not provided, it will be created.
output_filename: Optional[str]
If instance is a filename and output_filename is provided, write the
modified instance to this file, instead of replacing the original one. If
output_filename is None (the default), modified the original file in-place.
discard_output: bool discard_output: bool
If True, do not write the modified instances anywhere; simply discard If True, do not write the modified instances anywhere; simply discard
them. Useful during benchmarking. them. Useful during benchmarking.
@ -325,30 +288,28 @@ class LearningSolver:
details. details.
""" """
if self.simulate_perfect: if self.simulate_perfect:
if not isinstance(instance, str): if not isinstance(instance, PickleGzInstance):
raise Exception("Not implemented") raise Exception("Not implemented")
with tempfile.NamedTemporaryFile(suffix=os.path.basename(instance)) as tmp:
self._solve( self._solve(
instance=instance, instance=instance,
model=model, model=model,
output_filename=tmp.name,
tee=tee, tee=tee,
discard_output=True,
) )
self.fit([tmp.name]) self.fit([instance])
instance.instance = None
return self._solve( return self._solve(
instance=instance, instance=instance,
model=model, model=model,
output_filename=output_filename,
discard_output=discard_output, discard_output=discard_output,
tee=tee, tee=tee,
) )
def parallel_solve( def parallel_solve(
self, self,
instances: Union[List[str], List[Instance]], instances: List[Instance],
n_jobs: int = 4, n_jobs: int = 4,
label: str = "Solve", label: str = "Solve",
output_filenames: Optional[List[str]] = None,
discard_outputs: bool = False, discard_outputs: bool = False,
) -> List[LearningSolveStats]: ) -> List[LearningSolveStats]:
""" """
@ -361,17 +322,13 @@ class LearningSolver:
Parameters Parameters
---------- ----------
output_filenames: Optional[List[str]]
If instances are file names and output_filenames is provided, write the
modified instances to these files, instead of replacing the original
files. If output_filenames is None, modifies the instances in-place.
discard_outputs: bool discard_outputs: bool
If True, do not write the modified instances anywhere; simply discard If True, do not write the modified instances anywhere; simply discard
them instead. Useful during benchmarking. them instead. Useful during benchmarking.
label: str label: str
Label to show in the progress bar. Label to show in the progress bar.
instances: Union[List[str], List[Instance]] instances: List[Instance]
The instances to be solved The instances to be solved.
n_jobs: int n_jobs: int
Number of instances to solve in parallel at a time. Number of instances to solve in parallel at a time.
@ -388,7 +345,6 @@ class LearningSolver:
self.internal_solver = None self.internal_solver = None
self._silence_miplearn_logger() self._silence_miplearn_logger()
_GLOBAL[0].solver = self _GLOBAL[0].solver = self
_GLOBAL[0].output_filenames = output_filenames
_GLOBAL[0].instances = instances _GLOBAL[0].instances = instances
_GLOBAL[0].discard_outputs = discard_outputs _GLOBAL[0].discard_outputs = discard_outputs
results = p_map( results = p_map(
@ -405,7 +361,7 @@ class LearningSolver:
self._restore_miplearn_logger() self._restore_miplearn_logger()
return stats return stats
def fit(self, training_instances: Union[List[str], List[Instance]]) -> None: def fit(self, training_instances: List[Instance]) -> None:
logger.debug("Fitting...") logger.debug("Fitting...")
if len(training_instances) == 0: if len(training_instances) == 0:
return return

@ -8,6 +8,7 @@ import pickle
import tempfile import tempfile
import os import os
from miplearn.instance import PickleGzInstance, write_pickle_gz, read_pickle_gz
from miplearn.solvers.gurobi import GurobiSolver from miplearn.solvers.gurobi import GurobiSolver
from miplearn.solvers.learning import LearningSolver from miplearn.solvers.learning import LearningSolver
from . import _get_knapsack_instance, get_internal_solvers from . import _get_knapsack_instance, get_internal_solvers
@ -78,61 +79,40 @@ def test_parallel_solve():
def test_solve_fit_from_disk():
    """Solve file-backed instances and verify the files on disk are updated.

    For every available internal solver, three knapsack instances are written
    to temporary ``.pkl`` files (gzip-pickled) and wrapped in
    ``PickleGzInstance``. After ``solve`` and ``parallel_solve``, each file is
    read back and must contain non-empty training data.
    """
    for internal_solver in get_internal_solvers():
        instances = []
        try:
            # Create instances and pickle them
            for _ in range(3):
                instance = _get_knapsack_instance(internal_solver)
                # Reserve a unique path and close the handle before writing,
                # so the file is never opened twice at the same time.
                with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as file:
                    filename = file.name
                # Register first so the cleanup below also covers a failed write.
                instances.append(PickleGzInstance(filename))
                write_pickle_gz(instance, filename)

            # Test: solve
            solver = LearningSolver(solver=internal_solver)
            solver.solve(instances[0])
            instance_loaded = read_pickle_gz(instances[0].filename)
            assert len(instance_loaded.training_data) > 0

            # Test: parallel_solve
            solver.parallel_solve(instances)
            for instance in instances:
                # Check the object reloaded from disk, not the in-memory wrapper.
                instance_loaded = read_pickle_gz(instance.filename)
                assert len(instance_loaded.training_data) > 0
        finally:
            # Delete temporary files, even when an assertion above fails
            for instance in instances:
                os.remove(instance.filename)
def test_simulate_perfect():
    """Check that ``simulate_perfect=True`` predicts the actual lower bound.

    A knapsack instance is gzip-pickled to a temporary file, solved through a
    ``PickleGzInstance`` wrapper, and the reported "Lower bound" must equal
    "Objective: Predicted lower bound".
    """
    internal_solver = GurobiSolver
    instance = _get_knapsack_instance(internal_solver)
    # Reserve a unique path and close the handle before writing to it by name.
    with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp:
        filename = tmp.name
    try:
        write_pickle_gz(instance, filename)
        solver = LearningSolver(
            solver=internal_solver,
            simulate_perfect=True,
        )
        stats = solver.solve(PickleGzInstance(filename))
        assert stats["Lower bound"] == stats["Objective: Predicted lower bound"]
    finally:
        # delete=False means nobody else removes the file for us.
        os.remove(filename)

@ -0,0 +1,16 @@
# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import tempfile
from miplearn import GurobiSolver
from miplearn.instance import write_pickle_gz, PickleGzInstance
from tests.fixtures.knapsack import get_knapsack_instance
def test_pickled() -> None:
    """Check that a gzip-pickled instance can be loaded and turned into a model."""
    original = get_knapsack_instance(GurobiSolver())
    # A temporary directory avoids reopening a still-open NamedTemporaryFile by
    # name, and guarantees cleanup when the block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = f"{tmpdir}/instance.pkl.gz"
        write_pickle_gz(original, filename)
        pickled = PickleGzInstance(filename)
        assert pickled.to_model() is not None
Loading…
Cancel
Save