From 08e808690ee571de1c00bb556eceb16d7a357022 Mon Sep 17 00:00:00 2001 From: "Alinson S. Xavier" Date: Sun, 4 Apr 2021 14:48:46 -0500 Subject: [PATCH] Replace InstanceIterator by PickleGzInstance --- benchmark/benchmark.py | 30 ++-- docs/usage.md | 43 ++---- miplearn/__init__.py | 8 +- miplearn/benchmark.py | 13 +- miplearn/components/component.py | 14 +- miplearn/components/lazy_dynamic.py | 4 +- miplearn/components/objective.py | 10 +- miplearn/components/steps/convert_tight.py | 5 +- miplearn/components/steps/drop_redundant.py | 40 +++--- miplearn/extractors.py | 39 +----- miplearn/instance.py | 148 ++++++++++++++++++-- miplearn/solvers/learning.py | 88 +++--------- tests/solvers/test_learning_solver.py | 50 ++----- tests/test_instance.py | 16 +++ 14 files changed, 252 insertions(+), 256 deletions(-) create mode 100644 tests/test_instance.py diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index de5a619..7f2881c 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -40,6 +40,8 @@ from miplearn import ( BenchmarkRunner, GurobiPyomoSolver, setup_logger, + PickleGzInstance, + write_pickle_gz_multiple, ) setup_logger() @@ -48,24 +50,6 @@ logging.getLogger("pyomo.core").setLevel(logging.ERROR) logger = logging.getLogger("benchmark") -def write_pickle_gz(obj, filename): - logger.info(f"Writing: {filename}") - os.makedirs(os.path.dirname(filename), exist_ok=True) - with gzip.GzipFile(filename, "wb") as file: - pickle.dump(obj, file) - - -def read_pickle_gz(filename): - logger.info(f"Reading: {filename}") - with gzip.GzipFile(filename, "rb") as file: - return pickle.load(file) - - -def write_pickle_gz_multiple(objs, dirname): - for (i, obj) in enumerate(objs): - write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz") - - def train(args): basepath = args[""] problem_name, challenge_name = args[""].split("/") @@ -78,7 +62,9 @@ def train(args): done_filename = f"{basepath}/train/done" if not os.path.isfile(done_filename): - train_instances = 
glob.glob(f"{basepath}/train/*.gz") + train_instances = [ + PickleGzInstance(f) for f in glob.glob(f"{basepath}/train/*.gz") + ] solver = LearningSolver( solver=lambda: GurobiPyomoSolver( params={ @@ -96,7 +82,7 @@ def train(args): def test_baseline(args): basepath = args[""] - test_instances = glob.glob(f"{basepath}/test/*.gz") + test_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/test/*.gz")] csv_filename = f"{basepath}/benchmark_baseline.csv" if not os.path.isfile(csv_filename): solvers = { @@ -119,8 +105,8 @@ def test_baseline(args): def test_ml(args): basepath = args[""] - test_instances = glob.glob(f"{basepath}/test/*.gz") - train_instances = glob.glob(f"{basepath}/train/*.gz") + test_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/test/*.gz")] + train_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/train/*.gz")] csv_filename = f"{basepath}/benchmark_ml.csv" if not os.path.isfile(csv_filename): solvers = { diff --git a/docs/usage.md b/docs/usage.md index 750d1d2..93dab8c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -119,11 +119,10 @@ For more significant performance benefits, `LearningSolver` can also be configur ### 6.1 Saving and loading solver state -After solving a large number of training instances, it may be desirable to save the current state of `LearningSolver` to disk, so that the solver can still use the acquired knowledge after the application restarts. This can be accomplished by using the standard `pickle` module, as the following example illustrates: +After solving a large number of training instances, it may be desirable to save the current state of `LearningSolver` to disk, so that the solver can still use the acquired knowledge after the application restarts. 
This can be accomplished by using the utility functions `write_pickle_gz` and `read_pickle_gz`, as the following example illustrates: ```python -from miplearn import LearningSolver -import pickle +from miplearn import LearningSolver, write_pickle_gz, read_pickle_gz # Solve training instances training_instances = [...] @@ -135,14 +134,12 @@ for instance in training_instances: solver.fit(training_instances) # Save trained solver to disk -with open("solver.pickle", "wb") as file: - pickle.dump(solver, file) +write_pickle_gz(solver, "solver.pkl.gz") # Application restarts... # Load trained solver from disk -with open("solver.pickle", "rb") as file: - solver = pickle.load(file) +solver = read_pickle_gz("solver.pkl.gz") # Solve additional instances test_instances = [...] @@ -171,23 +168,24 @@ solver.parallel_solve(test_instances) ### 6.3 Solving instances from the disk -In all examples above, we have assumed that instances are available as Python objects, stored in memory. When problem instances are very large, or when there is a large number of problem instances, this approach may require an excessive amount of memory. To reduce memory requirements, MIPLearn can also operate on instances that are stored on disk. More precisely, the methods `fit`, `solve` and `parallel_solve` in `LearningSolver` can operate on filenames (or lists of filenames) instead of instance objects, as the next example illustrates. -Instance files must be pickled instance objects. The method `solve` loads at most one instance to memory at a time, while `parallel_solve` loads at most `n_jobs` instances. - +In all examples above, we have assumed that instances are available as Python objects, stored in memory. When problem instances are very large, or when there is a large number of problem instances, this approach may require an excessive amount of memory. 
To reduce memory requirements, MIPLearn can also operate on instances that are stored on disk, through the `PickleGzInstance` class, as the next example illustrates. ```python import pickle -from miplearn import LearningSolver +from miplearn import ( + LearningSolver, + PickleGzInstance, + write_pickle_gz, +) # Construct and pickle 600 problem instances for i in range(600): instance = MyProblemInstance([...]) - with open("instance_%03d.pkl" % i, "w") as file: - pickle.dump(instance, obj) + write_pickle_gz(instance, "instance_%03d.pkl" % i) # Split instances into training and test -test_instances = ["instance_%03d.pkl" % i for i in range(500)] -train_instances = ["instance_%03d.pkl" % i for i in range(500, 600)] +test_instances = [PickleGzInstance("instance_%03d.pkl" % i) for i in range(500)] +train_instances = [PickleGzInstance("instance_%03d.pkl" % i) for i in range(500, 600)] # Create solver solver = LearningSolver([...]) @@ -203,20 +201,7 @@ solver.parallel_solve(test_instances, n_jobs=4) ``` -By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To write to an alternative file instead, use the arguments `output_filename` (in `solve`) and `output_filenames` (in `parallel_solve`). To discard the modifications instead, use `discard_outputs=True`. This can be useful, for example, during benchmarks. 
- -```python -# Solve a single instance file and write the output to another file -solver.solve("knapsack_1.orig.pkl", output_filename="knapsack_1.solved.pkl") - -# Solve a list of instance files -instances = ["knapsack_%03d.orig.pkl" % i for i in range(100)] -output = ["knapsack_%03d.solved.pkl" % i for i in range(100)] -solver.parallel_solve(instances, output_filenames=output) - -# Solve instances and discard solutions and training data -solver.parallel_solve(instances, discard_outputs=True) -``` +By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To discard the modifications instead, use `LearningSolver(..., discard_outputs=True)`. This can be useful, for example, during benchmarks. ## 7. Running benchmarks diff --git a/miplearn/__init__.py b/miplearn/__init__.py index 351d6f7..5964a12 100644 --- a/miplearn/__init__.py +++ b/miplearn/__init__.py @@ -23,7 +23,13 @@ from .components.steps.convert_tight import ConvertTightIneqsIntoEqsStep from .components.steps.drop_redundant import DropRedundantInequalitiesStep from .components.steps.relax_integrality import RelaxIntegralityStep from .extractors import InstanceFeaturesExtractor -from .instance import Instance +from .instance import ( + Instance, + PickleGzInstance, + write_pickle_gz, + write_pickle_gz_multiple, + read_pickle_gz, +) from .log import setup_logger from .solvers.gurobi import GurobiSolver from .solvers.internal import InternalSolver diff --git a/miplearn/benchmark.py b/miplearn/benchmark.py index b4ab2b3..20ff04e 100644 --- a/miplearn/benchmark.py +++ b/miplearn/benchmark.py @@ -52,7 +52,7 @@ class BenchmarkRunner: def parallel_solve( self, - instances: Union[List[str], List[Instance]], + instances: List[Instance], n_jobs: int = 1, n_trials: int = 3, ) -> None: @@ -61,7 +61,7 @@ class BenchmarkRunner: Parameters ---------- - instances: Union[List[str], 
List[Instance]] + instances: List[Instance] List of instances to solve. This can either be a list of instances already loaded in memory, or a list of filenames pointing to pickled (and optionally gzipped) files. @@ -98,17 +98,14 @@ class BenchmarkRunner: os.makedirs(os.path.dirname(filename), exist_ok=True) self.results.to_csv(filename) - def fit(self, instances: Union[List[str], List[Instance]]) -> None: + def fit(self, instances: List[Instance]) -> None: """ Trains all solvers with the provided training instances. Parameters ---------- - instances: Union[List[str], List[Instance]] - List of training instances. This can either be a list of instances - already loaded in memory, or a list of filenames pointing to pickled (and - optionally gzipped) files. - + instances: List[Instance] + List of training instances. """ for (solver_name, solver) in self.solvers.items(): logger.debug(f"Fitting {solver_name}...") diff --git a/miplearn/components/component.py b/miplearn/components/component.py index 5a6b991..a182a44 100644 --- a/miplearn/components/component.py +++ b/miplearn/components/component.py @@ -2,10 +2,10 @@ # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. 
+from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Hashable + import numpy as np -from typing import Any, List, Union, TYPE_CHECKING, Tuple, Dict, Optional, Hashable -from miplearn.extractors import InstanceIterator from miplearn.instance import Instance from miplearn.types import LearningSolveStats, TrainingSample, Features @@ -120,11 +120,11 @@ class Component: def xy_instances( self, - instances: Union[List[str], List[Instance]], + instances: List[Instance], ) -> Tuple[Dict, Dict]: x_combined: Dict = {} y_combined: Dict = {} - for instance in InstanceIterator(instances): + for instance in instances: assert isinstance(instance, Instance) for sample in instance.training_data: xy = self.sample_xy(instance.features, sample) @@ -141,7 +141,7 @@ class Component: def fit( self, - training_instances: Union[List[str], List[Instance]], + training_instances: List[Instance], ) -> None: x, y = self.xy_instances(training_instances) for cat in x.keys(): @@ -198,9 +198,9 @@ class Component: ) -> None: return - def evaluate(self, instances: Union[List[str], List[Instance]]) -> List: + def evaluate(self, instances: List[Instance]) -> List: ev = [] - for instance in InstanceIterator(instances): + for instance in instances: for sample in instance.training_data: ev += [self.sample_evaluate(instance.features, sample)] return ev diff --git a/miplearn/components/lazy_dynamic.py b/miplearn/components/lazy_dynamic.py index 252ca5c..ddefb23 100644 --- a/miplearn/components/lazy_dynamic.py +++ b/miplearn/components/lazy_dynamic.py @@ -13,7 +13,7 @@ from miplearn.classifiers import Classifier from miplearn.classifiers.counting import CountingClassifier from miplearn.components import classifier_evaluation_dict from miplearn.components.component import Component -from miplearn.extractors import InstanceFeaturesExtractor, InstanceIterator +from miplearn.extractors import InstanceFeaturesExtractor logger = logging.getLogger(__name__) @@ -68,7 +68,7 @@ class 
DynamicLazyConstraintsComponent(Component): self.classifiers = {} violation_to_instance_idx = {} - for (idx, instance) in enumerate(InstanceIterator(training_instances)): + for (idx, instance) in enumerate(training_instances): for v in instance.found_violated_lazy_constraints: if isinstance(v, list): v = tuple(v) diff --git a/miplearn/components/objective.py b/miplearn/components/objective.py index c6cf984..5aec236 100644 --- a/miplearn/components/objective.py +++ b/miplearn/components/objective.py @@ -3,22 +3,14 @@ # Released under the modified BSD license. See COPYING.md for more details. import logging -from typing import List, Dict, Union, Optional, Any, TYPE_CHECKING, Tuple, Hashable +from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Hashable import numpy as np from sklearn.linear_model import LinearRegression -from sklearn.metrics import ( - mean_squared_error, - explained_variance_score, - max_error, - mean_absolute_error, - r2_score, -) from miplearn.classifiers import Regressor from miplearn.classifiers.sklearn import ScikitLearnRegressor from miplearn.components.component import Component -from miplearn.extractors import InstanceIterator from miplearn.instance import Instance from miplearn.types import TrainingSample, LearningSolveStats, Features diff --git a/miplearn/components/steps/convert_tight.py b/miplearn/components/steps/convert_tight.py index f052519..34b010a 100644 --- a/miplearn/components/steps/convert_tight.py +++ b/miplearn/components/steps/convert_tight.py @@ -13,7 +13,6 @@ from miplearn.classifiers.counting import CountingClassifier from miplearn.components import classifier_evaluation_dict from miplearn.components.component import Component from miplearn.components.steps.drop_redundant import DropRedundantInequalitiesStep -from miplearn.extractors import InstanceIterator logger = logging.getLogger(__name__) @@ -116,7 +115,7 @@ class ConvertTightIneqsIntoEqsStep(Component): def _x_train(instances): x = {} for instance in tqdm( - 
InstanceIterator(instances), + instances, desc="Extract (drop:x)", disable=len(instances) < 5, ): @@ -139,7 +138,7 @@ class ConvertTightIneqsIntoEqsStep(Component): def y(self, instances): y = {} for instance in tqdm( - InstanceIterator(instances), + instances, desc="Extract (rlx:conv_ineqs:y)", disable=len(instances) < 5, ): diff --git a/miplearn/components/steps/drop_redundant.py b/miplearn/components/steps/drop_redundant.py index a41e7c7..b3bfe7e 100644 --- a/miplearn/components/steps/drop_redundant.py +++ b/miplearn/components/steps/drop_redundant.py @@ -6,14 +6,13 @@ import logging from copy import deepcopy import numpy as np -from tqdm import tqdm from p_tqdm import p_umap +from tqdm import tqdm from miplearn.classifiers.counting import CountingClassifier from miplearn.components import classifier_evaluation_dict from miplearn.components.component import Component from miplearn.components.lazy_static import LazyConstraint -from miplearn.extractors import InstanceIterator logger = logging.getLogger(__name__) @@ -131,31 +130,24 @@ class DropRedundantInequalitiesStep(Component): def _extract(instance): x = {} y = {} - for instance in InstanceIterator([instance]): - for training_data in instance.training_data: - for (cid, slack) in training_data["slacks"].items(): - category = instance.get_constraint_category(cid) - if category is None: - continue - if category not in x: - x[category] = [] - if category not in y: - y[category] = [] - if slack > self.slack_tolerance: - y[category] += [[False, True]] - else: - y[category] += [[True, False]] - x[category] += [instance.get_constraint_features(cid)] + for training_data in instance.training_data: + for (cid, slack) in training_data["slacks"].items(): + category = instance.get_constraint_category(cid) + if category is None: + continue + if category not in x: + x[category] = [] + if category not in y: + y[category] = [] + if slack > self.slack_tolerance: + y[category] += [[False, True]] + else: + y[category] += [[True, 
False]] + x[category] += [instance.get_constraint_features(cid)] return x, y if n_jobs == 1: - results = [ - _extract(i) - for i in tqdm( - instances, - desc="Extract (drop 1/3)", - ) - ] + results = [_extract(i) for i in tqdm(instances, desc="Extract (drop 1/3)")] else: results = p_umap( _extract, diff --git a/miplearn/extractors.py b/miplearn/extractors.py index 4e2c755..350e5ca 100644 --- a/miplearn/extractors.py +++ b/miplearn/extractors.py @@ -2,51 +2,14 @@ # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. -import gzip import logging -import pickle from abc import ABC, abstractmethod -from typing import List, Union, cast, IO import numpy as np -from tqdm.auto import tqdm - -from miplearn.instance import Instance logger = logging.getLogger(__name__) -class InstanceIterator: - def __init__( - self, - instances: Union[List[str], List[Instance]], - ) -> None: - self.instances = instances - self.current = 0 - - def __iter__(self): - return self - - def __next__(self) -> Instance: - if self.current >= len(self.instances): - raise StopIteration - result = self.instances[self.current] - self.current += 1 - if isinstance(result, str): - logger.debug("Read: %s" % result) - try: - if result.endswith(".gz"): - with gzip.GzipFile(result, "rb") as gzfile: - result = pickle.load(cast(IO[bytes], gzfile)) - else: - with open(result, "rb") as file: - result = pickle.load(cast(IO[bytes], file)) - except pickle.UnpicklingError: - raise Exception(f"Invalid instance file: {result}") - assert isinstance(result, Instance) - return result - - class Extractor(ABC): @abstractmethod def extract(self, instances): @@ -77,6 +40,6 @@ class InstanceFeaturesExtractor(Extractor): instance.training_data[0]["LP value"], ] ) - for instance in InstanceIterator(instances) + for instance in instances ] ) diff --git a/miplearn/instance.py b/miplearn/instance.py index b327f97..47a4867 100644 --- 
a/miplearn/instance.py +++ b/miplearn/instance.py @@ -3,14 +3,34 @@ # Released under the modified BSD license. See COPYING.md for more details. import gzip -import json +import logging +import os +import pickle from abc import ABC, abstractmethod -from typing import Any, List, Optional, Hashable - -import numpy as np +from typing import Any, List, Optional, Hashable, IO, cast from miplearn.types import TrainingSample, VarIndex, Features +logger = logging.getLogger(__name__) + + +def write_pickle_gz(obj: Any, filename: str) -> None: + logger.info(f"Writing: {filename}") + os.makedirs(os.path.dirname(filename), exist_ok=True) + with gzip.GzipFile(filename, "wb") as file: + pickle.dump(obj, cast(IO[bytes], file)) + + +def read_pickle_gz(filename: str) -> Any: + logger.info(f"Reading: {filename}") + with gzip.GzipFile(filename, "rb") as file: + return pickle.load(cast(IO[bytes], file)) + + +def write_pickle_gz_multiple(objs: List[Any], dirname: str) -> None: + for (i, obj) in enumerate(objs): + write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz") + # noinspection PyMethodMayBeStatic class Instance(ABC): @@ -155,12 +175,116 @@ class Instance(ABC): def build_user_cut(self, model, violation): pass - def load(self, filename): - with gzip.GzipFile(filename, "r") as f: - data = json.loads(f.read().decode("utf-8")) - self.__dict__ = data + def flush(self) -> None: + """ + Save any pending changes made to the instance to the underlying data store. + """ + pass + + +def lazy_load(func): + def inner(self, *args): + if self.instance is None: + self.instance = self._load() + self.features = self.instance.features + self.training_data = self.instance.training_data + return func(self, *args) + + return inner + + +class PickleGzInstance(Instance): + """ + An instance backed by a gzipped pickle file. + + The instance is only loaded to memory after an operation is called (for example, + `to_model`). 
+ + Parameters + ---------- + filename: str + Path of the gzipped pickle file that should be loaded. + """ + + def __init__(self, filename: str) -> None: + super().__init__() + assert os.path.exists(filename), f"File not found: {filename}" + self.instance: Optional[Instance] = None + self.filename: str = filename + + @lazy_load + def to_model(self) -> Any: + assert self.instance is not None + return self.instance.to_model() + + @lazy_load + def get_instance_features(self) -> List[float]: + assert self.instance is not None + return self.instance.get_instance_features() + + @lazy_load + def get_variable_features(self, var_name: str, index: VarIndex) -> List[float]: + assert self.instance is not None + return self.instance.get_variable_features(var_name, index) + + @lazy_load + def get_variable_category( + self, + var_name: str, + index: VarIndex, + ) -> Optional[Hashable]: + assert self.instance is not None + return self.instance.get_variable_category(var_name, index) + + @lazy_load + def get_constraint_features(self, cid: str) -> Optional[List[float]]: + assert self.instance is not None + return self.instance.get_constraint_features(cid) + + @lazy_load + def get_constraint_category(self, cid: str) -> Optional[str]: + assert self.instance is not None + return self.instance.get_constraint_category(cid) + + @lazy_load + def has_static_lazy_constraints(self) -> bool: + assert self.instance is not None + return self.instance.has_static_lazy_constraints() + + @lazy_load + def has_dynamic_lazy_constraints(self): + assert self.instance is not None + return self.instance.has_dynamic_lazy_constraints() + + @lazy_load + def is_constraint_lazy(self, cid: str) -> bool: + assert self.instance is not None + return self.instance.is_constraint_lazy(cid) + + @lazy_load + def find_violated_lazy_constraints(self, model): + assert self.instance is not None + return self.instance.find_violated_lazy_constraints(model) + + @lazy_load + def build_lazy_constraint(self, model, violation): + 
assert self.instance is not None + return self.instance.build_lazy_constraint(model, violation) + + @lazy_load + def find_violated_user_cuts(self, model): + assert self.instance is not None + return self.instance.find_violated_user_cuts(model) + + @lazy_load + def build_user_cut(self, model, violation): + assert self.instance is not None + return self.instance.build_user_cut(model, violation) + + def _load(self) -> Instance: + obj = read_pickle_gz(self.filename) + assert isinstance(obj, Instance) + return obj - def dump(self, filename): - data = json.dumps(self.__dict__, indent=2).encode("utf-8") - with gzip.GzipFile(filename, "w") as f: - f.write(data) + def flush(self) -> None: + write_pickle_gz(self.instance, self.filename) diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py index 4a3ab47..eede7e6 100644 --- a/miplearn/solvers/learning.py +++ b/miplearn/solvers/learning.py @@ -18,7 +18,7 @@ from miplearn.components.lazy_dynamic import DynamicLazyConstraintsComponent from miplearn.components.objective import ObjectiveValueComponent from miplearn.components.primal import PrimalSolutionComponent from miplearn.features import FeaturesExtractor -from miplearn.instance import Instance +from miplearn.instance import Instance, PickleGzInstance from miplearn.solvers import _RedirectOutput from miplearn.solvers.internal import InternalSolver from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver @@ -30,8 +30,7 @@ logger = logging.getLogger(__name__) class _GlobalVariables: def __init__(self) -> None: self.solver: Optional[LearningSolver] = None - self.instances: Optional[Union[List[str], List[Instance]]] = None - self.output_filenames: Optional[List[str]] = None + self.instances: Optional[List[Instance]] = None self.discard_outputs: bool = False @@ -44,16 +43,10 @@ _GLOBAL = [_GlobalVariables()] def _parallel_solve(idx): solver = _GLOBAL[0].solver instances = _GLOBAL[0].instances - output_filenames = _GLOBAL[0].output_filenames discard_outputs = 
_GLOBAL[0].discard_outputs - if output_filenames is None: - output_filename = None - else: - output_filename = output_filenames[idx] try: stats = solver.solve( instances[idx], - output_filename=output_filename, discard_output=discard_outputs, ) return stats, instances[idx] @@ -129,30 +122,12 @@ class LearningSolver: def _solve( self, - instance: Union[Instance, str], + instance: Instance, model: Any = None, - output_filename: Optional[str] = None, discard_output: bool = False, tee: bool = False, ) -> LearningSolveStats: - # Load instance from file, if necessary - filename = None - fileformat = None - file: Union[BinaryIO, gzip.GzipFile] - if isinstance(instance, str): - filename = instance - logger.info("Reading: %s" % filename) - if filename.endswith(".gz"): - fileformat = "pickle-gz" - with gzip.GzipFile(filename, "rb") as file: - instance = pickle.load(cast(IO[bytes], file)) - else: - fileformat = "pickle" - with open(filename, "rb") as file: - instance = pickle.load(cast(IO[bytes], file)) - assert isinstance(instance, Instance) - # Generate model if model is None: with _RedirectOutput([]): @@ -262,23 +237,15 @@ class LearningSolver: component.after_solve_mip(*callback_args) # Write to file, if necessary - if not discard_output and filename is not None: - if output_filename is None: - output_filename = filename - logger.info("Writing: %s" % output_filename) - if fileformat == "pickle": - with open(output_filename, "wb") as file: - pickle.dump(instance, cast(IO[bytes], file)) - else: - with gzip.GzipFile(output_filename, "wb") as file: - pickle.dump(instance, cast(IO[bytes], file)) + if not discard_output: + instance.flush() + return stats def solve( self, - instance: Union[Instance, str], + instance: Instance, model: Any = None, - output_filename: Optional[str] = None, discard_output: bool = False, tee: bool = False, ) -> LearningSolveStats: @@ -298,14 +265,10 @@ class LearningSolver: Parameters ---------- - instance: Union[Instance, str] - The instance to be 
solved, or a filename. + instance: Instance + The instance to be solved. model: Any The corresponding Pyomo model. If not provided, it will be created. - output_filename: Optional[str] - If instance is a filename and output_filename is provided, write the - modified instance to this file, instead of replacing the original one. If - output_filename is None (the default), modified the original file in-place. discard_output: bool If True, do not write the modified instances anywhere; simply discard them. Useful during benchmarking. @@ -325,30 +288,28 @@ class LearningSolver: details. """ if self.simulate_perfect: - if not isinstance(instance, str): + if not isinstance(instance, PickleGzInstance): raise Exception("Not implemented") - with tempfile.NamedTemporaryFile(suffix=os.path.basename(instance)) as tmp: - self._solve( - instance=instance, - model=model, - output_filename=tmp.name, - tee=tee, - ) - self.fit([tmp.name]) + self._solve( + instance=instance, + model=model, + tee=tee, + discard_output=True, + ) + self.fit([instance]) + instance.instance = None return self._solve( instance=instance, model=model, - output_filename=output_filename, discard_output=discard_output, tee=tee, ) def parallel_solve( self, - instances: Union[List[str], List[Instance]], + instances: List[Instance], n_jobs: int = 4, label: str = "Solve", - output_filenames: Optional[List[str]] = None, discard_outputs: bool = False, ) -> List[LearningSolveStats]: """ @@ -361,17 +322,13 @@ class LearningSolver: Parameters ---------- - output_filenames: Optional[List[str]] - If instances are file names and output_filenames is provided, write the - modified instances to these files, instead of replacing the original - files. If output_filenames is None, modifies the instances in-place. discard_outputs: bool If True, do not write the modified instances anywhere; simply discard them instead. Useful during benchmarking. label: str Label to show in the progress bar. 
- instances: Union[List[str], List[Instance]] - The instances to be solved + instances: List[Instance] + The instances to be solved. n_jobs: int Number of instances to solve in parallel at a time. @@ -388,7 +345,6 @@ class LearningSolver: self.internal_solver = None self._silence_miplearn_logger() _GLOBAL[0].solver = self - _GLOBAL[0].output_filenames = output_filenames _GLOBAL[0].instances = instances _GLOBAL[0].discard_outputs = discard_outputs results = p_map( @@ -405,7 +361,7 @@ class LearningSolver: self._restore_miplearn_logger() return stats - def fit(self, training_instances: Union[List[str], List[Instance]]) -> None: + def fit(self, training_instances: List[Instance]) -> None: logger.debug("Fitting...") if len(training_instances) == 0: return diff --git a/tests/solvers/test_learning_solver.py b/tests/solvers/test_learning_solver.py index 36abbe3..913eb6b 100644 --- a/tests/solvers/test_learning_solver.py +++ b/tests/solvers/test_learning_solver.py @@ -8,6 +8,7 @@ import pickle import tempfile import os +from miplearn.instance import PickleGzInstance, write_pickle_gz, read_pickle_gz from miplearn.solvers.gurobi import GurobiSolver from miplearn.solvers.learning import LearningSolver from . 
import _get_knapsack_instance, get_internal_solvers @@ -78,61 +79,40 @@ def test_parallel_solve(): def test_solve_fit_from_disk(): for internal_solver in get_internal_solvers(): # Create instances and pickle them - filenames = [] + instances = [] for k in range(3): instance = _get_knapsack_instance(internal_solver) with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as file: - filenames += [file.name] - pickle.dump(instance, file) + instances += [PickleGzInstance(file.name)] + write_pickle_gz(instance, file.name) # Test: solve solver = LearningSolver(solver=internal_solver) - solver.solve(filenames[0]) - with open(filenames[0], "rb") as file: - instance = pickle.load(file) - assert len(instance.training_data) > 0 + solver.solve(instances[0]) + instance_loaded = read_pickle_gz(instances[0].filename) + assert len(instance_loaded.training_data) > 0 # Test: parallel_solve - solver.parallel_solve(filenames) - for filename in filenames: - with open(filename, "rb") as file: - instance = pickle.load(file) - assert len(instance.training_data) > 0 - - # Test: solve (with specified output) - output = [f + ".out" for f in filenames] - solver.solve( - filenames[0], - output_filename=output[0], - ) - assert os.path.isfile(output[0]) - - # Test: parallel_solve (with specified output) - solver.parallel_solve( - filenames, - output_filenames=output, - ) - for filename in output: - assert os.path.isfile(filename) + solver.parallel_solve(instances) + for instance in instances: + instance_loaded = read_pickle_gz(instance.filename) + assert len(instance.training_data) > 0 # Delete temporary files - for filename in filenames: - os.remove(filename) - for filename in output: - os.remove(filename) + for instance in instances: + os.remove(instance.filename) def test_simulate_perfect(): internal_solver = GurobiSolver instance = _get_knapsack_instance(internal_solver) with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp: - pickle.dump(instance, tmp) - tmp.flush() + 
write_pickle_gz(instance, tmp.name) solver = LearningSolver( solver=internal_solver, simulate_perfect=True, ) - stats = solver.solve(tmp.name) + stats = solver.solve(PickleGzInstance(tmp.name)) assert stats["Lower bound"] == stats["Objective: Predicted lower bound"] diff --git a/tests/test_instance.py b/tests/test_instance.py new file mode 100644 index 0000000..0ee3b9c --- /dev/null +++ b/tests/test_instance.py @@ -0,0 +1,16 @@ +# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. +# Released under the modified BSD license. See COPYING.md for more details. +import tempfile + +from miplearn import GurobiSolver +from miplearn.instance import write_pickle_gz, PickleGzInstance +from tests.fixtures.knapsack import get_knapsack_instance + + +def test_pickled() -> None: + original = get_knapsack_instance(GurobiSolver()) + file = tempfile.NamedTemporaryFile() + write_pickle_gz(original, file.name) + pickled = PickleGzInstance(file.name) + assert pickled.to_model() is not None