Replace InstanceIterator by PickleGzInstance

This commit is contained in:
2021-04-04 14:48:46 -05:00
parent b4770c6c0a
commit 08e808690e
14 changed files with 253 additions and 257 deletions

View File

@@ -23,7 +23,13 @@ from .components.steps.convert_tight import ConvertTightIneqsIntoEqsStep
from .components.steps.drop_redundant import DropRedundantInequalitiesStep
from .components.steps.relax_integrality import RelaxIntegralityStep
from .extractors import InstanceFeaturesExtractor
from .instance import Instance
from .instance import (
Instance,
PickleGzInstance,
write_pickle_gz,
write_pickle_gz_multiple,
read_pickle_gz,
)
from .log import setup_logger
from .solvers.gurobi import GurobiSolver
from .solvers.internal import InternalSolver

View File

@@ -52,7 +52,7 @@ class BenchmarkRunner:
def parallel_solve(
self,
instances: Union[List[str], List[Instance]],
instances: List[Instance],
n_jobs: int = 1,
n_trials: int = 3,
) -> None:
@@ -61,7 +61,7 @@ class BenchmarkRunner:
Parameters
----------
instances: Union[List[str], List[Instance]]
instances: List[Instance]
List of instances to solve. To solve instances stored in gzipped pickle
files, wrap each filename in a PickleGzInstance instead of passing the
filename itself.
@@ -98,17 +98,14 @@ class BenchmarkRunner:
os.makedirs(os.path.dirname(filename), exist_ok=True)
self.results.to_csv(filename)
def fit(self, instances: Union[List[str], List[Instance]]) -> None:
def fit(self, instances: List[Instance]) -> None:
"""
Trains all solvers with the provided training instances.
Parameters
----------
instances: Union[List[str], List[Instance]]
List of training instances. This can either be a list of instances
already loaded in memory, or a list of filenames pointing to pickled (and
optionally gzipped) files.
instances: List[Instance]
List of training instances.
"""
for (solver_name, solver) in self.solvers.items():
logger.debug(f"Fitting {solver_name}...")

View File

@@ -2,10 +2,10 @@
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import numpy as np
from typing import Any, List, Union, TYPE_CHECKING, Tuple, Dict, Optional, Hashable
from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Hashable
import numpy as np
from miplearn.extractors import InstanceIterator
from miplearn.instance import Instance
from miplearn.types import LearningSolveStats, TrainingSample, Features
@@ -120,11 +120,11 @@ class Component:
def xy_instances(
self,
instances: Union[List[str], List[Instance]],
instances: List[Instance],
) -> Tuple[Dict, Dict]:
x_combined: Dict = {}
y_combined: Dict = {}
for instance in InstanceIterator(instances):
for instance in instances:
assert isinstance(instance, Instance)
for sample in instance.training_data:
xy = self.sample_xy(instance.features, sample)
@@ -141,7 +141,7 @@ class Component:
def fit(
self,
training_instances: Union[List[str], List[Instance]],
training_instances: List[Instance],
) -> None:
x, y = self.xy_instances(training_instances)
for cat in x.keys():
@@ -198,9 +198,9 @@ class Component:
) -> None:
return
def evaluate(self, instances: Union[List[str], List[Instance]]) -> List:
def evaluate(self, instances: List[Instance]) -> List:
ev = []
for instance in InstanceIterator(instances):
for instance in instances:
for sample in instance.training_data:
ev += [self.sample_evaluate(instance.features, sample)]
return ev

View File

@@ -13,7 +13,7 @@ from miplearn.classifiers import Classifier
from miplearn.classifiers.counting import CountingClassifier
from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component
from miplearn.extractors import InstanceFeaturesExtractor, InstanceIterator
from miplearn.extractors import InstanceFeaturesExtractor
logger = logging.getLogger(__name__)
@@ -68,7 +68,7 @@ class DynamicLazyConstraintsComponent(Component):
self.classifiers = {}
violation_to_instance_idx = {}
for (idx, instance) in enumerate(InstanceIterator(training_instances)):
for (idx, instance) in enumerate(training_instances):
for v in instance.found_violated_lazy_constraints:
if isinstance(v, list):
v = tuple(v)

View File

@@ -3,22 +3,14 @@
# Released under the modified BSD license. See COPYING.md for more details.
import logging
from typing import List, Dict, Union, Optional, Any, TYPE_CHECKING, Tuple, Hashable
from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Hashable
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
mean_squared_error,
explained_variance_score,
max_error,
mean_absolute_error,
r2_score,
)
from miplearn.classifiers import Regressor
from miplearn.classifiers.sklearn import ScikitLearnRegressor
from miplearn.components.component import Component
from miplearn.extractors import InstanceIterator
from miplearn.instance import Instance
from miplearn.types import TrainingSample, LearningSolveStats, Features

View File

@@ -13,7 +13,6 @@ from miplearn.classifiers.counting import CountingClassifier
from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component
from miplearn.components.steps.drop_redundant import DropRedundantInequalitiesStep
from miplearn.extractors import InstanceIterator
logger = logging.getLogger(__name__)
@@ -116,7 +115,7 @@ class ConvertTightIneqsIntoEqsStep(Component):
def _x_train(instances):
x = {}
for instance in tqdm(
InstanceIterator(instances),
instances,
desc="Extract (drop:x)",
disable=len(instances) < 5,
):
@@ -139,7 +138,7 @@ class ConvertTightIneqsIntoEqsStep(Component):
def y(self, instances):
y = {}
for instance in tqdm(
InstanceIterator(instances),
instances,
desc="Extract (rlx:conv_ineqs:y)",
disable=len(instances) < 5,
):

View File

@@ -6,14 +6,13 @@ import logging
from copy import deepcopy
import numpy as np
from tqdm import tqdm
from p_tqdm import p_umap
from tqdm import tqdm
from miplearn.classifiers.counting import CountingClassifier
from miplearn.components import classifier_evaluation_dict
from miplearn.components.component import Component
from miplearn.components.lazy_static import LazyConstraint
from miplearn.extractors import InstanceIterator
logger = logging.getLogger(__name__)
@@ -131,31 +130,24 @@ class DropRedundantInequalitiesStep(Component):
def _extract(instance):
x = {}
y = {}
for instance in InstanceIterator([instance]):
for training_data in instance.training_data:
for (cid, slack) in training_data["slacks"].items():
category = instance.get_constraint_category(cid)
if category is None:
continue
if category not in x:
x[category] = []
if category not in y:
y[category] = []
if slack > self.slack_tolerance:
y[category] += [[False, True]]
else:
y[category] += [[True, False]]
x[category] += [instance.get_constraint_features(cid)]
for training_data in instance.training_data:
for (cid, slack) in training_data["slacks"].items():
category = instance.get_constraint_category(cid)
if category is None:
continue
if category not in x:
x[category] = []
if category not in y:
y[category] = []
if slack > self.slack_tolerance:
y[category] += [[False, True]]
else:
y[category] += [[True, False]]
x[category] += [instance.get_constraint_features(cid)]
return x, y
if n_jobs == 1:
results = [
_extract(i)
for i in tqdm(
instances,
desc="Extract (drop 1/3)",
)
]
results = [_extract(i) for i in tqdm(instances, desc="Extract (drop 1/3)")]
else:
results = p_umap(
_extract,

View File

@@ -2,51 +2,14 @@
# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
# Released under the modified BSD license. See COPYING.md for more details.
import gzip
import logging
import pickle
from abc import ABC, abstractmethod
from typing import List, Union, cast, IO
import numpy as np
from tqdm.auto import tqdm
from miplearn.instance import Instance
logger = logging.getLogger(__name__)
class InstanceIterator:
    """
    Iterator over a list of instances given either as objects or as filenames.

    Entries that are strings are treated as paths to pickled files and are
    unpickled one at a time, as the iterator advances; paths ending in ".gz" are
    read through gzip. Entries that are not strings are returned as they are.

    The object is its own iterator, so it can be traversed only once.

    Parameters
    ----------
    instances: Union[List[str], List[Instance]]
        Instances already loaded in memory, or filenames of pickled instances.
    """

    def __init__(
        self,
        instances: Union[List[str], List[Instance]],
    ) -> None:
        self.instances = instances
        # Position of the next entry to be returned by __next__.
        self.current = 0

    def __iter__(self):
        return self

    def __next__(self) -> Instance:
        """
        Return the next instance, loading it from disk if it is a filename.

        Raises
        ------
        StopIteration
            When all entries have been returned.
        Exception
            When a file cannot be unpickled; the original UnpicklingError is
            attached as the cause.
        """
        if self.current >= len(self.instances):
            raise StopIteration
        result = self.instances[self.current]
        self.current += 1
        if isinstance(result, str):
            # Keep the path in its own variable so that the error message below
            # can never end up showing a partially loaded object.
            filename = result
            logger.debug("Read: %s", filename)
            # NOTE(review): pickle can execute arbitrary code while loading;
            # only files from trusted sources should be listed here.
            try:
                if filename.endswith(".gz"):
                    with gzip.GzipFile(filename, "rb") as gzfile:
                        result = pickle.load(cast(IO[bytes], gzfile))
                else:
                    with open(filename, "rb") as file:
                        result = pickle.load(cast(IO[bytes], file))
            except pickle.UnpicklingError as err:
                raise Exception(f"Invalid instance file: {filename}") from err
        assert isinstance(result, Instance)
        return result
class Extractor(ABC):
@abstractmethod
def extract(self, instances):
@@ -77,6 +40,6 @@ class InstanceFeaturesExtractor(Extractor):
instance.training_data[0]["LP value"],
]
)
for instance in InstanceIterator(instances)
for instance in instances
]
)

View File

@@ -3,14 +3,34 @@
# Released under the modified BSD license. See COPYING.md for more details.
import gzip
import json
import logging
import os
import pickle
from abc import ABC, abstractmethod
from typing import Any, List, Optional, Hashable
import numpy as np
from typing import Any, List, Optional, Hashable, IO, cast
from miplearn.types import TrainingSample, VarIndex, Features
logger = logging.getLogger(__name__)
def write_pickle_gz(obj: Any, filename: str) -> None:
    """
    Pickle an object and write it to a gzip-compressed file.

    The directory that will contain the file is created if it does not exist
    yet. The file is opened in "wb" mode, so an existing file with the same
    name is overwritten.

    Parameters
    ----------
    obj: Any
        Object to be pickled.
    filename: str
        Path of the gzipped pickle file to write.
    """
    logger.info(f"Writing: {filename}")
    dirname = os.path.dirname(filename)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError. In that case the file goes to the current directory
    # and there is nothing to create.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with gzip.GzipFile(filename, "wb") as file:
        pickle.dump(obj, cast(IO[bytes], file))
def read_pickle_gz(filename: str) -> Any:
    """
    Load and return the object stored in a gzip-compressed pickle file.

    Parameters
    ----------
    filename: str
        Path of the gzipped pickle file to read.

    Returns
    -------
    Any
        The unpickled object.
    """
    logger.info(f"Reading: {filename}")
    # NOTE(review): unpickling can run arbitrary code, so this should only be
    # pointed at files from trusted sources.
    with gzip.GzipFile(filename, "rb") as gz_stream:
        byte_stream = cast(IO[bytes], gz_stream)
        loaded = pickle.load(byte_stream)
    return loaded
def write_pickle_gz_multiple(objs: List[Any], dirname: str) -> None:
    """
    Write each object to its own gzipped pickle file inside a directory.

    Files are named after the position of the object in the list, zero-padded
    to five digits: 00000.pkl.gz, 00001.pkl.gz, and so on.

    Parameters
    ----------
    objs: List[Any]
        Objects to be pickled, one per file.
    dirname: str
        Directory that receives the files.
    """
    for (position, item) in enumerate(objs):
        target = f"{dirname}/{position:05d}.pkl.gz"
        write_pickle_gz(item, target)
# noinspection PyMethodMayBeStatic
class Instance(ABC):
@@ -155,12 +175,116 @@ class Instance(ABC):
def build_user_cut(self, model, violation):
pass
def load(self, filename):
with gzip.GzipFile(filename, "r") as f:
data = json.loads(f.read().decode("utf-8"))
self.__dict__ = data
def flush(self) -> None:
    """
    Persist pending modifications of the instance to its underlying data store.

    This implementation does nothing.
    """
    return None
def dump(self, filename):
    """
    Write this object's attributes to a gzipped JSON file.

    The object's `__dict__` is serialized as JSON with a two-space indent,
    encoded as UTF-8 and written through gzip to `filename`.
    """
    text = json.dumps(self.__dict__, indent=2)
    payload = text.encode("utf-8")
    with gzip.GzipFile(filename, "w") as stream:
        stream.write(payload)
def lazy_load(func):
    """
    Decorator that loads the wrapped instance before running a method.

    Intended for methods of objects that keep a lazily-loaded instance in
    `self.instance` and know how to load it through `self._load()`. On the
    first call where `self.instance` is None, the instance is loaded and its
    `features` and `training_data` attributes are also exposed on `self`.
    Later calls reuse the loaded instance.

    Positional and keyword arguments are both forwarded to the wrapped method,
    and the wrapped method's name and docstring are preserved.
    """
    # Imported locally so the decorator does not depend on module-level imports.
    from functools import wraps

    @wraps(func)
    def inner(self, *args, **kwargs):
        if self.instance is None:
            self.instance = self._load()
            # Mirror the loaded data on the wrapper, so code that reads
            # `features` / `training_data` directly sees the same objects.
            self.features = self.instance.features
            self.training_data = self.instance.training_data
        return func(self, *args, **kwargs)

    return inner
class PickleGzInstance(Instance):
    """
    An instance backed by a gzipped pickle file.

    The instance is only loaded to memory after an operation is called (for example,
    `to_model`). Every operation is forwarded to the loaded instance. Calling
    `flush` writes the loaded instance back to the same file.

    Parameters
    ----------
    filename: str
        Path of the gzipped pickle file that should be loaded.
    """

    def __init__(self, filename: str) -> None:
        super().__init__()
        assert os.path.exists(filename), f"File not found: {filename}"
        # The wrapped instance; stays None until the first decorated method runs.
        self.instance: Optional[Instance] = None
        self.filename: str = filename

    @lazy_load
    def to_model(self) -> Any:
        assert self.instance is not None
        return self.instance.to_model()

    @lazy_load
    def get_instance_features(self) -> List[float]:
        assert self.instance is not None
        return self.instance.get_instance_features()

    @lazy_load
    def get_variable_features(self, var_name: str, index: VarIndex) -> List[float]:
        assert self.instance is not None
        return self.instance.get_variable_features(var_name, index)

    @lazy_load
    def get_variable_category(
        self,
        var_name: str,
        index: VarIndex,
    ) -> Optional[Hashable]:
        assert self.instance is not None
        return self.instance.get_variable_category(var_name, index)

    @lazy_load
    def get_constraint_features(self, cid: str) -> Optional[List[float]]:
        assert self.instance is not None
        return self.instance.get_constraint_features(cid)

    @lazy_load
    def get_constraint_category(self, cid: str) -> Optional[str]:
        assert self.instance is not None
        return self.instance.get_constraint_category(cid)

    @lazy_load
    def has_static_lazy_constraints(self) -> bool:
        assert self.instance is not None
        return self.instance.has_static_lazy_constraints()

    @lazy_load
    def has_dynamic_lazy_constraints(self):
        assert self.instance is not None
        return self.instance.has_dynamic_lazy_constraints()

    @lazy_load
    def is_constraint_lazy(self, cid: str) -> bool:
        assert self.instance is not None
        return self.instance.is_constraint_lazy(cid)

    @lazy_load
    def find_violated_lazy_constraints(self, model):
        assert self.instance is not None
        return self.instance.find_violated_lazy_constraints(model)

    @lazy_load
    def build_lazy_constraint(self, model, violation):
        assert self.instance is not None
        return self.instance.build_lazy_constraint(model, violation)

    @lazy_load
    def find_violated_user_cuts(self, model):
        assert self.instance is not None
        return self.instance.find_violated_user_cuts(model)

    @lazy_load
    def build_user_cut(self, model, violation):
        assert self.instance is not None
        return self.instance.build_user_cut(model, violation)

    def _load(self) -> Instance:
        """
        Read the wrapped instance from `self.filename`.
        """
        obj = read_pickle_gz(self.filename)
        assert isinstance(obj, Instance)
        return obj

    def flush(self) -> None:
        """
        Write the loaded instance back to `self.filename`.

        Does nothing if the instance has not been loaded (or has been released
        by setting `self.instance` to None), since there is nothing to save.
        """
        # Writing `None` here would replace the pickled instance on disk and
        # make every later load fail.
        if self.instance is None:
            return
        write_pickle_gz(self.instance, self.filename)

View File

@@ -18,7 +18,7 @@ from miplearn.components.lazy_dynamic import DynamicLazyConstraintsComponent
from miplearn.components.objective import ObjectiveValueComponent
from miplearn.components.primal import PrimalSolutionComponent
from miplearn.features import FeaturesExtractor
from miplearn.instance import Instance
from miplearn.instance import Instance, PickleGzInstance
from miplearn.solvers import _RedirectOutput
from miplearn.solvers.internal import InternalSolver
from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
@@ -30,8 +30,7 @@ logger = logging.getLogger(__name__)
class _GlobalVariables:
def __init__(self) -> None:
self.solver: Optional[LearningSolver] = None
self.instances: Optional[Union[List[str], List[Instance]]] = None
self.output_filenames: Optional[List[str]] = None
self.instances: Optional[List[Instance]] = None
self.discard_outputs: bool = False
@@ -44,16 +43,10 @@ _GLOBAL = [_GlobalVariables()]
def _parallel_solve(idx):
solver = _GLOBAL[0].solver
instances = _GLOBAL[0].instances
output_filenames = _GLOBAL[0].output_filenames
discard_outputs = _GLOBAL[0].discard_outputs
if output_filenames is None:
output_filename = None
else:
output_filename = output_filenames[idx]
try:
stats = solver.solve(
instances[idx],
output_filename=output_filename,
discard_output=discard_outputs,
)
return stats, instances[idx]
@@ -129,30 +122,12 @@ class LearningSolver:
def _solve(
self,
instance: Union[Instance, str],
instance: Instance,
model: Any = None,
output_filename: Optional[str] = None,
discard_output: bool = False,
tee: bool = False,
) -> LearningSolveStats:
# Load instance from file, if necessary
filename = None
fileformat = None
file: Union[BinaryIO, gzip.GzipFile]
if isinstance(instance, str):
filename = instance
logger.info("Reading: %s" % filename)
if filename.endswith(".gz"):
fileformat = "pickle-gz"
with gzip.GzipFile(filename, "rb") as file:
instance = pickle.load(cast(IO[bytes], file))
else:
fileformat = "pickle"
with open(filename, "rb") as file:
instance = pickle.load(cast(IO[bytes], file))
assert isinstance(instance, Instance)
# Generate model
if model is None:
with _RedirectOutput([]):
@@ -262,23 +237,15 @@ class LearningSolver:
component.after_solve_mip(*callback_args)
# Write to file, if necessary
if not discard_output and filename is not None:
if output_filename is None:
output_filename = filename
logger.info("Writing: %s" % output_filename)
if fileformat == "pickle":
with open(output_filename, "wb") as file:
pickle.dump(instance, cast(IO[bytes], file))
else:
with gzip.GzipFile(output_filename, "wb") as file:
pickle.dump(instance, cast(IO[bytes], file))
if not discard_output:
instance.flush()
return stats
def solve(
self,
instance: Union[Instance, str],
instance: Instance,
model: Any = None,
output_filename: Optional[str] = None,
discard_output: bool = False,
tee: bool = False,
) -> LearningSolveStats:
@@ -298,14 +265,10 @@ class LearningSolver:
Parameters
----------
instance: Union[Instance, str]
The instance to be solved, or a filename.
instance: Instance
The instance to be solved.
model: Any
The corresponding Pyomo model. If not provided, it will be created.
output_filename: Optional[str]
If instance is a filename and output_filename is provided, write the
modified instance to this file, instead of replacing the original one. If
output_filename is None (the default), modified the original file in-place.
discard_output: bool
If True, do not write the modified instances anywhere; simply discard
them. Useful during benchmarking.
@@ -325,30 +288,28 @@ class LearningSolver:
details.
"""
if self.simulate_perfect:
if not isinstance(instance, str):
if not isinstance(instance, PickleGzInstance):
raise Exception("Not implemented")
with tempfile.NamedTemporaryFile(suffix=os.path.basename(instance)) as tmp:
self._solve(
instance=instance,
model=model,
output_filename=tmp.name,
tee=tee,
)
self.fit([tmp.name])
self._solve(
instance=instance,
model=model,
tee=tee,
discard_output=True,
)
self.fit([instance])
instance.instance = None
return self._solve(
instance=instance,
model=model,
output_filename=output_filename,
discard_output=discard_output,
tee=tee,
)
def parallel_solve(
self,
instances: Union[List[str], List[Instance]],
instances: List[Instance],
n_jobs: int = 4,
label: str = "Solve",
output_filenames: Optional[List[str]] = None,
discard_outputs: bool = False,
) -> List[LearningSolveStats]:
"""
@@ -361,17 +322,13 @@ class LearningSolver:
Parameters
----------
output_filenames: Optional[List[str]]
If instances are file names and output_filenames is provided, write the
modified instances to these files, instead of replacing the original
files. If output_filenames is None, modifies the instances in-place.
discard_outputs: bool
If True, do not write the modified instances anywhere; simply discard
them instead. Useful during benchmarking.
label: str
Label to show in the progress bar.
instances: Union[List[str], List[Instance]]
The instances to be solved
instances: List[Instance]
The instances to be solved.
n_jobs: int
Number of instances to solve in parallel at a time.
@@ -388,7 +345,6 @@ class LearningSolver:
self.internal_solver = None
self._silence_miplearn_logger()
_GLOBAL[0].solver = self
_GLOBAL[0].output_filenames = output_filenames
_GLOBAL[0].instances = instances
_GLOBAL[0].discard_outputs = discard_outputs
results = p_map(
@@ -405,7 +361,7 @@ class LearningSolver:
self._restore_miplearn_logger()
return stats
def fit(self, training_instances: Union[List[str], List[Instance]]) -> None:
def fit(self, training_instances: List[Instance]) -> None:
logger.debug("Fitting...")
if len(training_instances) == 0:
return