Replace InstanceIterator by PickleGzInstance

5 years ago · 08e808690e
parent b4770c6c0a
commit 08e808690e
14 changed files with 252 additions and 256 deletions
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@ -40,6 +40,8 @@ from miplearn import (
    BenchmarkRunner,
    GurobiPyomoSolver,
    setup_logger,
    PickleGzInstance,
    write_pickle_gz_multiple,
 )
 setup_logger()
@ -48,24 +50,6 @@ logging.getLogger("pyomo.core").setLevel(logging.ERROR)
 logger = logging.getLogger("benchmark")
 def write_pickle_gz(obj, filename):
    logger.info(f"Writing: {filename}")
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with gzip.GzipFile(filename, "wb") as file:
        pickle.dump(obj, file)
 def read_pickle_gz(filename):
    logger.info(f"Reading: {filename}")
    with gzip.GzipFile(filename, "rb") as file:
        return pickle.load(file)
 def write_pickle_gz_multiple(objs, dirname):
    for (i, obj) in enumerate(objs):
        write_pickle_gz(obj, f"{dirname}/{i:05d}.pkl.gz")
 def train(args):
    basepath = args["<challenge>"]
    problem_name, challenge_name = args["<challenge>"].split("/")
@ -78,7 +62,9 @@ def train(args):
    done_filename = f"{basepath}/train/done"
    if not os.path.isfile(done_filename):
-        train_instances = glob.glob(f"{basepath}/train/*.gz")
+        train_instances = [
            PickleGzInstance(f) for f in glob.glob(f"{basepath}/train/*.gz")
        ]
        solver = LearningSolver(
            solver=lambda: GurobiPyomoSolver(
                params={
@ -96,7 +82,7 @@ def train(args):
 def test_baseline(args):
    basepath = args["<challenge>"]
-    test_instances = glob.glob(f"{basepath}/test/*.gz")
+    test_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/test/*.gz")]
    csv_filename = f"{basepath}/benchmark_baseline.csv"
    if not os.path.isfile(csv_filename):
        solvers = {
@ -119,8 +105,8 @@ def test_baseline(args):
 def test_ml(args):
    basepath = args["<challenge>"]
-    test_instances = glob.glob(f"{basepath}/test/*.gz")
+    test_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/test/*.gz")]
-    train_instances = glob.glob(f"{basepath}/train/*.gz")
+    train_instances = [PickleGzInstance(f) for f in glob.glob(f"{basepath}/train/*.gz")]
    csv_filename = f"{basepath}/benchmark_ml.csv"
    if not os.path.isfile(csv_filename):
        solvers = {
--- a/docs/usage.md
+++ b/docs/usage.md
@ -119,11 +119,10 @@ For more significant performance benefits, `LearningSolver` can also be configur
 ### 6.1 Saving and loading solver state
-After solving a large number of training instances, it may be desirable to save the current state of `LearningSolver` to disk, so that the solver can still use the acquired knowledge after the application restarts. This can be accomplished by using the standard `pickle` module, as the following example illustrates:
+After solving a large number of training instances, it may be desirable to save the current state of `LearningSolver` to disk, so that the solver can still use the acquired knowledge after the application restarts. This can be accomplished by using the utility functions `write_pickle_gz` and `read_pickle_gz`, as the following example illustrates:
 ```python
-from miplearn import LearningSolver
+from miplearn import LearningSolver, write_pickle_gz, read_pickle_gz
 import pickle
 # Solve training instances
 training_instances = [...]
@ -135,14 +134,12 @@ for instance in training_instances:
 solver.fit(training_instances)
 # Save trained solver to disk
-with open("solver.pickle", "wb") as file:
+write_pickle_gz(solver, "solver.pkl.gz")
    pickle.dump(solver, file)
 # Application restarts...
 # Load trained solver from disk
-with open("solver.pickle", "rb") as file:
+solver = read_pickle_gz("solver.pkl.gz")
    solver = pickle.load(file)
 # Solve additional instances
 test_instances = [...]
@ -171,23 +168,24 @@ solver.parallel_solve(test_instances)
 ### 6.3 Solving instances from the disk
-In all examples above, we have assumed that instances are available as Python objects, stored in memory. When problem instances are very large, or when there is a large number of problem instances, this approach may require an excessive amount of memory. To reduce memory requirements, MIPLearn can also operate on instances that are stored on disk. More precisely, the methods `fit`, `solve` and `parallel_solve` in `LearningSolver` can operate on filenames (or lists of filenames) instead of instance objects, as the next example illustrates.
+In all examples above, we have assumed that instances are available as Python objects, stored in memory. When problem instances are very large, or when there is a large number of problem instances, this approach may require an excessive amount of memory. To reduce memory requirements, MIPLearn can also operate on instances that are stored on disk, through the `PickleGzInstance` class, as the next example illustrates.
 Instance files must be gzipped pickle files containing instance objects (for example, files created with `write_pickle_gz`). The method `solve` loads at most one instance to memory at a time, while `parallel_solve` loads at most `n_jobs` instances.
 ```python
 import pickle
-from miplearn import LearningSolver
+from miplearn import (
    LearningSolver,
    PickleGzInstance,
    write_pickle_gz,
 )
 # Construct and pickle 600 problem instances
 for i in range(600):
    instance = MyProblemInstance([...])
-    with open("instance_%03d.pkl" % i, "w") as file:
+    write_pickle_gz(instance, "instance_%03d.pkl" % i)
        pickle.dump(instance, obj)
 # Split instances into training and test
-test_instances  = ["instance_%03d.pkl" % i for i in range(500)]
+test_instances  = [PickleGzInstance("instance_%03d.pkl" % i) for i in range(500)]
-train_instances = ["instance_%03d.pkl" % i for i in range(500, 600)]
+train_instances = [PickleGzInstance("instance_%03d.pkl" % i) for i in range(500, 600)]
 # Create solver
 solver = LearningSolver([...])
@ -203,20 +201,7 @@ solver.parallel_solve(test_instances, n_jobs=4)
 ```
-By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To write to an alternative file instead, use the arguments `output_filename` (in `solve`) and `output_filenames` (in `parallel_solve`). To discard the modifications instead, use `discard_outputs=True`. This can be useful, for example, during benchmarks.
+By default, `solve` and `parallel_solve` modify files in place. That is, after the instances are loaded from disk and solved, MIPLearn writes them back to the disk, overwriting the original files. To discard the modifications instead, use `LearningSolver(..., discard_outputs=True)`. This can be useful, for example, during benchmarks.
 ```python
 # Solve a single instance file and write the output to another file
 solver.solve("knapsack_1.orig.pkl", output_filename="knapsack_1.solved.pkl")
 # Solve a list of instance files
 instances = ["knapsack_%03d.orig.pkl" % i for i in range(100)]
 output = ["knapsack_%03d.solved.pkl" % i for i in range(100)]
 solver.parallel_solve(instances, output_filenames=output)
 # Solve instances and discard solutions and training data
 solver.parallel_solve(instances, discard_outputs=True)
 ```
 ## 7. Running benchmarks
--- a/miplearn/init.py
+++ b/miplearn/init.py
@ -23,7 +23,13 @@ from .components.steps.convert_tight import ConvertTightIneqsIntoEqsStep
 from .components.steps.drop_redundant import DropRedundantInequalitiesStep
 from .components.steps.relax_integrality import RelaxIntegralityStep
 from .extractors import InstanceFeaturesExtractor
-from .instance import Instance
+from .instance import (
    Instance,
    PickleGzInstance,
    write_pickle_gz,
    write_pickle_gz_multiple,
    read_pickle_gz,
 )
 from .log import setup_logger
 from .solvers.gurobi import GurobiSolver
 from .solvers.internal import InternalSolver
--- a/miplearn/benchmark.py
+++ b/miplearn/benchmark.py
@ -52,7 +52,7 @@ class BenchmarkRunner:
    def parallel_solve(
        self,
-        instances: Union[List[str], List[Instance]],
+        instances: List[Instance],
        n_jobs: int = 1,
        n_trials: int = 3,
    ) -> None:
@ -61,7 +61,7 @@ class BenchmarkRunner:
        Parameters
        ----------
-        instances: Union[List[str], List[Instance]]
+        instances: List[Instance]
            List of instances to solve. This can either be a list of instances
            already loaded in memory, or a list of filenames pointing to pickled (and
            optionally gzipped) files.
@ -98,17 +98,14 @@ class BenchmarkRunner:
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        self.results.to_csv(filename)
-    def fit(self, instances: Union[List[str], List[Instance]]) -> None:
+    def fit(self, instances: List[Instance]) -> None:
        """
        Trains all solvers with the provided training instances.
        Parameters
        ----------
-        instances:  Union[List[str], List[Instance]]
+        instances:  List[Instance]
-            List of training instances. This can either be a list of instances
+            List of training instances.
            already loaded in memory, or a list of filenames pointing to pickled (and
            optionally gzipped) files.
        """
        for (solver_name, solver) in self.solvers.items():
            logger.debug(f"Fitting {solver_name}...")
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@ -2,10 +2,10 @@
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
 from typing import Any, List, TYPE_CHECKING, Tuple, Dict, Hashable
 import numpy as np
 from typing import Any, List, Union, TYPE_CHECKING, Tuple, Dict, Optional, Hashable
 from miplearn.extractors import InstanceIterator
 from miplearn.instance import Instance
 from miplearn.types import LearningSolveStats, TrainingSample, Features
@ -120,11 +120,11 @@ class Component:
    def xy_instances(
        self,
-        instances: Union[List[str], List[Instance]],
+        instances: List[Instance],
    ) -> Tuple[Dict, Dict]:
        x_combined: Dict = {}
        y_combined: Dict = {}
-        for instance in InstanceIterator(instances):
+        for instance in instances:
            assert isinstance(instance, Instance)
            for sample in instance.training_data:
                xy = self.sample_xy(instance.features, sample)
@ -141,7 +141,7 @@ class Component:
    def fit(
        self,
-        training_instances: Union[List[str], List[Instance]],
+        training_instances: List[Instance],
    ) -> None:
        x, y = self.xy_instances(training_instances)
        for cat in x.keys():
@ -198,9 +198,9 @@ class Component:
    ) -> None:
        return
-    def evaluate(self, instances: Union[List[str], List[Instance]]) -> List:
+    def evaluate(self, instances: List[Instance]) -> List:
        ev = []
-        for instance in InstanceIterator(instances):
+        for instance in instances:
            for sample in instance.training_data:
                ev += [self.sample_evaluate(instance.features, sample)]
        return ev
--- a/miplearn/components/lazy_dynamic.py
+++ b/miplearn/components/lazy_dynamic.py
@ -13,7 +13,7 @@ from miplearn.classifiers import Classifier
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.components import classifier_evaluation_dict
 from miplearn.components.component import Component
-from miplearn.extractors import InstanceFeaturesExtractor, InstanceIterator
+from miplearn.extractors import InstanceFeaturesExtractor
 logger = logging.getLogger(__name__)
@ -68,7 +68,7 @@ class DynamicLazyConstraintsComponent(Component):
        self.classifiers = {}
        violation_to_instance_idx = {}
-        for (idx, instance) in enumerate(InstanceIterator(training_instances)):
+        for (idx, instance) in enumerate(training_instances):
            for v in instance.found_violated_lazy_constraints:
                if isinstance(v, list):
                    v = tuple(v)
--- a/miplearn/components/objective.py
+++ b/miplearn/components/objective.py
@ -3,22 +3,14 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 import logging
-from typing import List, Dict, Union, Optional, Any, TYPE_CHECKING, Tuple, Hashable
+from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Hashable
 import numpy as np
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import (
    mean_squared_error,
    explained_variance_score,
    max_error,
    mean_absolute_error,
    r2_score,
 )
 from miplearn.classifiers import Regressor
 from miplearn.classifiers.sklearn import ScikitLearnRegressor
 from miplearn.components.component import Component
 from miplearn.extractors import InstanceIterator
 from miplearn.instance import Instance
 from miplearn.types import TrainingSample, LearningSolveStats, Features
--- a/miplearn/components/steps/convert_tight.py
+++ b/miplearn/components/steps/convert_tight.py
@ -13,7 +13,6 @@ from miplearn.classifiers.counting import CountingClassifier
 from miplearn.components import classifier_evaluation_dict
 from miplearn.components.component import Component
 from miplearn.components.steps.drop_redundant import DropRedundantInequalitiesStep
 from miplearn.extractors import InstanceIterator
 logger = logging.getLogger(__name__)
@ -116,7 +115,7 @@ class ConvertTightIneqsIntoEqsStep(Component):
    def _x_train(instances):
        x = {}
        for instance in tqdm(
-            InstanceIterator(instances),
+            instances,
            desc="Extract (drop:x)",
            disable=len(instances) < 5,
        ):
@ -139,7 +138,7 @@ class ConvertTightIneqsIntoEqsStep(Component):
    def y(self, instances):
        y = {}
        for instance in tqdm(
-            InstanceIterator(instances),
+            instances,
            desc="Extract (rlx:conv_ineqs:y)",
            disable=len(instances) < 5,
        ):
--- a/miplearn/components/steps/drop_redundant.py
+++ b/miplearn/components/steps/drop_redundant.py
@ -6,14 +6,13 @@ import logging
 from copy import deepcopy
 import numpy as np
 from tqdm import tqdm
 from p_tqdm import p_umap
 from tqdm import tqdm
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.components import classifier_evaluation_dict
 from miplearn.components.component import Component
 from miplearn.components.lazy_static import LazyConstraint
 from miplearn.extractors import InstanceIterator
 logger = logging.getLogger(__name__)
@ -131,7 +130,6 @@ class DropRedundantInequalitiesStep(Component):
        def _extract(instance):
            x = {}
            y = {}
            for instance in InstanceIterator([instance]):
            for training_data in instance.training_data:
                for (cid, slack) in training_data["slacks"].items():
                    category = instance.get_constraint_category(cid)
@ -149,13 +147,7 @@ class DropRedundantInequalitiesStep(Component):
            return x, y
        if n_jobs == 1:
-            results = [
+            results = [_extract(i) for i in tqdm(instances, desc="Extract (drop 1/3)")]
                _extract(i)
                for i in tqdm(
                    instances,
                    desc="Extract (drop 1/3)",
                )
            ]
        else:
            results = p_umap(
                _extract,
--- a/miplearn/extractors.py
+++ b/miplearn/extractors.py
@ -2,51 +2,14 @@
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
 import gzip
 import logging
 import pickle
 from abc import ABC, abstractmethod
 from typing import List, Union, cast, IO
 import numpy as np
 from tqdm.auto import tqdm
 from miplearn.instance import Instance
 logger = logging.getLogger(__name__)
 class InstanceIterator:
    def __init__(
        self,
        instances: Union[List[str], List[Instance]],
    ) -> None:
        self.instances = instances
        self.current = 0
    def __iter__(self):
        return self
    def __next__(self) -> Instance:
        if self.current >= len(self.instances):
            raise StopIteration
        result = self.instances[self.current]
        self.current += 1
        if isinstance(result, str):
            logger.debug("Read: %s" % result)
            try:
                if result.endswith(".gz"):
                    with gzip.GzipFile(result, "rb") as gzfile:
                        result = pickle.load(cast(IO[bytes], gzfile))
                else:
                    with open(result, "rb") as file:
                        result = pickle.load(cast(IO[bytes], file))
            except pickle.UnpicklingError:
                raise Exception(f"Invalid instance file: {result}")
        assert isinstance(result, Instance)
        return result
 class Extractor(ABC):
    @abstractmethod
    def extract(self, instances):
@ -77,6 +40,6 @@ class InstanceFeaturesExtractor(Extractor):
                        instance.training_data[0]["LP value"],
                    ]
                )
-                for instance in InstanceIterator(instances)
+                for instance in instances
            ]
        )
--- a/miplearn/instance.py
+++ b/miplearn/instance.py
@ -3,14 +3,34 @@
 #  Released under the modified BSD license. See COPYING.md for more details.
 import gzip
-import json
+import logging
 import os
 import pickle
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional, Hashable
+from typing import Any, List, Optional, Hashable, IO, cast
 import numpy as np
 from miplearn.types import TrainingSample, VarIndex, Features
 logger = logging.getLogger(__name__)
 def write_pickle_gz(obj: Any, filename: str) -> None:
     """
     Pickle an object and write it to a gzip-compressed file.

     Any missing parent directory of `filename` is created before writing.

     Parameters
     ----------
     obj: Any
         Object to serialize with `pickle.dump`.
     filename: str
         Destination path. It may be a bare file name without any directory
         component, in which case the file is written relative to the current
         working directory.
     """
     logger.info(f"Writing: {filename}")
     dirname = os.path.dirname(filename)
     # os.path.dirname returns "" for a bare file name, and os.makedirs("")
     # raises FileNotFoundError, so only create directories when there is one.
     if dirname:
         os.makedirs(dirname, exist_ok=True)
     with gzip.GzipFile(filename, "wb") as file:
         pickle.dump(obj, cast(IO[bytes], file))
 def read_pickle_gz(filename: str) -> Any:
     """
     Read a gzip-compressed pickle file and return the object stored in it.

     Parameters
     ----------
     filename: str
         Path of the gzipped pickle file to read.
     """
     logger.info(f"Reading: {filename}")
     with gzip.GzipFile(filename, "rb") as stream:
         loaded = pickle.load(cast(IO[bytes], stream))
     return loaded
 def write_pickle_gz_multiple(objs: List[Any], dirname: str) -> None:
     """
     Write each object to its own gzipped pickle file inside `dirname`.

     Files are named after the zero-padded, five-digit position of the object
     in `objs` (`00000.pkl.gz`, `00001.pkl.gz`, ...).

     Parameters
     ----------
     objs: List[Any]
         Objects to be written, one file per object.
     dirname: str
         Directory that receives the files.
     """
     for (position, item) in enumerate(objs):
         target = f"{dirname}/{position:05d}.pkl.gz"
         write_pickle_gz(item, target)
 # noinspection PyMethodMayBeStatic
 class Instance(ABC):
@ -155,12 +175,116 @@ class Instance(ABC):
    def build_user_cut(self, model, violation):
        pass
-    def load(self, filename):
+    def flush(self) -> None:
-        with gzip.GzipFile(filename, "r") as f:
+        """
-            data = json.loads(f.read().decode("utf-8"))
+        Save any pending changes made to the instance to the underlying data store.
-        self.__dict__ = data
+        """
        pass
 def lazy_load(func):
     """
     Decorator that loads the wrapped instance before the first method call.

     The decorated method must belong to an object that has an `instance`
     attribute and a `_load()` method. If `instance` is None when the method is
     called, `_load()` is invoked, its result is stored in `instance`, and the
     loaded object's `features` and `training_data` are copied onto the wrapper.
     The original method is then called with the same positional and keyword
     arguments.
     """
     # Imported locally so the decorator does not depend on a module-level import.
     import functools

     # functools.wraps keeps the name and docstring of the decorated method.
     @functools.wraps(func)
     def inner(self, *args, **kwargs):
         if self.instance is None:
             self.instance = self._load()
             self.features = self.instance.features
             self.training_data = self.instance.training_data
         return func(self, *args, **kwargs)

     return inner
 class PickleGzInstance(Instance):
     """
     An instance backed by a gzipped pickle file.

     The instance is only loaded to memory after an operation is called (for
     example, `to_model`). Every public method forwards to the loaded instance.

     Parameters
     ----------
     filename: str
         Path of the gzipped pickle file that should be loaded.
     """

     def __init__(self, filename: str) -> None:
         super().__init__()
         assert os.path.exists(filename), f"File not found: {filename}"
         # The wrapped instance; stays None until a @lazy_load method is called.
         self.instance: Optional[Instance] = None
         self.filename: str = filename

     @lazy_load
     def to_model(self) -> Any:
         assert self.instance is not None
         return self.instance.to_model()

     @lazy_load
     def get_instance_features(self) -> List[float]:
         assert self.instance is not None
         return self.instance.get_instance_features()

     @lazy_load
     def get_variable_features(self, var_name: str, index: VarIndex) -> List[float]:
         assert self.instance is not None
         return self.instance.get_variable_features(var_name, index)

     @lazy_load
     def get_variable_category(
         self,
         var_name: str,
         index: VarIndex,
     ) -> Optional[Hashable]:
         assert self.instance is not None
         return self.instance.get_variable_category(var_name, index)

     @lazy_load
     def get_constraint_features(self, cid: str) -> Optional[List[float]]:
         assert self.instance is not None
         return self.instance.get_constraint_features(cid)

     @lazy_load
     def get_constraint_category(self, cid: str) -> Optional[str]:
         assert self.instance is not None
         return self.instance.get_constraint_category(cid)

     @lazy_load
     def has_static_lazy_constraints(self) -> bool:
         assert self.instance is not None
         return self.instance.has_static_lazy_constraints()

     @lazy_load
     def has_dynamic_lazy_constraints(self) -> bool:
         assert self.instance is not None
         return self.instance.has_dynamic_lazy_constraints()

     @lazy_load
     def is_constraint_lazy(self, cid: str) -> bool:
         assert self.instance is not None
         return self.instance.is_constraint_lazy(cid)

     @lazy_load
     def find_violated_lazy_constraints(self, model):
         assert self.instance is not None
         return self.instance.find_violated_lazy_constraints(model)

     @lazy_load
     def build_lazy_constraint(self, model, violation):
         assert self.instance is not None
         return self.instance.build_lazy_constraint(model, violation)

     @lazy_load
     def find_violated_user_cuts(self, model):
         assert self.instance is not None
         return self.instance.find_violated_user_cuts(model)

     @lazy_load
     def build_user_cut(self, model, violation):
         assert self.instance is not None
         return self.instance.build_user_cut(model, violation)

     def _load(self) -> Instance:
         """
         Read the backing file and return the instance stored in it.
         """
         obj = read_pickle_gz(self.filename)
         assert isinstance(obj, Instance)
         return obj

     def flush(self) -> None:
         """
         Write the loaded instance back to `self.filename`.

         Does nothing if the instance has not been loaded, since there are no
         pending changes and writing would replace the file with a pickled None.
         """
         if self.instance is None:
             return
         write_pickle_gz(self.instance, self.filename)
--- a/miplearn/solvers/learning.py
+++ b/miplearn/solvers/learning.py
@ -18,7 +18,7 @@ from miplearn.components.lazy_dynamic import DynamicLazyConstraintsComponent
 from miplearn.components.objective import ObjectiveValueComponent
 from miplearn.components.primal import PrimalSolutionComponent
 from miplearn.features import FeaturesExtractor
-from miplearn.instance import Instance
+from miplearn.instance import Instance, PickleGzInstance
 from miplearn.solvers import _RedirectOutput
 from miplearn.solvers.internal import InternalSolver
 from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver
@ -30,8 +30,7 @@ logger = logging.getLogger(__name__)
 class _GlobalVariables:
    def __init__(self) -> None:
        self.solver: Optional[LearningSolver] = None
-        self.instances: Optional[Union[List[str], List[Instance]]] = None
+        self.instances: Optional[List[Instance]] = None
        self.output_filenames: Optional[List[str]] = None
        self.discard_outputs: bool = False
@ -44,16 +43,10 @@ _GLOBAL = [_GlobalVariables()]
 def _parallel_solve(idx):
    solver = _GLOBAL[0].solver
    instances = _GLOBAL[0].instances
    output_filenames = _GLOBAL[0].output_filenames
    discard_outputs = _GLOBAL[0].discard_outputs
    if output_filenames is None:
        output_filename = None
    else:
        output_filename = output_filenames[idx]
    try:
        stats = solver.solve(
            instances[idx],
            output_filename=output_filename,
            discard_output=discard_outputs,
        )
        return stats, instances[idx]
@ -129,30 +122,12 @@ class LearningSolver:
    def _solve(
        self,
-        instance: Union[Instance, str],
+        instance: Instance,
        model: Any = None,
        output_filename: Optional[str] = None,
        discard_output: bool = False,
        tee: bool = False,
    ) -> LearningSolveStats:
        # Load instance from file, if necessary
        filename = None
        fileformat = None
        file: Union[BinaryIO, gzip.GzipFile]
        if isinstance(instance, str):
            filename = instance
            logger.info("Reading: %s" % filename)
            if filename.endswith(".gz"):
                fileformat = "pickle-gz"
                with gzip.GzipFile(filename, "rb") as file:
                    instance = pickle.load(cast(IO[bytes], file))
            else:
                fileformat = "pickle"
                with open(filename, "rb") as file:
                    instance = pickle.load(cast(IO[bytes], file))
        assert isinstance(instance, Instance)
        # Generate model
        if model is None:
            with _RedirectOutput([]):
@ -262,23 +237,15 @@ class LearningSolver:
            component.after_solve_mip(*callback_args)
        # Write to file, if necessary
-        if not discard_output and filename is not None:
+        if not discard_output:
-            if output_filename is None:
+            instance.flush()
-                output_filename = filename
+
            logger.info("Writing: %s" % output_filename)
            if fileformat == "pickle":
                with open(output_filename, "wb") as file:
                    pickle.dump(instance, cast(IO[bytes], file))
            else:
                with gzip.GzipFile(output_filename, "wb") as file:
                    pickle.dump(instance, cast(IO[bytes], file))
        return stats
    def solve(
        self,
-        instance: Union[Instance, str],
+        instance: Instance,
        model: Any = None,
        output_filename: Optional[str] = None,
        discard_output: bool = False,
        tee: bool = False,
    ) -> LearningSolveStats:
@ -298,14 +265,10 @@ class LearningSolver:
        Parameters
        ----------
-        instance: Union[Instance, str]
+        instance: Instance
-            The instance to be solved, or a filename.
+            The instance to be solved.
        model: Any
            The corresponding Pyomo model. If not provided, it will be created.
        output_filename: Optional[str]
            If instance is a filename and output_filename is provided, write the
            modified instance to this file, instead of replacing the original one. If
            output_filename is None (the default), modified the original file in-place.
        discard_output: bool
            If True, do not write the modified instances anywhere; simply discard
            them. Useful during benchmarking.
@ -325,30 +288,28 @@ class LearningSolver:
            details.
        """
        if self.simulate_perfect:
-            if not isinstance(instance, str):
+            if not isinstance(instance, PickleGzInstance):
                raise Exception("Not implemented")
            with tempfile.NamedTemporaryFile(suffix=os.path.basename(instance)) as tmp:
            self._solve(
                instance=instance,
                model=model,
                    output_filename=tmp.name,
                tee=tee,
                discard_output=True,
            )
-                self.fit([tmp.name])
+            self.fit([instance])
            instance.instance = None
        return self._solve(
            instance=instance,
            model=model,
            output_filename=output_filename,
            discard_output=discard_output,
            tee=tee,
        )
    def parallel_solve(
        self,
-        instances: Union[List[str], List[Instance]],
+        instances: List[Instance],
        n_jobs: int = 4,
        label: str = "Solve",
        output_filenames: Optional[List[str]] = None,
        discard_outputs: bool = False,
    ) -> List[LearningSolveStats]:
        """
@ -361,17 +322,13 @@ class LearningSolver:
        Parameters
        ----------
        output_filenames: Optional[List[str]]
            If instances are file names and output_filenames is provided, write the
            modified instances to these files, instead of replacing the original
            files. If output_filenames is None, modifies the instances in-place.
        discard_outputs: bool
            If True, do not write the modified instances anywhere; simply discard
            them instead. Useful during benchmarking.
        label: str
            Label to show in the progress bar.
-        instances: Union[List[str], List[Instance]]
+        instances: List[Instance]
-            The instances to be solved
+            The instances to be solved.
        n_jobs: int
            Number of instances to solve in parallel at a time.
@ -388,7 +345,6 @@ class LearningSolver:
            self.internal_solver = None
            self._silence_miplearn_logger()
            _GLOBAL[0].solver = self
            _GLOBAL[0].output_filenames = output_filenames
            _GLOBAL[0].instances = instances
            _GLOBAL[0].discard_outputs = discard_outputs
            results = p_map(
@ -405,7 +361,7 @@ class LearningSolver:
            self._restore_miplearn_logger()
            return stats
-    def fit(self, training_instances: Union[List[str], List[Instance]]) -> None:
+    def fit(self, training_instances: List[Instance]) -> None:
        logger.debug("Fitting...")
        if len(training_instances) == 0:
            return
--- a/tests/solvers/test_learning_solver.py
+++ b/tests/solvers/test_learning_solver.py
@ -8,6 +8,7 @@ import pickle
 import tempfile
 import os
 from miplearn.instance import PickleGzInstance, write_pickle_gz, read_pickle_gz
 from miplearn.solvers.gurobi import GurobiSolver
 from miplearn.solvers.learning import LearningSolver
 from . import _get_knapsack_instance, get_internal_solvers
@ -78,61 +79,40 @@ def test_parallel_solve():
 def test_solve_fit_from_disk():
    for internal_solver in get_internal_solvers():
        # Create instances and pickle them
-        filenames = []
+        instances = []
        for k in range(3):
            instance = _get_knapsack_instance(internal_solver)
            with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as file:
-                filenames += [file.name]
+                instances += [PickleGzInstance(file.name)]
-                pickle.dump(instance, file)
+                write_pickle_gz(instance, file.name)
        # Test: solve
        solver = LearningSolver(solver=internal_solver)
-        solver.solve(filenames[0])
+        solver.solve(instances[0])
-        with open(filenames[0], "rb") as file:
+        instance_loaded = read_pickle_gz(instances[0].filename)
-            instance = pickle.load(file)
+        assert len(instance_loaded.training_data) > 0
            assert len(instance.training_data) > 0
        # Test: parallel_solve
-        solver.parallel_solve(filenames)
+        solver.parallel_solve(instances)
-        for filename in filenames:
+        for instance in instances:
-            with open(filename, "rb") as file:
+            instance_loaded = read_pickle_gz(instance.filename)
                instance = pickle.load(file)
            assert len(instance.training_data) > 0
        # Test: solve (with specified output)
        output = [f + ".out" for f in filenames]
        solver.solve(
            filenames[0],
            output_filename=output[0],
        )
        assert os.path.isfile(output[0])
        # Test: parallel_solve (with specified output)
        solver.parallel_solve(
            filenames,
            output_filenames=output,
        )
        for filename in output:
            assert os.path.isfile(filename)
        # Delete temporary files
-        for filename in filenames:
+        for instance in instances:
-            os.remove(filename)
+            os.remove(instance.filename)
        for filename in output:
            os.remove(filename)
 def test_simulate_perfect():
    internal_solver = GurobiSolver
    instance = _get_knapsack_instance(internal_solver)
    with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as tmp:
-        pickle.dump(instance, tmp)
+        write_pickle_gz(instance, tmp.name)
        tmp.flush()
        solver = LearningSolver(
            solver=internal_solver,
            simulate_perfect=True,
        )
-        stats = solver.solve(tmp.name)
+        stats = solver.solve(PickleGzInstance(tmp.name))
        assert stats["Lower bound"] == stats["Objective: Predicted lower bound"]
--- a/tests/test_instance.py
+++ b/tests/test_instance.py
@ -0,0 +1,16 @@
 #  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 #  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 #  Released under the modified BSD license. See COPYING.md for more details.
 import tempfile
 from miplearn import GurobiSolver
 from miplearn.instance import write_pickle_gz, PickleGzInstance
 from tests.fixtures.knapsack import get_knapsack_instance
 def test_pickled() -> None:
     """
     Check that an instance written with `write_pickle_gz` can be wrapped by
     `PickleGzInstance` and still produce a model.
     """
     original = get_knapsack_instance(GurobiSolver())
     # The context manager closes and removes the temporary file even if an
     # assertion fails.
     with tempfile.NamedTemporaryFile() as file:
         write_pickle_gz(original, file.name)
         pickled = PickleGzInstance(file.name)
         assert pickled.to_model() is not None