Merge branch 'feature/files' into dev

2025-12-06 09:28:51 -06:00 · 2020-12-04 09:41:23 -06:00
parent 54d80bfa85 388b10c63c
commit 0b41c882ff
7 changed files with 246 additions and 68 deletions
--- a/miplearn/benchmark.py
+++ b/miplearn/benchmark.py
@@ -37,7 +37,8 @@ class BenchmarkRunner:
        for (solver_name, solver) in self.solvers.items():
            results = solver.parallel_solve(trials,
                                            n_jobs=n_jobs,
-                                            label="Solve (%s)" % solver_name)
+                                            label="Solve (%s)" % solver_name,
+                                            output=None)
            for i in range(len(trials)):
                idx = (i % len(instances)) + index_offset
                self._push_result(results[i],
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -53,7 +53,6 @@ class PrimalSolutionComponent(Component):

        for category in tqdm(features.keys(),
                             desc="Fit (primal)",
-                             disable=not sys.stdout.isatty(),
                            ):
            x_train = features[category]
            for label in [0, 1]:
@@ -110,7 +109,6 @@ class PrimalSolutionComponent(Component):
              "Fix one": {}}
        for instance_idx in tqdm(range(len(instances)),
                                 desc="Evaluate (primal)",
-                                 disable=not sys.stdout.isatty(),
                                ):
            instance = instances[instance_idx]
            solution_actual = instance.solution
--- a/miplearn/components/relaxation.py
+++ b/miplearn/components/relaxation.py
@@ -4,6 +4,8 @@

 import logging
 import sys
+import numpy as np
+
 from copy import deepcopy

 from tqdm import tqdm
@@ -12,6 +14,7 @@ from miplearn import Component
 from miplearn.classifiers.counting import CountingClassifier
 from miplearn.components import classifier_evaluation_dict
 from miplearn.components.lazy_static import LazyConstraint
+from miplearn.extractors import InstanceIterator

 logger = logging.getLogger(__name__)

@@ -83,16 +86,12 @@ class RelaxationComponent(Component):
        instance.slacks = solver.internal_solver.get_constraint_slacks()

    def fit(self, training_instances):
-        training_instances = [instance
-                              for instance in training_instances
-                              if hasattr(instance, "slacks")]
        logger.debug("Extracting x and y...")
        x = self.x(training_instances)
        y = self.y(training_instances)
        logger.debug("Fitting...")
        for category in tqdm(x.keys(),
-                             desc="Fit (relaxation)",
-                             disable=not sys.stdout.isatty()):
+                             desc="Fit (relaxation)"):
            if category not in self.classifiers:
                self.classifiers[category] = deepcopy(self.classifier_prototype)
            self.classifiers[category].fit(x[category], y[category])
@@ -103,7 +102,9 @@ class RelaxationComponent(Component):
          return_constraints=False):
        x = {}
        constraints = {}
-        for instance in instances:
+        for instance in tqdm(InstanceIterator(instances),
+                             desc="Extract (relaxation:x)",
+                             disable=len(instances) < 5):
            if constraint_ids is not None:
                cids = constraint_ids
            else:
@@ -124,7 +125,9 @@ class RelaxationComponent(Component):

    def y(self, instances):
        y = {}
-        for instance in instances:
+        for instance in tqdm(InstanceIterator(instances),
+                             desc="Extract (relaxation:y)",
+                             disable=len(instances) < 5):
            for (cid, slack) in instance.slacks.items():
                category = instance.get_constraint_category(cid)
                if category is None:
@@ -143,7 +146,7 @@ class RelaxationComponent(Component):
            if category not in self.classifiers:
                continue
            y[category] = []
-            # x_cat = np.array(x_cat)
+            #x_cat = np.array(x_cat)
            proba = self.classifiers[category].predict_proba(x_cat)
            for i in range(len(proba)):
                if proba[i][1] >= self.threshold:
--- a/miplearn/extractors.py
+++ b/miplearn/extractors.py
@@ -3,14 +3,41 @@
 #  Released under the modified BSD license. See COPYING.md for more details.

 import logging
-from abc import ABC, abstractmethod
+import pickle
+import gzip

 import numpy as np
-from tqdm import tqdm
+
+from tqdm.auto import tqdm
+from abc import ABC, abstractmethod

 logger = logging.getLogger(__name__)


+class InstanceIterator:
+    """
+    Single-pass iterator over a list of instances, loading files on demand.
+
+    Each item in `instances` may be either an already-loaded object, which is
+    returned unchanged, or a string, which is treated as the path to a pickled
+    object (gzip-compressed if the name ends in ".gz"). Files are read one at
+    a time, only when the corresponding item is requested.
+
+    NOTE: files are loaded with `pickle.load`, which can execute arbitrary
+    code. Only pass filenames that come from a trusted source.
+    """
+
+    def __init__(self, instances):
+        self.instances = instances
+        self.current = 0  # index of the next item to be returned
+
+    def __iter__(self):
+        # The object is its own iterator; it cannot be rewound.
+        return self
+
+    def __len__(self):
+        # Number of items not yet returned. Allows tqdm to display a total.
+        return len(self.instances) - self.current
+
+    def __next__(self):
+        if self.current >= len(self.instances):
+            raise StopIteration
+        result = self.instances[self.current]
+        self.current += 1
+        if isinstance(result, str):
+            logger.info("Read: %s", result)
+            # Same unpickling code for both formats; only the opener differs.
+            opener = gzip.GzipFile if result.endswith(".gz") else open
+            with opener(result, "rb") as file:
+                result = pickle.load(file)
+        return result
+
+
 class Extractor(ABC):
    @abstractmethod
    def extract(self, instances,):
@@ -34,7 +61,7 @@ class Extractor(ABC):
 class VariableFeaturesExtractor(Extractor):
    def extract(self, instances):
        result = {}
-        for instance in tqdm(instances,
+        for instance in tqdm(InstanceIterator(instances),
                             desc="Extract (vars)",
                             disable=len(instances) < 5):
            instance_features = instance.get_instance_features()
@@ -59,7 +86,7 @@ class SolutionExtractor(Extractor):
        
    def extract(self, instances):
        result = {}
-        for instance in tqdm(instances,
+        for instance in tqdm(InstanceIterator(instances),
                             desc="Extract (solution)",
                             disable=len(instances) < 5):
            var_split = self.split_variables(instance)
@@ -87,7 +114,7 @@ class InstanceFeaturesExtractor(Extractor):
                instance.get_instance_features(),
                instance.lp_value,
            ])
-            for instance in instances
+            for instance in InstanceIterator(instances)
        ])
    
    
@@ -98,8 +125,11 @@ class ObjectiveValueExtractor(Extractor):
        
    def extract(self, instances):
        if self.kind == "lower bound":
-            return np.array([[instance.lower_bound] for instance in instances])
+            return np.array([[instance.lower_bound]
+                             for instance in InstanceIterator(instances)])
        if self.kind == "upper bound":
-            return np.array([[instance.upper_bound] for instance in instances])
+            return np.array([[instance.upper_bound]
+                             for instance in InstanceIterator(instances)])
        if self.kind == "lp":
-            return np.array([[instance.lp_value] for instance in instances])
+            return np.array([[instance.lp_value]
+                             for instance in InstanceIterator(instances)])
--- a/miplearn/solvers/learning.py
+++ b/miplearn/solvers/learning.py
@@ -3,6 +3,11 @@
 #  Released under the modified BSD license. See COPYING.md for more details.

 import logging
+import pickle
+import os
+import tempfile
+import gzip
+
 from copy import deepcopy
 from typing import Optional, List
 from p_tqdm import p_map
@@ -20,26 +25,21 @@ logger = logging.getLogger(__name__)
 # Global memory for multiprocessing
 SOLVER = [None]  # type: List[Optional[LearningSolver]]
 INSTANCES = [None]  # type: List[Optional[dict]]
+OUTPUTS = [None]


-def _parallel_solve(instance_idx):
+def _parallel_solve(idx):
+    """
+    Worker function for `LearningSolver.parallel_solve`.
+
+    Solves the instance at position `idx` of the global INSTANCES[0] list,
+    using a deep copy of the solver stored in SOLVER[0]. The `output` argument
+    passed to `solve` is derived from the global OUTPUTS[0]:
+
+        - None: `output=None` is passed through
+        - empty sequence: `output=""` is passed
+        - otherwise: `output=OUTPUTS[0][idx]` is passed
+
+    Returns a tuple `(stats, instance)`, where `stats` is the value returned
+    by `solve` and `instance` is the item taken from INSTANCES[0].
+    """
     solver = deepcopy(SOLVER[0])
-    instance = INSTANCES[0][instance_idx]
-    if not hasattr(instance, "found_violated_lazy_constraints"):
-        instance.found_violated_lazy_constraints = []
-    if not hasattr(instance, "found_violated_user_cuts"):
-        instance.found_violated_user_cuts = []
-    if not hasattr(instance, "slacks"):
-        instance.slacks = {}
-    solver_results = solver.solve(instance)
-    return {
-        "solver_results": solver_results,
-        "solution": instance.solution,
-        "lp_solution": instance.lp_solution,
-        "found_violated_lazy_constraints": instance.found_violated_lazy_constraints,
-        "found_violated_user_cuts": instance.found_violated_user_cuts,
-        "slacks": instance.slacks
-    }
+    if OUTPUTS[0] is None:
+        output = None
+    elif len(OUTPUTS[0]) == 0:
+        output = ""
+    else:
+        output = OUTPUTS[0][idx]
+    instance = INSTANCES[0][idx]
+    # Use the logger instead of printing from worker processes.
+    logger.debug("Solving: %s", instance)
+    stats = solver.solve(instance, output=output)
+    return (stats, instance)


 class LearningSolver:
@@ -145,31 +145,43 @@ class LearningSolver:
    def solve(self,
              instance,
              model=None,
+              output="",
              tee=False):
        """
        Solves the given instance. If trained machine-learning models are
        available, they will be used to accelerate the solution process.
+        
+        The argument `instance` may be either an Instance object or a
+        filename pointing to a pickled Instance object. 

        This method modifies the instance object. Specifically, the following
        properties are set:
+        
            - instance.lp_solution
            - instance.lp_value
            - instance.lower_bound
            - instance.upper_bound
            - instance.solution
            - instance.solver_log
+            
        Additional solver components may set additional properties. Please
-        see their documentation for more details.
+        see their documentation for more details. If a filename is provided,
+        then the file is modified in-place. That is, the original file is
+        overwritten.

        If `solver.solve_lp_first` is False, the properties lp_solution and
        lp_value will be set to dummy values.

        Parameters
        ----------
-        instance: miplearn.Instance
-            The instance to be solved
+        instance: miplearn.Instance or str
+            The instance to be solved, or a filename.
        model: pyomo.core.ConcreteModel
            The corresponding Pyomo model. If not provided, it will be created.
+        output: str or None
+            If instance is a filename and output is provided, write the modified
+            instance to this file, instead of replacing the original file. If
+            output is None, discard modified instance.
        tee: bool
            If true, prints solver log to screen.

@@ -185,7 +197,21 @@ class LearningSolver:
            "Predicted UB". See the documentation of each component for more
            details.
        """
-
+        
+        filename = None
+        fileformat = None
+        if isinstance(instance, str):
+            filename = instance
+            logger.info("Reading: %s" % filename)
+            if filename.endswith(".gz"):
+                fileformat = "pickle-gz"
+                with gzip.GzipFile(filename, "rb") as file:
+                    instance = pickle.load(file)
+            else:
+                fileformat = "pickle"
+                with open(filename, "rb") as file:
+                    instance = pickle.load(file)
+                
        if model is None:
            model = instance.to_model()

@@ -236,35 +262,60 @@ class LearningSolver:
        logger.debug("Calling after_solve callbacks...")
        for component in self.components.values():
            component.after_solve(self, instance, model, results)
+            
+        if filename is not None and output is not None:
+            output_filename = output
+            if len(output) == 0:
+                output_filename = filename
+            logger.info("Writing: %s" % output_filename)
+            if fileformat == "pickle":
+                with open(output_filename, "wb") as file:
+                    pickle.dump(instance, file)
+            else:
+                with gzip.GzipFile(output_filename, "wb") as file:
+                    pickle.dump(instance, file)

        return results

-    def parallel_solve(self,
-                       instances,
-                       n_jobs=4,
-                       label="Solve"):
-
+    def parallel_solve(self, instances, n_jobs=4, label="Solve", output=()):
+        """
+        Solves multiple instances in parallel.
+
+        This method is equivalent to calling `solve` for each item on the list,
+        but it processes multiple instances at the same time. Like `solve`, a
+        list of filenames may be provided. After all instances are processed,
+        each entry of the list `instances` is replaced by the corresponding
+        item returned by the worker.
+
+        Parameters
+        ----------
+        instances: [miplearn.Instance] or [str]
+            The instances to be solved
+        n_jobs: int
+            Number of instances to solve in parallel at a time.
+        label: str
+            Description shown next to the progress bar.
+        output: [str] or None
+            Output filenames, one per instance, forwarded to `solve` as its
+            `output` argument. If empty (the default), an empty string is
+            forwarded instead; if None, None is forwarded. See `solve` for
+            the meaning of each value.
+
+        Returns
+        -------
+        Returns a list with one entry for each provided instance, in the same
+        order. Each entry is the value you would obtain by calling:
+
+            [solver.solve(p) for p in instances]
+
+        """
         self.internal_solver = None
         self._silence_miplearn_logger()
         SOLVER[0] = self
+        OUTPUTS[0] = output
         INSTANCES[0] = instances
-        p_map_results = p_map(_parallel_solve,
-                              list(range(len(instances))),
-                              num_cpus=n_jobs,
-                              desc=label)
-        results = [p["solver_results"] for p in p_map_results]
-        for (idx, r) in enumerate(p_map_results):
-            instances[idx].solution = r["solution"]
-            instances[idx].lp_solution = r["lp_solution"]
-            instances[idx].lp_value = r["solver_results"]["LP value"]
-            instances[idx].lower_bound = r["solver_results"]["Lower bound"]
-            instances[idx].upper_bound = r["solver_results"]["Upper bound"]
-            instances[idx].found_violated_lazy_constraints = r["found_violated_lazy_constraints"]
-            instances[idx].found_violated_user_cuts = r["found_violated_user_cuts"]
-            instances[idx].slacks = r["slacks"]
-            instances[idx].solver_log = r["solver_results"]["Log"]
+        results = p_map(_parallel_solve,
+                        list(range(len(instances))),
+                        num_cpus=n_jobs,
+                        desc=label)
+        stats = []
+        for (idx, (s, instance)) in enumerate(results):
+            stats.append(s)
+            instances[idx] = instance
         self._restore_miplearn_logger()
-        return results
+        return stats

    def fit(self, training_instances):
        if len(training_instances) == 0:
--- a/miplearn/solvers/tests/test_learning_solver.py
+++ b/miplearn/solvers/tests/test_learning_solver.py
@@ -5,6 +5,7 @@
 import logging
 import pickle
 import tempfile
+import os

 from miplearn import DynamicLazyConstraintsComponent
 from miplearn import LearningSolver
@@ -65,3 +66,45 @@ def test_add_components():
    solver.add(DynamicLazyConstraintsComponent())
    assert len(solver.components) == 1
    assert "DynamicLazyConstraintsComponent" in solver.components
+
+
+def test_solve_fit_from_disk():
+    """
+    Checks that `solve` and `parallel_solve` accept filenames of pickled
+    instances, both when overwriting the input files and when writing to
+    separate output files.
+    """
+    for internal_solver in _get_internal_solvers():
+        filenames = []
+        output = []
+        try:
+            # Create instances and pickle them
+            for _ in range(3):
+                instance = _get_instance(internal_solver)
+                with tempfile.NamedTemporaryFile(suffix=".pkl",
+                                                 delete=False) as file:
+                    filenames += [file.name]
+                    pickle.dump(instance, file)
+            output = [f + ".out" for f in filenames]
+
+            # Test: solve
+            solver = LearningSolver(solver=internal_solver)
+            solver.solve(filenames[0])
+            with open(filenames[0], "rb") as file:
+                instance = pickle.load(file)
+                assert hasattr(instance, "solution")
+
+            # Test: parallel_solve
+            solver.parallel_solve(filenames)
+            for filename in filenames:
+                with open(filename, "rb") as file:
+                    instance = pickle.load(file)
+                    assert hasattr(instance, "solution")
+
+            # Test: solve (with specified output)
+            solver.solve(filenames[0], output=output[0])
+            assert os.path.isfile(output[0])
+
+            # Test: parallel_solve (with specified output)
+            solver.parallel_solve(filenames, output=output)
+            for filename in output:
+                assert os.path.isfile(filename)
+        finally:
+            # Delete temporary files, even if a check above failed
+            for filename in filenames + output:
+                if os.path.isfile(filename):
+                    os.remove(filename)