diff --git a/miplearn/benchmark.py b/miplearn/benchmark.py index acbe895..b4ab2b3 100644 --- a/miplearn/benchmark.py +++ b/miplearn/benchmark.py @@ -10,7 +10,8 @@ import pandas as pd from miplearn.instance import Instance from miplearn.solvers.learning import LearningSolver -from miplearn.types import LearningSolveStats + +logger = logging.getLogger(__name__) class BenchmarkRunner: @@ -110,6 +111,7 @@ class BenchmarkRunner: """ for (solver_name, solver) in self.solvers.items(): + logger.debug(f"Fitting {solver_name}...") solver.fit(instances) def _silence_miplearn_logger(self) -> None: diff --git a/miplearn/classifiers/__init__.py b/miplearn/classifiers/__init__.py index fc262f7..cfa8f64 100644 --- a/miplearn/classifiers/__init__.py +++ b/miplearn/classifiers/__init__.py @@ -34,7 +34,11 @@ class Classifier(ABC): """ assert isinstance(x_train, np.ndarray) assert isinstance(y_train, np.ndarray) - assert x_train.dtype in [np.float16, np.float32, np.float64] + assert x_train.dtype in [ + np.float16, + np.float32, + np.float64, + ], f"x_train.dtype should be float. Found {x_train.dtype} instead." 
assert y_train.dtype == np.bool8 assert len(x_train.shape) == 2 assert len(y_train.shape) == 2 @@ -67,7 +71,10 @@ class Classifier(ABC): assert isinstance(x_test, np.ndarray) assert len(x_test.shape) == 2 (n_samples, n_features_x) = x_test.shape - assert n_features_x == self.n_features + assert n_features_x == self.n_features, ( + f"Test and training data have different number of " + f"features: {n_features_x} != {self.n_features}" + ) return np.ndarray([]) diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py index b30f985..b0ef043 100644 --- a/miplearn/components/primal.py +++ b/miplearn/components/primal.py @@ -24,11 +24,9 @@ from miplearn.classifiers.adaptive import AdaptiveClassifier from miplearn.classifiers.threshold import MinPrecisionThreshold, Threshold from miplearn.components import classifier_evaluation_dict from miplearn.components.component import Component -from miplearn.extractors import InstanceIterator from miplearn.instance import Instance from miplearn.types import ( TrainingSample, - VarIndex, Solution, LearningSolveStats, Features, @@ -70,30 +68,33 @@ class PrimalSolutionComponent(Component): self._n_one = 0 def before_solve_mip(self, solver, instance, model): - pass - # if len(self.thresholds) > 0: - # logger.info("Predicting primal solution...") - # solution = self.predict(instance) - # - # # Collect prediction statistics - # self._n_free = 0 - # self._n_zero = 0 - # self._n_one = 0 - # for (var, var_dict) in solution.items(): - # for (idx, value) in var_dict.items(): - # if value is None: - # self._n_free += 1 - # else: - # if value < 0.5: - # self._n_zero += 1 - # else: - # self._n_one += 1 - # - # # Provide solution to the solver - # if self.mode == "heuristic": - # solver.internal_solver.fix(solution) - # else: - # solver.internal_solver.set_warm_start(solution) + if len(self.thresholds) > 0: + logger.info("Predicting primal solution...") + solution = self.predict(instance.features, instance.training_data[-1]) + + # 
Collect prediction statistics + self._n_free = 0 + self._n_zero = 0 + self._n_one = 0 + for (var, var_dict) in solution.items(): + for (idx, value) in var_dict.items(): + if value is None: + self._n_free += 1 + else: + if value < 0.5: + self._n_zero += 1 + else: + self._n_one += 1 + logger.info( + f"Predicted: {self._n_free} free, {self._n_zero} fix-zero, " + f"{self._n_one} fix-one" + ) + + # Provide solution to the solver + if self.mode == "heuristic": + solver.internal_solver.fix(solution) + else: + solver.internal_solver.set_warm_start(solution) def after_solve_mip( self, @@ -120,27 +121,29 @@ class PrimalSolutionComponent(Component): self.classifiers[category] = clf self.thresholds[category] = thr - def predict(self, instance: Instance) -> Solution: - assert len(instance.training_data) > 0 - sample = instance.training_data[-1] - + def predict( + self, + features: Features, + sample: TrainingSample, + ) -> Solution: # Initialize empty solution solution: Solution = {} - for (var_name, var_dict) in instance.features["Variables"].items(): + for (var_name, var_dict) in features["Variables"].items(): solution[var_name] = {} for idx in var_dict.keys(): solution[var_name][idx] = None # Compute y_pred - x = self.x_sample(instance.features, sample) + x = self.x_sample(features, sample) y_pred = {} for category in x.keys(): assert category in self.classifiers, ( f"Classifier for category {category} has not been trained. " f"Please call component.fit before component.predict." 
) - proba = self.classifiers[category].predict_proba(x[category]) - thr = self.thresholds[category].predict(x[category]) + xc = np.array(x[category]) + proba = self.classifiers[category].predict_proba(xc) + thr = self.thresholds[category].predict(xc) y_pred[category] = np.vstack( [ proba[:, 0] > thr[0], @@ -150,7 +153,7 @@ class PrimalSolutionComponent(Component): # Convert y_pred into solution category_offset: Dict[Hashable, int] = {cat: 0 for cat in x.keys()} - for (var_name, var_dict) in instance.features["Variables"].items(): + for (var_name, var_dict) in features["Variables"].items(): for (idx, var_features) in var_dict.items(): category = var_features["Category"] offset = category_offset[category] @@ -250,8 +253,9 @@ class PrimalSolutionComponent(Component): if category not in x.keys(): x[category] = [] y[category] = [] - f = var_features["User features"] - assert f is not None + f: List[float] = [] + assert var_features["User features"] is not None + f += var_features["User features"] if "LP solution" in sample and sample["LP solution"] is not None: lp_value = sample["LP solution"][var_name][idx] if lp_value is not None: diff --git a/miplearn/problems/tsp.py b/miplearn/problems/tsp.py index 6b1032f..439c489 100644 --- a/miplearn/problems/tsp.py +++ b/miplearn/problems/tsp.py @@ -157,10 +157,10 @@ class TravelingSalesmanInstance(Instance): return model def get_instance_features(self): - return [1] + return [0.0] def get_variable_features(self, var_name, index): - return [1] + return [0.0] def get_variable_category(self, var_name, index): return index diff --git a/miplearn/solvers/learning.py b/miplearn/solvers/learning.py index ab9757e..d16bf6d 100644 --- a/miplearn/solvers/learning.py +++ b/miplearn/solvers/learning.py @@ -373,25 +373,28 @@ class LearningSolver: The list is the same you would obtain by calling `[solver.solve(p) for p in instances]` """ - self.internal_solver = None - self._silence_miplearn_logger() - _GLOBAL[0].solver = self - 
_GLOBAL[0].output_filenames = output_filenames - _GLOBAL[0].instances = instances - _GLOBAL[0].discard_outputs = discard_outputs - results = p_map( - _parallel_solve, - list(range(len(instances))), - num_cpus=n_jobs, - desc=label, - ) - results = [r for r in results if r[0]] - stats = [] - for (idx, (s, instance)) in enumerate(results): - stats.append(s) - instances[idx] = instance - self._restore_miplearn_logger() - return stats + if n_jobs == 1: + return [self.solve(p) for p in instances] + else: + self.internal_solver = None + self._silence_miplearn_logger() + _GLOBAL[0].solver = self + _GLOBAL[0].output_filenames = output_filenames + _GLOBAL[0].instances = instances + _GLOBAL[0].discard_outputs = discard_outputs + results = p_map( + _parallel_solve, + list(range(len(instances))), + num_cpus=n_jobs, + desc=label, + ) + results = [r for r in results if r[0]] + stats = [] + for (idx, (s, instance)) in enumerate(results): + stats.append(s) + instances[idx] = instance + self._restore_miplearn_logger() + return stats def fit(self, training_instances: Union[List[str], List[Instance]]) -> None: if len(training_instances) == 0: diff --git a/tests/components/test_primal.py b/tests/components/test_primal.py index c184231..7f713d9 100644 --- a/tests/components/test_primal.py +++ b/tests/components/test_primal.py @@ -2,7 +2,6 @@ # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. 
-from typing import cast from unittest.mock import Mock import numpy as np @@ -11,7 +10,6 @@ from numpy.testing import assert_array_equal from miplearn import Classifier from miplearn.classifiers.threshold import Threshold from miplearn.components.primal import PrimalSolutionComponent -from miplearn.instance import Instance from miplearn.types import TrainingSample, Features @@ -142,8 +140,7 @@ def test_predict() -> None: ) thr = Mock(spec=Threshold) thr.predict = Mock(return_value=[0.75, 0.75]) - instance = cast(Instance, Mock(spec=Instance)) - instance.features = { + features: Features = { "Variables": { "x": { 0: { @@ -161,33 +158,23 @@ def test_predict() -> None: } } } - instance.training_data = [ - { - "LP solution": { - "x": { - 0: 0.1, - 1: 0.5, - 2: 0.9, - } + sample: TrainingSample = { + "LP solution": { + "x": { + 0: 0.1, + 1: 0.5, + 2: 0.9, } } - ] - x = { - "default": np.array( - [ - [0.0, 0.0, 0.1], - [0.0, 2.0, 0.5], - [2.0, 0.0, 0.9], - ] - ) } + x = PrimalSolutionComponent.x_sample(features, sample) comp = PrimalSolutionComponent() comp.classifiers = {"default": clf} comp.thresholds = {"default": thr} - solution_actual = comp.predict(instance) + solution_actual = comp.predict(features, sample) clf.predict_proba.assert_called_once() - thr.predict.assert_called_once() assert_array_equal(x["default"], clf.predict_proba.call_args[0][0]) + thr.predict.assert_called_once() assert_array_equal(x["default"], thr.predict.call_args[0][0]) assert solution_actual == { "x": { @@ -196,3 +183,30 @@ def test_predict() -> None: 2: 1.0, } } + + +def test_fit_xy(): + comp = PrimalSolutionComponent( + classifier=lambda: Mock(spec=Classifier), + threshold=lambda: Mock(spec=Threshold), + ) + x = { + "type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), + "type-b": np.array([[7.0, 8.0, 9.0]]), + } + y = { + "type-a": np.array([[True, False], [False, True]]), + "type-b": np.array([[True, False]]), + } + comp.fit_xy(x, y) + for category in ["type-a", "type-b"]: + assert 
category in comp.classifiers + assert category in comp.thresholds + clf = comp.classifiers[category] + clf.fit.assert_called_once() + assert_array_equal(x[category], clf.fit.call_args[0][0]) + assert_array_equal(y[category], clf.fit.call_args[0][1]) + thr = comp.thresholds[category] + thr.fit.assert_called_once() + assert_array_equal(x[category], thr.fit.call_args[0][1]) + assert_array_equal(y[category], thr.fit.call_args[0][2]) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 58083d4..0baca38 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -12,24 +12,25 @@ from miplearn.solvers.learning import LearningSolver def test_benchmark(): - # Generate training and test instances - generator = MaxWeightStableSetGenerator(n=randint(low=25, high=26)) - train_instances = generator.generate(5) - test_instances = generator.generate(3) - - # Training phase... - training_solver = LearningSolver() - training_solver.parallel_solve(train_instances, n_jobs=10) - - # Test phase... 
- test_solvers = { - "Strategy A": LearningSolver(), - "Strategy B": LearningSolver(), - } - benchmark = BenchmarkRunner(test_solvers) - benchmark.fit(train_instances) - benchmark.parallel_solve(test_instances, n_jobs=2, n_trials=2) - assert benchmark.results.values.shape == (12, 18) - - benchmark.write_csv("/tmp/benchmark.csv") - assert os.path.isfile("/tmp/benchmark.csv") + for n_jobs in [1, 4]: + # Generate training and test instances + generator = MaxWeightStableSetGenerator(n=randint(low=25, high=26)) + train_instances = generator.generate(5) + test_instances = generator.generate(3) + + # Solve training instances + training_solver = LearningSolver() + training_solver.parallel_solve(train_instances, n_jobs=n_jobs) + + # Benchmark + test_solvers = { + "Strategy A": LearningSolver(), + "Strategy B": LearningSolver(), + } + benchmark = BenchmarkRunner(test_solvers) + benchmark.fit(train_instances) + benchmark.parallel_solve(test_instances, n_jobs=n_jobs, n_trials=2) + assert benchmark.results.values.shape == (12, 18) + + benchmark.write_csv("/tmp/benchmark.csv") + assert os.path.isfile("/tmp/benchmark.csv")