From b555d5739caf0b69f9328c1974493b4a1d299022 Mon Sep 17 00:00:00 2001 From: Alinson S Xavier Date: Tue, 14 Apr 2020 09:21:39 -0500 Subject: [PATCH] Move classifiers into their own module --- src/python/miplearn/__init__.py | 5 +- .../classifiers/AdaptiveClassifier.py | 110 ++++++++++++++++++ src/python/miplearn/classifiers/__init__.py | 25 ++++ src/python/miplearn/components/__init__.py | 1 - src/python/miplearn/components/primal.py | 96 +-------------- 5 files changed, 140 insertions(+), 97 deletions(-) create mode 100644 src/python/miplearn/classifiers/AdaptiveClassifier.py create mode 100644 src/python/miplearn/classifiers/__init__.py diff --git a/src/python/miplearn/__init__.py b/src/python/miplearn/__init__.py index 9a80274..5c4f10d 100644 --- a/src/python/miplearn/__init__.py +++ b/src/python/miplearn/__init__.py @@ -10,10 +10,11 @@ from .extractors import (SolutionExtractor, from .components.component import Component from .components.objective import ObjectiveValueComponent from .components.lazy import LazyConstraintsComponent -from .components.primal import (PrimalSolutionComponent, - AdaptivePredictor) +from .components.primal import PrimalSolutionComponent from .components.branching import BranchPriorityComponent +from .classifiers.AdaptiveClassifier import AdaptiveClassifier + from .benchmark import BenchmarkRunner from .instance import Instance diff --git a/src/python/miplearn/classifiers/AdaptiveClassifier.py b/src/python/miplearn/classifiers/AdaptiveClassifier.py new file mode 100644 index 0000000..beb41ac --- /dev/null +++ b/src/python/miplearn/classifiers/AdaptiveClassifier.py @@ -0,0 +1,110 @@ +# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization +# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. +# Released under the modified BSD license. See COPYING.md for more details. 
+
+import logging
+from copy import deepcopy
+
+import numpy as np
+
+from miplearn.classifiers import Classifier
+from sklearn.model_selection import cross_val_score
+
+logger = logging.getLogger(__name__)
+
+
+class AdaptiveClassifier(Classifier):
+    """
+    A classifier that automatically switches strategies based on the number of
+    samples and cross-validation scores.
+
+    After fit, self.predictor is the integer 0 or 1 (always predict that
+    class) or a trained model, and predict_proba dispatches on which it is.
+    """
+    def __init__(self,
+                 predictor=None,
+                 min_samples_predict=1,
+                 min_samples_cv=100,
+                 thr_fix=0.999,
+                 thr_alpha=0.50,
+                 thr_balance=0.95):
+        self.min_samples_predict = min_samples_predict
+        self.min_samples_cv = min_samples_cv
+        self.thr_fix = thr_fix
+        self.thr_alpha = thr_alpha
+        self.thr_balance = thr_balance
+        self.predictor_factory = predictor
+        self.predictor = None
+
+    def fit(self, x_train, y_train):
+        """Choose a strategy for these samples and train a model if needed."""
+        n_samples = x_train.shape[0]
+
+        # If number of samples is too small, don't predict anything.
+        if n_samples < self.min_samples_predict:
+            logger.debug(" Too few samples (%d); always predicting false", n_samples)
+            self.predictor = 0
+            return
+
+        # If vast majority of observations are false, always return false.
+        y_train_avg = np.average(y_train)
+        if y_train_avg <= 1.0 - self.thr_fix:
+            logger.debug(" Most samples are negative (%.3f); always returning false", y_train_avg)
+            self.predictor = 0
+            return
+
+        # If vast majority of observations are true, always return true.
+        if y_train_avg >= self.thr_fix:
+            logger.debug(" Most samples are positive (%.3f); always returning true", y_train_avg)
+            self.predictor = 1
+            return
+
+        # If classes are too unbalanced, don't predict anything.
+        if y_train_avg < (1 - self.thr_balance) or y_train_avg > self.thr_balance:
+            logger.debug(" Classes are too unbalanced (%.3f); always returning false", y_train_avg)
+            self.predictor = 0
+            return
+
+        # Select ML model if none is provided (local, so every fit re-selects)
+        factory = self.predictor_factory
+        if factory is None:
+            if n_samples < 30:
+                from sklearn.neighbors import KNeighborsClassifier
+                factory = KNeighborsClassifier(n_neighbors=n_samples)
+            else:
+                from sklearn.pipeline import make_pipeline
+                from sklearn.preprocessing import StandardScaler
+                from sklearn.linear_model import LogisticRegression
+                factory = make_pipeline(StandardScaler(), LogisticRegression())
+
+        # Create predictor (call a factory, or copy a prototype instance)
+        pred = factory() if callable(factory) else deepcopy(factory)
+
+        # Skip cross-validation if number of samples is too small
+        if n_samples < self.min_samples_cv:
+            logger.debug(" Too few samples (%d); skipping cross validation", n_samples)
+            self.predictor = pred
+            self.predictor.fit(x_train, y_train)
+            return
+
+        # Calculate cross-validation score
+        cv_score = np.mean(cross_val_score(pred, x_train, y_train, cv=5))
+        dummy_score = max(y_train_avg, 1 - y_train_avg)
+        cv_thr = 1. * self.thr_alpha + dummy_score * (1 - self.thr_alpha)
+
+        # If cross-validation score is too low, don't predict anything.
+        if cv_score < cv_thr:
+            logger.debug(" Score is too low (%.3f < %.3f); always returning false", cv_score, cv_thr)
+            self.predictor = 0
+        else:
+            logger.debug(" Score is acceptable (%.3f > %.3f); training classifier", cv_score, cv_thr)
+            self.predictor = pred
+            self.predictor.fit(x_train, y_train)
+
+    def predict_proba(self, x_test):
+        """Return class probabilities for the rows of x_test."""
+        if isinstance(self.predictor, int):
+            y_pred = np.zeros((x_test.shape[0], 2))
+            y_pred[:, self.predictor] = 1.0
+            return y_pred
+        return self.predictor.predict_proba(x_test)
diff --git a/src/python/miplearn/classifiers/__init__.py b/src/python/miplearn/classifiers/__init__.py
new file mode 100644
index 0000000..5f1d883
--- /dev/null
+++ b/src/python/miplearn/classifiers/__init__.py
@@ -0,0 +1,25 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from abc import ABC, abstractmethod
+
+
+class Classifier(ABC):
+    @abstractmethod
+    def fit(self, x_train, y_train):
+        """Train the model on the samples x_train / y_train."""
+
+    @abstractmethod
+    def predict_proba(self, x_test):
+        """Return class probabilities for the rows of x_test."""
+
+
+class Regressor(ABC):
+    @abstractmethod
+    def fit(self, x_train, y_train):
+        """Train the model on the samples x_train / y_train."""
+
+    @abstractmethod
+    def predict(self, x_test):
+        """Return predicted values for the rows of x_test."""
diff --git a/src/python/miplearn/components/__init__.py b/src/python/miplearn/components/__init__.py
index 2e19678..13c148b 100644
--- a/src/python/miplearn/components/__init__.py
+++ b/src/python/miplearn/components/__init__.py
@@ -1,4 +1,3 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
- diff --git a/src/python/miplearn/components/primal.py b/src/python/miplearn/components/primal.py index a9401a2..28e2625 100644 --- a/src/python/miplearn/components/primal.py +++ b/src/python/miplearn/components/primal.py @@ -4,12 +4,8 @@ from copy import deepcopy -from sklearn.linear_model import LogisticRegression +from miplearn.classifiers.AdaptiveClassifier import AdaptiveClassifier from sklearn.metrics import roc_curve -from sklearn.model_selection import cross_val_score -from sklearn.neighbors import KNeighborsClassifier -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler from .component import Component from ..extractors import * @@ -17,100 +13,12 @@ from ..extractors import * logger = logging.getLogger(__name__) -class AdaptivePredictor: - def __init__(self, - predictor=None, - min_samples_predict=1, - min_samples_cv=100, - thr_fix=0.999, - thr_alpha=0.50, - thr_balance=0.95, - ): - self.min_samples_predict = min_samples_predict - self.min_samples_cv = min_samples_cv - self.thr_fix = thr_fix - self.thr_alpha = thr_alpha - self.thr_balance = thr_balance - self.predictor_factory = predictor - - def fit(self, x_train, y_train): - n_samples = x_train.shape[0] - - # If number of samples is too small, don't predict anything. - if n_samples < self.min_samples_predict: - logger.debug(" Too few samples (%d); always predicting false" % n_samples) - self.predictor = 0 - return - - # If vast majority of observations are false, always return false. - y_train_avg = np.average(y_train) - if y_train_avg <= 1.0 - self.thr_fix: - logger.debug(" Most samples are negative (%.3f); always returning false" % y_train_avg) - self.predictor = 0 - return - - # If vast majority of observations are true, always return true. - if y_train_avg >= self.thr_fix: - logger.debug(" Most samples are positive (%.3f); always returning true" % y_train_avg) - self.predictor = 1 - return - - # If classes are too unbalanced, don't predict anything. 
- if y_train_avg < (1 - self.thr_balance) or y_train_avg > self.thr_balance: - logger.debug(" Classes are too unbalanced (%.3f); always returning false" % y_train_avg) - self.predictor = 0 - return - - # Select ML model if none is provided - if self.predictor_factory is None: - if n_samples < 30: - self.predictor_factory = KNeighborsClassifier(n_neighbors=n_samples) - else: - self.predictor_factory = make_pipeline(StandardScaler(), LogisticRegression()) - - # Create predictor - if callable(self.predictor_factory): - pred = self.predictor_factory() - else: - pred = deepcopy(self.predictor_factory) - - # Skip cross-validation if number of samples is too small - if n_samples < self.min_samples_cv: - logger.debug(" Too few samples (%d); skipping cross validation" % n_samples) - self.predictor = pred - self.predictor.fit(x_train, y_train) - return - - # Calculate cross-validation score - cv_score = np.mean(cross_val_score(pred, x_train, y_train, cv=5)) - dummy_score = max(y_train_avg, 1 - y_train_avg) - cv_thr = 1. * self.thr_alpha + dummy_score * (1 - self.thr_alpha) - - # If cross-validation score is too low, don't predict anything. - if cv_score < cv_thr: - logger.debug(" Score is too low (%.3f < %.3f); always returning false" % (cv_score, cv_thr)) - self.predictor = 0 - else: - logger.debug(" Score is acceptable (%.3f > %.3f); training classifier" % (cv_score, cv_thr)) - self.predictor = pred - self.predictor.fit(x_train, y_train) - - def predict_proba(self, x_test): - if isinstance(self.predictor, int): - y_pred = np.zeros((x_test.shape[0], 2)) - y_pred[:, self.predictor] = 1.0 - return y_pred - else: - return self.predictor.predict_proba(x_test) - - class PrimalSolutionComponent(Component): """ A component that predicts primal solutions. """ - def __init__(self, - predictor=AdaptivePredictor(), + predictor=AdaptiveClassifier(), mode="exact", max_fpr=[1e-3, 1e-3], min_threshold=[0.75, 0.75],