Mirror of https://github.com/ANL-CEEESA/MIPLearn.git (synced 2025-12-06 17:38:51 -06:00)
Implement PrimalSolutionComponent.evaluate
@@ -1,3 +1,32 @@
 # MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
+
+
+def classifier_evaluation_dict(tp, tn, fp, fn):
+    p = tp + fn
+    n = fp + tn
+    d = {
+        "Predicted positive": fp + tp,
+        "Predicted negative": fn + tn,
+        "Condition positive": p,
+        "Condition negative": n,
+        "True positive": tp,
+        "True negative": tn,
+        "False positive": fp,
+        "False negative": fn,
+        "Accuracy": (tp + tn) / (p + n),
+        "F1 score": (2 * tp) / (2 * tp + fp + fn),
+        "Recall": tp / p,
+        "Precision": tp / (tp + fp),
+    }
+    t = (p + n) / 100.0
+    d["Predicted positive (%)"] = d["Predicted positive"] / t
+    d["Predicted negative (%)"] = d["Predicted negative"] / t
+    d["Condition positive (%)"] = d["Condition positive"] / t
+    d["Condition negative (%)"] = d["Condition negative"] / t
+    d["True positive (%)"] = d["True positive"] / t
+    d["True negative (%)"] = d["True negative"] / t
+    d["False positive (%)"] = d["False positive"] / t
+    d["False negative (%)"] = d["False negative"] / t
+    return d
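For reference, a minimal sketch of how the new helper behaves. The counts below are the "Fix one" numbers asserted in the test added at the end of this commit, so the printed values match the expected dictionary there.

    from miplearn.components import classifier_evaluation_dict

    # tp/tn/fp/fn taken from the "Fix one" case of test_evaluate below.
    d = classifier_evaluation_dict(tp=1, tn=1, fp=0, fn=2)
    print(d["Accuracy"])   # 0.5
    print(d["Precision"])  # 1.0
    print(d["Recall"])     # 0.3333...
    print(d["F1 score"])   # 0.5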
@@ -5,6 +5,7 @@
 from copy import deepcopy
 
 from miplearn.classifiers.counting import CountingClassifier
+from miplearn.components import classifier_evaluation_dict
 
 from .component import Component
 from ..extractors import *
@@ -67,54 +68,19 @@ class LazyConstraintsComponent(Component):
         return violations
 
     def evaluate(self, instances):
-        def _classifier_evaluation_dict(tp, tn, fp, fn):
-            p = tp + fn
-            n = fp + tn
-            d = {
-                "Predicted positive": fp + tp,
-                "Predicted negative": fn + tn,
-                "Condition positive": p,
-                "Condition negative": n,
-                "True positive": tp,
-                "True negative": tn,
-                "False positive": fp,
-                "False negative": fn,
-            }
-            d["Accuracy"] = (tp + tn) / (p + n)
-            d["F1 score"] = (2 * tp) / (2 * tp + fp + fn)
-            d["Recall"] = tp / p
-            d["Precision"] = tp / (tp + fp)
-            T = (p + n) / 100.0
-            d["Predicted positive (%)"] = d["Predicted positive"] / T
-            d["Predicted negative (%)"] = d["Predicted negative"] / T
-            d["Condition positive (%)"] = d["Condition positive"] / T
-            d["Condition negative (%)"] = d["Condition negative"] / T
-            d["True positive (%)"] = d["True positive"] / T
-            d["True negative (%)"] = d["True negative"] / T
-            d["False positive (%)"] = d["False positive"] / T
-            d["False negative (%)"] = d["False negative"] / T
-            return d
-
         results = {}
         all_violations = set()
         for instance in instances:
             all_violations |= set(instance.found_violations)
 
         for idx in tqdm(range(len(instances)), desc="Evaluate (lazy)"):
             instance = instances[idx]
             condition_positive = set(instance.found_violations)
             condition_negative = all_violations - condition_positive
             pred_positive = set(self.predict(instance)) & all_violations
             pred_negative = all_violations - pred_positive
 
             tp = len(pred_positive & condition_positive)
             tn = len(pred_negative & condition_negative)
             fp = len(pred_positive & condition_negative)
             fn = len(pred_negative & condition_positive)
-            results[idx] = _classifier_evaluation_dict(tp, tn, fp, fn)
+            results[idx] = classifier_evaluation_dict(tp, tn, fp, fn)
         return results
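The same counting pattern feeds classifier_evaluation_dict in the lazy-constraint evaluation above. A standalone sketch with made-up violation names (the identifiers are hypothetical, chosen only for illustration):

    from miplearn.components import classifier_evaluation_dict

    all_violations = {"cut_a", "cut_b", "cut_c"}          # hypothetical identifiers
    condition_positive = {"cut_a", "cut_b"}               # violations actually found
    condition_negative = all_violations - condition_positive
    pred_positive = {"cut_a", "cut_c"}                    # violations the component predicts
    pred_negative = all_violations - pred_positive

    tp = len(pred_positive & condition_positive)          # 1
    tn = len(pred_negative & condition_negative)          # 0
    fp = len(pred_positive & condition_negative)          # 1
    fn = len(pred_negative & condition_positive)          # 1
    print(classifier_evaluation_dict(tp, tn, fp, fn)["Accuracy"])  # ~0.33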
@@ -5,6 +5,7 @@
 from copy import deepcopy
 
 from miplearn.classifiers.adaptive import AdaptiveClassifier
+from miplearn.components import classifier_evaluation_dict
 from sklearn.metrics import roc_curve
 
 from .component import Component
@@ -18,19 +19,19 @@ class PrimalSolutionComponent(Component):
     A component that predicts primal solutions.
     """
     def __init__(self,
-                 predictor=AdaptiveClassifier(),
+                 classifier=AdaptiveClassifier(),
                  mode="exact",
                  max_fpr=[1e-3, 1e-3],
                  min_threshold=[0.75, 0.75],
                  dynamic_thresholds=True,
                  ):
         self.mode = mode
-        self.predictors = {}
         self.is_warm_start_available = False
         self.max_fpr = max_fpr
         self.min_threshold = min_threshold
         self.thresholds = {}
-        self.predictor_factory = predictor
+        self.classifiers = {}
+        self.classifier_prototype = classifier
         self.dynamic_thresholds = dynamic_thresholds
 
     def before_solve(self, solver, instance, model):
@@ -52,15 +53,15 @@ class PrimalSolutionComponent(Component):
             x_train = features[category]
             y_train = solutions[category]
             for label in [0, 1]:
-                logger.debug("Fitting predictors[%s, %s]:" % (category, label))
-
-                if callable(self.predictor_factory):
-                    pred = self.predictor_factory(category, label)
-                else:
-                    pred = deepcopy(self.predictor_factory)
-                self.predictors[category, label] = pred
                 y = y_train[:, label].astype(int)
+
+                logger.debug("Fitting predictors[%s, %s]:" % (category, label))
+                if isinstance(self.classifier_prototype, list):
+                    pred = deepcopy(self.classifier_prototype[label])
+                else:
+                    pred = deepcopy(self.classifier_prototype)
                 pred.fit(x_train, y)
+                self.classifiers[category, label] = pred
 
                 # If y is either always one or always zero, set fixed threshold
                 y_avg = np.average(y)
@@ -69,8 +70,15 @@ class PrimalSolutionComponent(Component):
                     logger.debug("    Setting threshold to %.4f" % self.min_threshold[label])
                     continue
 
+                proba = pred.predict_proba(x_train)
+                assert isinstance(proba, np.ndarray), \
+                    "classifier should return numpy array"
+                assert proba.shape == (x_train.shape[0], 2), \
+                    "classifier should return (%d,%d)-shaped array, not %s" % (
+                        x_train.shape[0], 2, str(proba.shape))
+
                 # Calculate threshold dynamically using ROC curve
-                y_scores = pred.predict_proba(x_train)[:, 1]
+                y_scores = proba[:, 1]
                 fpr, tpr, thresholds = roc_curve(y, y_scores)
                 k = 0
                 while True:
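The hunk above only shows the start of the threshold search. As a rough, assumption-labelled sketch (not the component's exact stopping rule, which also depends on max_fpr and min_threshold logic not visible in this hunk), the ROC-based selection amounts to walking sklearn's roc_curve output and keeping the largest threshold whose false positive rate stays within the budget:

    import numpy as np
    from sklearn.metrics import roc_curve

    # Illustrative labels and positive-class scores.
    y = np.array([0, 0, 1, 1, 1, 0, 1, 0])
    y_scores = np.array([0.1, 0.4, 0.35, 0.8, 0.9, 0.2, 0.7, 0.6])

    fpr, tpr, thresholds = roc_curve(y, y_scores)

    max_fpr = 0.25  # stand-in for self.max_fpr[label]
    k = 0
    while k + 1 < len(fpr) and fpr[k + 1] <= max_fpr:
        k += 1
    print("chosen threshold:", thresholds[k])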
@@ -95,11 +103,50 @@ class PrimalSolutionComponent(Component):
                     solution[var] = {}
                 solution[var][index] = None
                 for label in [0, 1]:
-                    if (category, label) not in self.predictors.keys():
+                    if (category, label) not in self.classifiers.keys():
                         continue
-                    ws = self.predictors[category, label].predict_proba(x_test[category])
+                    ws = self.classifiers[category, label].predict_proba(x_test[category])
                     logger.debug("%s[%s] ws=%.6f threshold=%.6f" %
                                  (var, index, ws[i, 1], self.thresholds[category, label]))
                     if ws[i, 1] >= self.thresholds[category, label]:
                         solution[var][index] = label
         return solution
+
+    def evaluate(self, instances):
+        ev = {}
+        for (instance_idx, instance) in enumerate(instances):
+            solution_actual = instance.solution
+            solution_pred = self.predict(instance)
+
+            vars_all, vars_one, vars_zero = set(), set(), set()
+            pred_one_positive, pred_zero_positive = set(), set()
+            for (varname, var_dict) in solution_actual.items():
+                for (idx, value) in var_dict.items():
+                    vars_all.add((varname, idx))
+                    if value > 0.5:
+                        vars_one.add((varname, idx))
+                    else:
+                        vars_zero.add((varname, idx))
+                    if solution_pred[varname][idx] is not None:
+                        if solution_pred[varname][idx] > 0.5:
+                            pred_one_positive.add((varname, idx))
+                        else:
+                            pred_zero_positive.add((varname, idx))
+            pred_one_negative = vars_all - pred_one_positive
+            pred_zero_negative = vars_all - pred_zero_positive
+
+            tp_zero = len(pred_zero_positive & vars_zero)
+            fp_zero = len(pred_zero_positive & vars_one)
+            tn_zero = len(pred_zero_negative & vars_one)
+            fn_zero = len(pred_zero_negative & vars_zero)
+
+            tp_one = len(pred_one_positive & vars_one)
+            fp_one = len(pred_one_positive & vars_zero)
+            tn_one = len(pred_one_negative & vars_zero)
+            fn_one = len(pred_one_negative & vars_one)
+
+            ev[instance_idx] = {
+                "Fix zero": classifier_evaluation_dict(tp_zero, tn_zero, fp_zero, fn_zero),
+                "Fix one": classifier_evaluation_dict(tp_one, tn_one, fp_one, fn_one),
+            }
+        return ev
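The new PrimalSolutionComponent.evaluate compares the predicted warm start against the stored optimal solution and reports one confusion matrix per fixing direction ("Fix zero" and "Fix one"). A self-contained recount of the "Fix one" statistics for the single-instance example used in the test below (solution values taken from that test):

    solution_actual = {"x": {0: 1, 1: 0, 2: 1, 3: 1}}   # instances[0].solution in the test
    solution_pred = {"x": {0: 0, 1: 0, 2: 1, 3: None}}  # comp.predict(instances[0]) in the test

    vars_all, vars_one = set(), set()
    pred_one_positive = set()
    for varname, var_dict in solution_actual.items():
        for idx, value in var_dict.items():
            vars_all.add((varname, idx))
            if value > 0.5:
                vars_one.add((varname, idx))
            pred = solution_pred[varname][idx]
            if pred is not None and pred > 0.5:
                pred_one_positive.add((varname, idx))
    pred_one_negative = vars_all - pred_one_positive
    vars_zero = vars_all - vars_one

    tp = len(pred_one_positive & vars_one)   # 1 -> "True positive"
    tn = len(pred_one_negative & vars_zero)  # 1 -> "True negative"
    fp = len(pred_one_positive & vars_zero)  # 0 -> "False positive"
    fn = len(pred_one_negative & vars_one)   # 2 -> "False negative"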
@@ -2,32 +2,91 @@
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
 
-from miplearn import LearningSolver, PrimalSolutionComponent
-from miplearn.problems.knapsack import KnapsackInstance
+from unittest.mock import Mock
+
 import numpy as np
-import tempfile
-
-
-def _get_instances():
-    instances = [
-        KnapsackInstance(
-            weights=[23., 26., 20., 18.],
-            prices=[505., 352., 458., 220.],
-            capacity=67.,
-        ),
-    ] * 5
-    models = [inst.to_model() for inst in instances]
-    solver = LearningSolver()
-    for i in range(len(instances)):
-        solver.solve(instances[i], models[i])
-    return instances, models
+
+from miplearn import PrimalSolutionComponent
+from miplearn.classifiers import Classifier
+from miplearn.tests import get_training_instances_and_models
 
 
 def test_predict():
-    instances, models = _get_instances()
+    instances, models = get_training_instances_and_models()
     comp = PrimalSolutionComponent()
     comp.fit(instances)
     solution = comp.predict(instances[0])
     assert "x" in solution
-    for idx in range(4):
-        assert idx in solution["x"]
+    assert 0 in solution["x"]
+    assert 1 in solution["x"]
+    assert 2 in solution["x"]
+    assert 3 in solution["x"]
+
+
+def test_evaluate():
+    instances, models = get_training_instances_and_models()
+    clf_zero = Mock(spec=Classifier)
+    clf_zero.predict_proba = Mock(return_value=np.array([
+        [0., 1.],  # x[0]
+        [0., 1.],  # x[1]
+        [1., 0.],  # x[2]
+        [1., 0.],  # x[3]
+    ]))
+    clf_one = Mock(spec=Classifier)
+    clf_one.predict_proba = Mock(return_value=np.array([
+        [1., 0.],  # x[0] instances[0]
+        [1., 0.],  # x[1] instances[0]
+        [0., 1.],  # x[2] instances[0]
+        [1., 0.],  # x[3] instances[0]
+    ]))
+    comp = PrimalSolutionComponent(classifier=[clf_zero, clf_one],
+                                   dynamic_thresholds=False)
+    comp.fit(instances[:1])
+    assert comp.predict(instances[0]) == {"x": {0: 0,
+                                                1: 0,
+                                                2: 1,
+                                                3: None}}
+    assert instances[0].solution == {"x": {0: 1,
+                                           1: 0,
+                                           2: 1,
+                                           3: 1}}
+    ev = comp.evaluate(instances[:1])
+    assert ev == {0: {'Fix one': {'Accuracy': 0.5,
+                                  'Condition negative': 1,
+                                  'Condition negative (%)': 25.0,
+                                  'Condition positive': 3,
+                                  'Condition positive (%)': 75.0,
+                                  'F1 score': 0.5,
+                                  'False negative': 2,
+                                  'False negative (%)': 50.0,
+                                  'False positive': 0,
+                                  'False positive (%)': 0.0,
+                                  'Precision': 1.0,
+                                  'Predicted negative': 3,
+                                  'Predicted negative (%)': 75.0,
+                                  'Predicted positive': 1,
+                                  'Predicted positive (%)': 25.0,
+                                  'Recall': 0.3333333333333333,
+                                  'True negative': 1,
+                                  'True negative (%)': 25.0,
+                                  'True positive': 1,
+                                  'True positive (%)': 25.0},
+                      'Fix zero': {'Accuracy': 0.75,
+                                   'Condition negative': 3,
+                                   'Condition negative (%)': 75.0,
+                                   'Condition positive': 1,
+                                   'Condition positive (%)': 25.0,
+                                   'F1 score': 0.6666666666666666,
+                                   'False negative': 0,
+                                   'False negative (%)': 0.0,
+                                   'False positive': 1,
+                                   'False positive (%)': 25.0,
+                                   'Precision': 0.5,
+                                   'Predicted negative': 2,
+                                   'Predicted negative (%)': 50.0,
+                                   'Predicted positive': 2,
+                                   'Predicted positive (%)': 50.0,
+                                   'Recall': 1.0,
+                                   'True negative': 2,
+                                   'True negative (%)': 50.0,
+                                   'True positive': 1,
+                                   'True positive (%)': 25.0}}}