#  MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
#  Released under the modified BSD license. See COPYING.md for more details.

from abc import ABC, abstractmethod
from typing import Optional, Any, cast

import numpy as np


class Classifier(ABC):
    """
    A Classifier decides which class each sample belongs to, based on
    historical data.
    """

    def __init__(self) -> None:
        self.n_features: Optional[int] = None
        self.n_classes: Optional[int] = None

    @abstractmethod
    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Trains the classifier.

        Parameters
        ----------
        x_train: np.ndarray
            An array of features with shape (`n_samples`, `n_features`). Each
            entry must be a float.
        y_train: np.ndarray
            An array of labels with shape (`n_samples`, `n_classes`). Each
            entry must be a bool, and there must be exactly one True element
            in each row.
        """
        assert isinstance(x_train, np.ndarray)
        assert isinstance(y_train, np.ndarray)
        assert x_train.dtype in [
            np.float16,
            np.float32,
            np.float64,
        ], f"x_train.dtype should be float. Found {x_train.dtype} instead."
        assert y_train.dtype == np.bool_
        assert len(x_train.shape) == 2
        assert len(y_train.shape) == 2
        (n_samples_x, n_features) = x_train.shape
        (n_samples_y, n_classes) = y_train.shape
        assert n_samples_y == n_samples_x
        self.n_features = n_features
        self.n_classes = n_classes

    @abstractmethod
    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        """
        Predicts the probability of each sample belonging to each class. Must
        be called after fit.

        Parameters
        ----------
        x_test: np.ndarray
            An array of features with shape (`n_samples`, `n_features`). The
            number of features in `x_test` must match the number of features
            in `x_train` provided to `fit`.

        Returns
        -------
        np.ndarray
            An array of predicted probabilities with shape (`n_samples`,
            `n_classes`), where `n_classes` is the number of columns in
            `y_train` provided to `fit`.
        """
        assert self.n_features is not None
        assert isinstance(x_test, np.ndarray)
        assert len(x_test.shape) == 2
        (n_samples, n_features_x) = x_test.shape
        assert n_features_x == self.n_features, (
            f"Test and training data have different number of "
            f"features: {n_features_x} != {self.n_features}"
        )
        return np.ndarray([])


class Regressor(ABC):
    """
    A Regressor tries to predict the values of some continuous variables,
    given the values of other variables.
    """

    def __init__(self) -> None:
        self.n_inputs: Optional[int] = None

    @abstractmethod
    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        """
        Trains the regressor.

        Parameters
        ----------
        x_train: np.ndarray
            An array of inputs with shape (`n_samples`, `n_inputs`). Each
            entry must be a float.
        y_train: np.ndarray
            An array of outputs with shape (`n_samples`, `n_outputs`). Each
            entry must be a float.
        """
        assert isinstance(x_train, np.ndarray)
        assert isinstance(y_train, np.ndarray)
        assert x_train.dtype in [np.float16, np.float32, np.float64]
        assert y_train.dtype in [np.float16, np.float32, np.float64]
        assert len(x_train.shape) == 2
        assert len(y_train.shape) == 2
        (n_samples_x, n_inputs) = x_train.shape
        (n_samples_y, n_outputs) = y_train.shape
        assert n_samples_y == n_samples_x
        self.n_inputs = n_inputs

    @abstractmethod
    def predict(self, x_test: np.ndarray) -> np.ndarray:
        """
        Predicts the values of the output variables. Must be called after fit.

        Parameters
        ----------
        x_test: np.ndarray
            An array of inputs with shape (`n_samples`, `n_inputs`), where
            `n_inputs` must match the number of columns in `x_train` provided
            to `fit`.
        Returns
        -------
        np.ndarray
            An array of outputs with shape (`n_samples`, `n_outputs`), where
            `n_outputs` is the number of columns in `y_train` provided to
            `fit`.
        """
        assert self.n_inputs is not None
        assert isinstance(x_test, np.ndarray)
        assert len(x_test.shape) == 2
        (n_samples, n_inputs_x) = x_test.shape
        assert n_inputs_x == self.n_inputs
        return np.ndarray([])


class ScikitLearnClassifier(Classifier):
    """
    Wrapper for ScikitLearn classifiers, which makes sure inputs and outputs
    have the correct dimensions and types.
    """

    def __init__(self, clf: Any) -> None:
        super().__init__()
        self.inner_clf = clf
        self.constant: Optional[np.ndarray] = None

    def fit(self, x_train: np.ndarray, y_train: np.ndarray) -> None:
        super().fit(x_train, y_train)
        (n_samples, n_classes) = y_train.shape
        assert n_classes == 2, (
            f"Scikit-learn classifiers must have exactly two classes. "
            f"{n_classes} classes were provided instead."
        )
        # When all samples belong to the same class, sklearn's predict_proba
        # returns an array with a single column. The following check avoids
        # this strange behavior.
        mean = cast(np.ndarray, y_train.astype(float).mean(axis=0))
        if mean.max() == 1.0:
            self.constant = mean
            return
        self.inner_clf.fit(x_train, y_train[:, 1])

    def predict_proba(self, x_test: np.ndarray) -> np.ndarray:
        super().predict_proba(x_test)
        n_samples = x_test.shape[0]
        if self.constant is not None:
            return np.array([self.constant for n in range(n_samples)])
        sklearn_proba = self.inner_clf.predict_proba(x_test)
        if isinstance(sklearn_proba, list):
            assert len(sklearn_proba) == self.n_classes
            for pb in sklearn_proba:
                assert isinstance(pb, np.ndarray)
                assert pb.dtype in [np.float16, np.float32, np.float64]
                assert pb.shape == (n_samples, 2)
            proba = np.hstack([pb[:, [1]] for pb in sklearn_proba])
            assert proba.shape == (n_samples, self.n_classes)
            return proba
        else:
            assert isinstance(sklearn_proba, np.ndarray)
            assert sklearn_proba.shape == (n_samples, 2)
            return sklearn_proba
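

# Illustrative usage sketch (not part of the library API). It assumes
# scikit-learn is installed and uses LogisticRegression purely as an example
# of an inner classifier; the data below is made up for demonstration. It
# shows the expected input conventions: float features of shape
# (n_samples, n_features) and boolean one-hot labels of shape (n_samples, 2).
if __name__ == "__main__":
    from sklearn.linear_model import LogisticRegression

    x_train = np.array(
        [[0.0, 1.0], [1.0, 0.0], [0.9, 0.1], [0.1, 0.8]],
        dtype=np.float64,
    )
    # Each row has exactly one True entry: column 0 is the "negative" class,
    # column 1 is the "positive" class.
    y_train = np.array(
        [[True, False], [False, True], [False, True], [True, False]]
    )

    clf = ScikitLearnClassifier(LogisticRegression())
    clf.fit(x_train, y_train)
    proba = clf.predict_proba(x_train)
    print(proba.shape)  # (4, 2): one probability per class for each sample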