Make get_variable_{categories,features} return np.ndarray

master
Alinson S. Xavier 4 years ago
parent 56b39b6c9c
commit 895cb962b6
No known key found for this signature in database
GPG Key ID: DCA0DAD4D2F58624

@ -9,7 +9,7 @@ from p_tqdm import p_umap
from miplearn.features.sample import Sample from miplearn.features.sample import Sample
from miplearn.instance.base import Instance from miplearn.instance.base import Instance
from miplearn.types import LearningSolveStats from miplearn.types import LearningSolveStats, Category
if TYPE_CHECKING: if TYPE_CHECKING:
from miplearn.solvers.learning import LearningSolver from miplearn.solvers.learning import LearningSolver
@ -101,8 +101,8 @@ class Component:
def fit_xy( def fit_xy(
self, self,
x: Dict[str, np.ndarray], x: Dict[Category, np.ndarray],
y: Dict[str, np.ndarray], y: Dict[Category, np.ndarray],
) -> None: ) -> None:
""" """
Given two dictionaries x and y, mapping the name of the category to matrices Given two dictionaries x and y, mapping the name of the category to matrices

@ -47,8 +47,8 @@ class PrimalSolutionComponent(Component):
assert isinstance(threshold, Threshold) assert isinstance(threshold, Threshold)
assert mode in ["exact", "heuristic"] assert mode in ["exact", "heuristic"]
self.mode = mode self.mode = mode
self.classifiers: Dict[str, Classifier] = {} self.classifiers: Dict[Category, Classifier] = {}
self.thresholds: Dict[str, Threshold] = {} self.thresholds: Dict[Category, Threshold] = {}
self.threshold_prototype = threshold self.threshold_prototype = threshold
self.classifier_prototype = classifier self.classifier_prototype = classifier
@ -96,7 +96,7 @@ class PrimalSolutionComponent(Component):
def sample_predict(self, sample: Sample) -> Solution: def sample_predict(self, sample: Sample) -> Solution:
var_names = sample.get_array("static_var_names") var_names = sample.get_array("static_var_names")
var_categories = sample.get_vector("static_var_categories") var_categories = sample.get_array("static_var_categories")
assert var_names is not None assert var_names is not None
assert var_categories is not None assert var_categories is not None
@ -120,7 +120,7 @@ class PrimalSolutionComponent(Component):
# Convert y_pred into solution # Convert y_pred into solution
solution: Solution = {v: None for v in var_names} solution: Solution = {v: None for v in var_names}
category_offset: Dict[str, int] = {cat: 0 for cat in x.keys()} category_offset: Dict[Category, int] = {cat: 0 for cat in x.keys()}
for (i, var_name) in enumerate(var_names): for (i, var_name) in enumerate(var_names):
category = var_categories[i] category = var_categories[i]
if category not in category_offset: if category not in category_offset:
@ -146,7 +146,7 @@ class PrimalSolutionComponent(Component):
mip_var_values = sample.get_array("mip_var_values") mip_var_values = sample.get_array("mip_var_values")
var_features = sample.get_vector_list("lp_var_features") var_features = sample.get_vector_list("lp_var_features")
var_names = sample.get_array("static_var_names") var_names = sample.get_array("static_var_names")
var_categories = sample.get_vector("static_var_categories") var_categories = sample.get_array("static_var_categories")
if var_features is None: if var_features is None:
var_features = sample.get_vector_list("static_var_features") var_features = sample.get_vector_list("static_var_features")
assert instance_features is not None assert instance_features is not None
@ -157,7 +157,7 @@ class PrimalSolutionComponent(Component):
for (i, var_name) in enumerate(var_names): for (i, var_name) in enumerate(var_names):
# Initialize categories # Initialize categories
category = var_categories[i] category = var_categories[i]
if category is None: if len(category) == 0:
continue continue
if category not in x.keys(): if category not in x.keys():
x[category] = [] x[category] = []
@ -176,7 +176,7 @@ class PrimalSolutionComponent(Component):
f"Variable {var_name} has non-binary value {opt_value} in the " f"Variable {var_name} has non-binary value {opt_value} in the "
"optimal solution. Predicting values of non-binary " "optimal solution. Predicting values of non-binary "
"variables is not currently supported. Please set its " "variables is not currently supported. Please set its "
"category to None." "category to ''."
) )
y[category].append([opt_value < 0.5, opt_value >= 0.5]) y[category].append([opt_value < 0.5, opt_value >= 0.5])
return x, y return x, y
@ -230,8 +230,8 @@ class PrimalSolutionComponent(Component):
@overrides @overrides
def fit_xy( def fit_xy(
self, self,
x: Dict[str, np.ndarray], x: Dict[Category, np.ndarray],
y: Dict[str, np.ndarray], y: Dict[Category, np.ndarray],
) -> None: ) -> None:
for category in x.keys(): for category in x.keys():
clf = self.classifier_prototype.clone() clf = self.classifier_prototype.clone()

@ -46,20 +46,25 @@ class FeaturesExtractor:
vars_features_user, var_categories = self._extract_user_features_vars( vars_features_user, var_categories = self._extract_user_features_vars(
instance, sample instance, sample
) )
sample.put_vector("static_var_categories", var_categories) sample.put_array("static_var_categories", var_categories)
self._extract_user_features_constrs(instance, sample) self._extract_user_features_constrs(instance, sample)
self._extract_user_features_instance(instance, sample) self._extract_user_features_instance(instance, sample)
alw17 = self._extract_var_features_AlvLouWeh2017(sample) alw17 = self._extract_var_features_AlvLouWeh2017(sample)
sample.put_vector_list(
# Build static_var_features
assert variables.lower_bounds is not None
assert variables.obj_coeffs is not None
assert variables.upper_bounds is not None
sample.put_array(
"static_var_features", "static_var_features",
self._combine( np.hstack(
[ [
alw17,
vars_features_user, vars_features_user,
sample.get_array("static_var_lower_bounds"), alw17,
sample.get_array("static_var_obj_coeffs"), variables.lower_bounds.reshape(-1, 1),
sample.get_array("static_var_upper_bounds"), variables.obj_coeffs.reshape(-1, 1),
], variables.upper_bounds.reshape(-1, 1),
]
), ),
) )
@ -88,23 +93,29 @@ class FeaturesExtractor:
sample.put_array("lp_constr_sa_rhs_up", constraints.sa_rhs_up) sample.put_array("lp_constr_sa_rhs_up", constraints.sa_rhs_up)
sample.put_array("lp_constr_slacks", constraints.slacks) sample.put_array("lp_constr_slacks", constraints.slacks)
alw17 = self._extract_var_features_AlvLouWeh2017(sample) alw17 = self._extract_var_features_AlvLouWeh2017(sample)
sample.put_vector_list(
"lp_var_features", # Build lp_var_features
self._combine( lp_var_features_list = []
[ for f in [
alw17, sample.get_array("static_var_features"),
sample.get_array("lp_var_reduced_costs"), alw17,
sample.get_array("lp_var_sa_lb_down"), ]:
sample.get_array("lp_var_sa_lb_up"), if f is not None:
sample.get_array("lp_var_sa_obj_down"), lp_var_features_list.append(f)
sample.get_array("lp_var_sa_obj_up"), for f in [
sample.get_array("lp_var_sa_ub_down"), variables.reduced_costs,
sample.get_array("lp_var_sa_ub_up"), variables.sa_lb_down,
sample.get_array("lp_var_values"), variables.sa_lb_up,
sample.get_vector_list("static_var_features"), variables.sa_obj_down,
], variables.sa_obj_up,
), variables.sa_ub_down,
) variables.sa_ub_up,
variables.values,
]:
if f is not None:
lp_var_features_list.append(f.reshape(-1, 1))
sample.put_array("lp_var_features", np.hstack(lp_var_features_list))
sample.put_vector_list( sample.put_vector_list(
"lp_constr_features", "lp_constr_features",
self._combine( self._combine(
@ -148,60 +159,49 @@ class FeaturesExtractor:
self, self,
instance: "Instance", instance: "Instance",
sample: Sample, sample: Sample,
) -> Tuple[List, List]: ) -> Tuple[np.ndarray, np.ndarray]:
# Query variable names # Query variable names
var_names = sample.get_array("static_var_names") var_names = sample.get_array("static_var_names")
assert var_names is not None assert var_names is not None
# Query variable features and categories # Query variable features
var_features_dict = { var_features = instance.get_variable_features(var_names)
v.encode(): f for (v, f) in instance.get_variable_features().items() assert isinstance(var_features, np.ndarray), (
} f"Variable features must be a numpy array. "
var_categories_dict = { f"Found {var_features.__class__} instead."
v.encode(): f for (v, f) in instance.get_variable_categories().items() )
} assert len(var_features.shape) == 2, (
f"Variable features must be 2-dimensional array. "
# Assert that variables in user-provided dicts actually exist f"Found array with shape {var_features.shape} instead."
var_names_set = set(var_names) )
for keys in [var_features_dict.keys(), var_categories_dict.keys()]: assert var_features.shape[0] == len(var_names), (
for vn in cast(KeysView, keys): f"Variable features must have exactly {len(var_names)} rows. "
assert ( f"Found {var_features.shape[0]} rows instead."
vn in var_names_set )
), f"Variable {vn!r} not found in the problem; {var_names_set}" assert var_features.dtype.kind in ["f"], (
f"Variable features must be floating point numbers. "
# Assemble into compact lists f"Found dtype: {var_features.dtype} instead."
user_features: List[Optional[List[float]]] = [] )
categories: List[Optional[str]] = []
for (i, var_name) in enumerate(var_names): # Query variable categories
if var_name not in var_categories_dict: var_categories = instance.get_variable_categories(var_names)
user_features.append(None) assert isinstance(var_categories, np.ndarray), (
categories.append(None) f"Variable categories must be a numpy array. "
continue f"Found {var_categories.__class__} instead."
category: str = var_categories_dict[var_name] )
assert isinstance(category, str), ( assert len(var_categories.shape) == 1, (
f"Variable category must be a string. " f"Variable categories must be a vector. "
f"Found {type(category).__name__} instead for var={var_name}." f"Found array with shape {var_categories.shape} instead."
) )
categories.append(category) assert len(var_categories) == len(var_names), (
user_features_i: Optional[List[float]] = None f"Variable categories must have exactly {len(var_names)} elements. "
if var_name in var_features_dict: f"Found {var_features.shape[0]} elements instead."
user_features_i = var_features_dict[var_name] )
if isinstance(user_features_i, np.ndarray): assert var_categories.dtype.kind == "S", (
user_features_i = user_features_i.tolist() f"Variable categories must be a numpy array with dtype='S'. "
assert isinstance(user_features_i, list), ( f"Found {var_categories.dtype} instead."
f"Variable features must be a list. " )
f"Found {type(user_features_i).__name__} instead for " return var_features, var_categories
f"var={var_name}."
)
for v in user_features_i:
assert isinstance(v, numbers.Real), (
f"Variable features must be a list of numbers. "
f"Found {type(v).__name__} instead "
f"for var={var_name}."
)
user_features_i = list(user_features_i)
user_features.append(user_features_i)
return user_features, categories
def _extract_user_features_constrs( def _extract_user_features_constrs(
self, self,
@ -277,7 +277,7 @@ class FeaturesExtractor:
# Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based # Alvarez, A. M., Louveaux, Q., & Wehenkel, L. (2017). A machine learning-based
# approximation of strong branching. INFORMS Journal on Computing, 29(1), 185-195. # approximation of strong branching. INFORMS Journal on Computing, 29(1), 185-195.
def _extract_var_features_AlvLouWeh2017(self, sample: Sample) -> List: def _extract_var_features_AlvLouWeh2017(self, sample: Sample) -> np.ndarray:
obj_coeffs = sample.get_array("static_var_obj_coeffs") obj_coeffs = sample.get_array("static_var_obj_coeffs")
obj_sa_down = sample.get_array("lp_var_sa_obj_down") obj_sa_down = sample.get_array("lp_var_sa_obj_down")
obj_sa_up = sample.get_array("lp_var_sa_obj_up") obj_sa_up = sample.get_array("lp_var_sa_obj_up")
@ -351,7 +351,7 @@ class FeaturesExtractor:
f[i] = 0.0 f[i] = 0.0
features.append(f) features.append(f)
return features return np.array(features, dtype=float)
def _combine( def _combine(
self, self,

@ -63,7 +63,7 @@ class Instance(ABC):
""" """
return np.zeros(1) return np.zeros(1)
def get_variable_features(self) -> Dict[str, List[float]]: def get_variable_features(self, names: np.ndarray) -> np.ndarray:
""" """
Returns dictionary mapping the name of each variable to a (1-dimensional) list Returns dictionary mapping the name of each variable to a (1-dimensional) list
of numerical features describing a particular decision variable. of numerical features describing a particular decision variable.
@ -81,11 +81,11 @@ class Instance(ABC):
If features are not provided for a given variable, MIPLearn will use a If features are not provided for a given variable, MIPLearn will use a
default set of features. default set of features.
By default, returns {}. By default, returns [[0.0], ..., [0.0]].
""" """
return {} return np.zeros((len(names), 1))
def get_variable_categories(self) -> Dict[str, str]: def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
""" """
Returns a dictionary mapping the name of each variable to its category. Returns a dictionary mapping the name of each variable to its category.
@ -93,9 +93,9 @@ class Instance(ABC):
internal ML model to predict the values of both variables. If a variable is not internal ML model to predict the values of both variables. If a variable is not
listed in the dictionary, ML models will ignore the variable. listed in the dictionary, ML models will ignore the variable.
By default, returns {}. By default, returns `names`.
""" """
return {} return names
def get_constraint_features(self) -> Dict[str, List[float]]: def get_constraint_features(self) -> Dict[str, List[float]]:
return {} return {}

@ -36,14 +36,14 @@ class FileInstance(Instance):
return self.instance.get_instance_features() return self.instance.get_instance_features()
@overrides @overrides
def get_variable_features(self) -> Dict[str, List[float]]: def get_variable_features(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None assert self.instance is not None
return self.instance.get_variable_features() return self.instance.get_variable_features(names)
@overrides @overrides
def get_variable_categories(self) -> Dict[str, str]: def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None assert self.instance is not None
return self.instance.get_variable_categories() return self.instance.get_variable_categories(names)
@overrides @overrides
def get_constraint_features(self) -> Dict[str, List[float]]: def get_constraint_features(self) -> Dict[str, List[float]]:

@ -48,14 +48,14 @@ class PickleGzInstance(Instance):
return self.instance.get_instance_features() return self.instance.get_instance_features()
@overrides @overrides
def get_variable_features(self) -> Dict[str, List[float]]: def get_variable_features(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None assert self.instance is not None
return self.instance.get_variable_features() return self.instance.get_variable_features(names)
@overrides @overrides
def get_variable_categories(self) -> Dict[str, str]: def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
assert self.instance is not None assert self.instance is not None
return self.instance.get_variable_categories() return self.instance.get_variable_categories(names)
@overrides @overrides
def get_constraint_features(self) -> Dict[str, List[float]]: def get_constraint_features(self) -> Dict[str, List[float]]:

@ -98,11 +98,13 @@ class MultiKnapsackInstance(Instance):
return np.array([float(np.mean(self.prices))] + list(self.capacities)) return np.array([float(np.mean(self.prices))] + list(self.capacities))
@overrides @overrides
def get_variable_features(self) -> Dict[str, List[float]]: def get_variable_features(self, names: np.ndarray) -> np.ndarray:
return { features = []
f"x[{i}]": [self.prices[i] + list(self.weights[:, i])] for i in range(len(self.weights)):
for i in range(self.n) f = [self.prices[i]]
} f.extend(self.weights[:, i])
features.append(f)
return np.array(features)
# noinspection PyPep8Naming # noinspection PyPep8Naming

@ -66,9 +66,11 @@ class MaxWeightStableSetInstance(Instance):
return model return model
@overrides @overrides
def get_variable_features(self) -> Dict[str, List[float]]: def get_variable_features(self, names: np.ndarray) -> np.ndarray:
features = {} features = []
for v1 in self.nodes: assert len(names) == len(self.nodes)
for i, v1 in enumerate(self.nodes):
assert names[i] == f"x[{v1}]".encode()
neighbor_weights = [0.0] * 15 neighbor_weights = [0.0] * 15
neighbor_degrees = [100.0] * 15 neighbor_degrees = [100.0] * 15
for v2 in self.graph.neighbors(v1): for v2 in self.graph.neighbors(v1):
@ -80,12 +82,12 @@ class MaxWeightStableSetInstance(Instance):
f += neighbor_weights[:5] f += neighbor_weights[:5]
f += neighbor_degrees[:5] f += neighbor_degrees[:5]
f += [self.graph.degree(v1)] f += [self.graph.degree(v1)]
features[f"x[{v1}]"] = f features.append(f)
return features return np.array(features)
@overrides @overrides
def get_variable_categories(self) -> Dict[str, str]: def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
return {f"x[{v}]": "default" for v in self.nodes} return np.array(["default" for _ in names], dtype="S")
class MaxWeightStableSetGenerator: class MaxWeightStableSetGenerator:

@ -80,10 +80,6 @@ class TravelingSalesmanInstance(Instance):
) )
return model return model
@overrides
def get_variable_categories(self) -> Dict[str, str]:
return {f"x[{e}]": f"x[{e}]" for e in self.edges}
@overrides @overrides
def find_violated_lazy_constraints( def find_violated_lazy_constraints(
self, self,

@ -605,6 +605,7 @@ class PyomoTestInstanceKnapsack(Instance):
self.weights = weights self.weights = weights
self.prices = prices self.prices = prices
self.capacity = capacity self.capacity = capacity
self.n = len(weights)
@overrides @overrides
def to_model(self) -> pe.ConcreteModel: def to_model(self) -> pe.ConcreteModel:
@ -631,15 +632,17 @@ class PyomoTestInstanceKnapsack(Instance):
) )
@overrides @overrides
def get_variable_features(self) -> Dict[str, List[float]]: def get_variable_features(self, names: np.ndarray) -> np.ndarray:
return { return np.vstack(
f"x[{i}]": [ [
self.weights[i], [[self.weights[i], self.prices[i]] for i in range(self.n)],
self.prices[i], [0.0, 0.0],
] ]
for i in range(len(self.weights)) )
}
@overrides @overrides
def get_variable_categories(self) -> Dict[str, str]: def get_variable_categories(self, names: np.ndarray) -> np.ndarray:
return {f"x[{i}]": "default" for i in range(len(self.weights))} return np.array(
["default" if n.decode().startswith("x") else "" for n in names],
dtype="S",
)

@ -10,7 +10,7 @@ if TYPE_CHECKING:
# noinspection PyUnresolvedReferences # noinspection PyUnresolvedReferences
from miplearn.solvers.learning import InternalSolver from miplearn.solvers.learning import InternalSolver
Category = str Category = bytes
IterationCallback = Callable[[], bool] IterationCallback = Callable[[], bool]
LazyCallback = Callable[[Any, Any], None] LazyCallback = Callable[[Any, Any], None]
SolverParams = Dict[str, Any] SolverParams = Dict[str, Any]

@ -23,21 +23,28 @@ def sample() -> Sample:
sample = MemorySample( sample = MemorySample(
{ {
"static_var_names": np.array(["x[0]", "x[1]", "x[2]", "x[3]"], dtype="S"), "static_var_names": np.array(["x[0]", "x[1]", "x[2]", "x[3]"], dtype="S"),
"static_var_categories": ["default", None, "default", "default"], "static_var_categories": np.array(
["default", "", "default", "default"],
dtype="S",
),
"mip_var_values": np.array([0.0, 1.0, 1.0, 0.0]), "mip_var_values": np.array([0.0, 1.0, 1.0, 0.0]),
"static_instance_features": [5.0], "static_instance_features": np.array([5.0]),
"static_var_features": [ "static_var_features": np.array(
[0.0, 0.0], [
None, [0.0, 0.0],
[1.0, 0.0], [0.0, 0.0],
[1.0, 1.0], [1.0, 0.0],
], [1.0, 1.0],
"lp_var_features": [ ]
[0.0, 0.0, 2.0, 2.0], ),
None, "lp_var_features": np.array(
[1.0, 0.0, 3.0, 2.0], [
[1.0, 1.0, 3.0, 3.0], [0.0, 0.0, 2.0, 2.0],
], [0.0, 0.0, 0.0, 0.0],
[1.0, 0.0, 3.0, 2.0],
[1.0, 1.0, 3.0, 3.0],
]
),
}, },
) )
return sample return sample
@ -45,14 +52,14 @@ def sample() -> Sample:
def test_xy(sample: Sample) -> None: def test_xy(sample: Sample) -> None:
x_expected = { x_expected = {
"default": [ b"default": [
[5.0, 0.0, 0.0, 2.0, 2.0], [5.0, 0.0, 0.0, 2.0, 2.0],
[5.0, 1.0, 0.0, 3.0, 2.0], [5.0, 1.0, 0.0, 3.0, 2.0],
[5.0, 1.0, 1.0, 3.0, 3.0], [5.0, 1.0, 1.0, 3.0, 3.0],
] ]
} }
y_expected = { y_expected = {
"default": [ b"default": [
[True, False], [True, False],
[False, True], [False, True],
[True, False], [True, False],
@ -72,15 +79,15 @@ def test_fit_xy() -> None:
thr.clone = lambda: Mock(spec=Threshold) thr.clone = lambda: Mock(spec=Threshold)
comp = PrimalSolutionComponent(classifier=clf, threshold=thr) comp = PrimalSolutionComponent(classifier=clf, threshold=thr)
x = { x = {
"type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), b"type-a": np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]),
"type-b": np.array([[7.0, 8.0, 9.0]]), b"type-b": np.array([[7.0, 8.0, 9.0]]),
} }
y = { y = {
"type-a": np.array([[True, False], [False, True]]), b"type-a": np.array([[True, False], [False, True]]),
"type-b": np.array([[True, False]]), b"type-b": np.array([[True, False]]),
} }
comp.fit_xy(x, y) comp.fit_xy(x, y)
for category in ["type-a", "type-b"]: for category in [b"type-a", b"type-b"]:
assert category in comp.classifiers assert category in comp.classifiers
assert category in comp.thresholds assert category in comp.thresholds
clf = comp.classifiers[category] # type: ignore clf = comp.classifiers[category] # type: ignore
@ -142,13 +149,13 @@ def test_predict(sample: Sample) -> None:
thr.predict = Mock(return_value=[0.75, 0.75]) thr.predict = Mock(return_value=[0.75, 0.75])
comp = PrimalSolutionComponent() comp = PrimalSolutionComponent()
x, _ = comp.sample_xy(None, sample) x, _ = comp.sample_xy(None, sample)
comp.classifiers = {"default": clf} comp.classifiers = {b"default": clf}
comp.thresholds = {"default": thr} comp.thresholds = {b"default": thr}
pred = comp.sample_predict(sample) pred = comp.sample_predict(sample)
clf.predict_proba.assert_called_once() clf.predict_proba.assert_called_once()
thr.predict.assert_called_once() thr.predict.assert_called_once()
assert_array_equal(x["default"], clf.predict_proba.call_args[0][0]) assert_array_equal(x[b"default"], clf.predict_proba.call_args[0][0])
assert_array_equal(x["default"], thr.predict.call_args[0][0]) assert_array_equal(x[b"default"], thr.predict.call_args[0][0])
assert pred == { assert pred == {
b"x[0]": 0.0, b"x[0]": 0.0,
b"x[1]": None, b"x[1]": None,

@ -49,8 +49,8 @@ def test_knapsack() -> None:
sample.get_vector("static_var_upper_bounds"), [1.0, 1.0, 1.0, 1.0, 67.0] sample.get_vector("static_var_upper_bounds"), [1.0, 1.0, 1.0, 1.0, 67.0]
) )
assert_equals( assert_equals(
sample.get_vector("static_var_categories"), sample.get_array("static_var_categories"),
["default", "default", "default", "default", None], np.array(["default", "default", "default", "default", ""], dtype="S"),
) )
assert sample.get_vector_list("static_var_features") is not None assert sample.get_vector_list("static_var_features") is not None
assert_equals( assert_equals(

Loading…
Cancel
Save