diff --git a/miplearn/components/objective.py b/miplearn/components/objective.py index cc4e30c..af1316f 100644 --- a/miplearn/components/objective.py +++ b/miplearn/components/objective.py @@ -3,7 +3,7 @@ # Released under the modified BSD license. See COPYING.md for more details. import logging -from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Optional +from typing import List, Dict, Any, TYPE_CHECKING, Tuple, Optional, cast import numpy as np from overrides import overrides @@ -77,10 +77,11 @@ class ObjectiveValueComponent(Component): _: Optional[Instance], sample: Sample, ) -> Tuple[Dict[str, List[List[float]]], Dict[str, List[List[float]]]]: - lp_instance_features = sample.get_vector("lp_instance_features") - if lp_instance_features is None: - lp_instance_features = sample.get_vector("static_instance_features") - assert lp_instance_features is not None + lp_instance_features_np = sample.get_array("lp_instance_features") + if lp_instance_features_np is None: + lp_instance_features_np = sample.get_array("static_instance_features") + assert lp_instance_features_np is not None + lp_instance_features = cast(List[float], lp_instance_features_np.tolist()) # Features x: Dict[str, List[List[float]]] = { diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py index 769cbef..00a0140 100644 --- a/miplearn/components/primal.py +++ b/miplearn/components/primal.py @@ -142,13 +142,13 @@ class PrimalSolutionComponent(Component): ) -> Tuple[Dict[Category, List[List[float]]], Dict[Category, List[List[float]]]]: x: Dict = {} y: Dict = {} - instance_features = sample.get_vector("static_instance_features") + instance_features = sample.get_array("static_instance_features") mip_var_values = sample.get_array("mip_var_values") - var_features = sample.get_vector_list("lp_var_features") + var_features = sample.get_array("lp_var_features") var_names = sample.get_array("static_var_names") var_categories = sample.get_array("static_var_categories") if var_features is None: - var_features = sample.get_vector_list("static_var_features") + var_features = sample.get_array("static_var_features") assert instance_features is not None assert var_features is not None assert var_names is not None diff --git a/miplearn/components/static_lazy.py b/miplearn/components/static_lazy.py index efc11d8..2dd300e 100644 --- a/miplearn/components/static_lazy.py +++ b/miplearn/components/static_lazy.py @@ -207,14 +207,14 @@ class StaticLazyConstraintsComponent(Component): x: Dict[ConstraintCategory, List[List[float]]] = {} y: Dict[ConstraintCategory, List[List[float]]] = {} cids: Dict[ConstraintCategory, List[ConstraintName]] = {} - instance_features = sample.get_vector("static_instance_features") - constr_features = sample.get_vector_list("lp_constr_features") + instance_features = sample.get_array("static_instance_features") + constr_features = sample.get_array("lp_constr_features") constr_names = sample.get_array("static_constr_names") - constr_categories = sample.get_vector("static_constr_categories") + constr_categories = sample.get_array("static_constr_categories") constr_lazy = sample.get_array("static_constr_lazy") lazy_enforced = sample.get_set("mip_constr_lazy_enforced") if constr_features is None: - constr_features = sample.get_vector_list("static_constr_features") + constr_features = sample.get_array("static_constr_features") assert instance_features is not None assert constr_features is not None @@ -227,7 +227,7 @@ class StaticLazyConstraintsComponent(Component): if not constr_lazy[cidx]: continue category = constr_categories[cidx] - if category is None: + if len(category) == 0: continue if category not in x: x[category] = [] diff --git a/miplearn/features/sample.py b/miplearn/features/sample.py index d3b8d65..6afde9e 100644 --- a/miplearn/features/sample.py +++ b/miplearn/features/sample.py @@ -38,15 +38,6 @@ VectorList = Union[ class Sample(ABC): """Abstract dictionary-like class that stores training data.""" - @abstractmethod - def get_bytes(self, key: str) -> Optional[Bytes]: - warnings.warn("Deprecated", DeprecationWarning) - return None - - @abstractmethod - def put_bytes(self, key: str, value: Bytes) -> None: - warnings.warn("Deprecated", DeprecationWarning) - @abstractmethod def get_scalar(self, key: str) -> Optional[Any]: pass @@ -64,15 +55,6 @@ class Sample(ABC): def put_vector(self, key: str, value: Vector) -> None: warnings.warn("Deprecated", DeprecationWarning) - @abstractmethod - def get_vector_list(self, key: str) -> Optional[Any]: - warnings.warn("Deprecated", DeprecationWarning) - return None - - @abstractmethod - def put_vector_list(self, key: str, value: VectorList) -> None: - warnings.warn("Deprecated", DeprecationWarning) - @abstractmethod def put_array(self, key: str, value: Optional[np.ndarray]) -> None: pass @@ -90,6 +72,7 @@ class Sample(ABC): pass def get_set(self, key: str) -> Set: + warnings.warn("Deprecated", DeprecationWarning) v = self.get_vector(key) if v: return set(v) @@ -97,6 +80,7 @@ class Sample(ABC): return set() def put_set(self, key: str, value: Set) -> None: + warnings.warn("Deprecated", DeprecationWarning) v = list(value) self.put_vector(key, v) @@ -114,15 +98,6 @@ class Sample(ABC): for v in value: self._assert_is_scalar(v) - def _assert_is_vector_list(self, value: Any) -> None: - assert isinstance( - value, (list, np.ndarray) - ), f"list or numpy array expected; found instead: {value} ({value.__class__})" - for v in value: - if v is None: - continue - self._assert_is_vector(v) - def _assert_supported(self, value: np.ndarray) -> None: assert isinstance(value, np.ndarray) assert value.dtype.kind in "biufS", f"Unsupported dtype: {value.dtype}" @@ -145,10 +120,6 @@ class MemorySample(Sample): self._data: Dict[str, Any] = data self._check_data = check_data - @overrides - def get_bytes(self, key: str) -> Optional[Bytes]: - return self._get(key) - @overrides def get_scalar(self, key: str) -> Optional[Any]: return self._get(key) @@ -157,17 +128,6 @@ class MemorySample(Sample): def get_vector(self, key: str) -> Optional[Any]: return self._get(key) - @overrides - def get_vector_list(self, key: str) -> Optional[Any]: - return self._get(key) - - @overrides - def put_bytes(self, key: str, value: Bytes) -> None: - assert isinstance( - value, (bytes, bytearray) - ), f"bytes expected; found: {value}" # type: ignore - self._put(key, value) - @overrides def put_scalar(self, key: str, value: Scalar) -> None: if value is None: @@ -184,12 +144,6 @@ class MemorySample(Sample): self._assert_is_vector(value) self._put(key, value) - @overrides - def put_vector_list(self, key: str, value: VectorList) -> None: - if self._check_data: - self._assert_is_vector_list(value) - self._put(key, value) - def _get(self, key: str) -> Optional[Any]: if key in self._data: return self._data[key] @@ -239,16 +193,6 @@ class Hdf5Sample(Sample): self.file = h5py.File(filename, mode, libver="latest") self._check_data = check_data - @overrides - def get_bytes(self, key: str) -> Optional[Bytes]: - if key not in self.file: - return None - ds = self.file[key] - assert ( - len(ds.shape) == 1 - ), f"1-dimensional array expected; found shape {ds.shape}" - return ds[()].tobytes() - @overrides def get_scalar(self, key: str) -> Optional[Any]: if key not in self.file: @@ -277,26 +221,6 @@ class Hdf5Sample(Sample): else: return ds[:].tolist() - @overrides - def get_vector_list(self, key: str) -> Optional[Any]: - if key not in self.file: - return None - ds = self.file[key] - lens = self.get_vector(f"{key}_lengths") - if h5py.check_string_dtype(ds.dtype): - padded = ds.asstr()[:].tolist() - else: - padded = ds[:].tolist() - return _crop(padded, lens) - - @overrides - def put_bytes(self, key: str, value: Bytes) -> None: - if self._check_data: - assert isinstance( - value, (bytes, bytearray) - ), f"bytes expected; found: {value}" # type: ignore - self._put(key, np.frombuffer(value, dtype="uint8"), compress=True) - @overrides def put_scalar(self, key: str, value: Any) -> None: if value is None: @@ -328,29 +252,6 @@ class Hdf5Sample(Sample): self._put(key, value, compress=True) - @overrides - def put_vector_list(self, key: str, value: VectorList) -> None: - if self._check_data: - self._assert_is_vector_list(value) - padded, lens = _pad(value) - self.put_vector(f"{key}_lengths", lens) - data = None - for v in value: - if v is None or len(v) == 0: - continue - if isinstance(v[0], str): - data = np.array(padded, dtype="S") - elif isinstance(v[0], float): - data = np.array(padded, dtype=np.dtype("f2")) - elif isinstance(v[0], bool): - data = np.array(padded, dtype=bool) - else: - data = np.array(padded) - break - if data is None: - data = np.array(padded) - self._put(key, data, compress=True) - def _put(self, key: str, value: Any, compress: bool = False) -> Dataset: if key in self.file: del self.file[key] @@ -394,44 +295,3 @@ class Hdf5Sample(Sample): assert col is not None assert data is not None return coo_matrix((data, (row, col))) - - -def _pad(veclist: VectorList) -> Tuple[VectorList, List[int]]: - veclist = deepcopy(veclist) - lens = [len(v) if v is not None else -1 for v in veclist] - maxlen = max(lens) - - # Find appropriate constant to pad the vectors - constant: Union[int, float, str] = 0 - for v in veclist: - if v is None or len(v) == 0: - continue - if isinstance(v[0], int): - constant = 0 - elif isinstance(v[0], float): - constant = 0.0 - elif isinstance(v[0], str): - constant = "" - else: - assert False, f"unsupported data type: {v[0]}" - - # Pad vectors - for (i, vi) in enumerate(veclist): - if vi is None: - vi = veclist[i] = [] - assert isinstance(vi, list), f"list expected; found: {vi}" - for k in range(len(vi), maxlen): - vi.append(constant) - - return veclist, lens - - -def _crop(veclist: VectorList, lens: List[int]) -> VectorList: - result: VectorList = cast(VectorList, []) - for (i, v) in enumerate(veclist): - if lens[i] < 0: - result.append(None) # type: ignore - else: - assert isinstance(v, list) - result.append(v[: lens[i]]) - return result diff --git a/miplearn/instance/file.py b/miplearn/instance/file.py index 5c2615f..a08d6b2 100644 --- a/miplearn/instance/file.py +++ b/miplearn/instance/file.py @@ -111,14 +111,14 @@ class FileInstance(Instance): def load(self) -> None: if self.instance is not None: return - self.instance = pickle.loads(self.h5.get_bytes("pickled")) + self.instance = pickle.loads(self.h5.get_array("pickled").tobytes()) assert isinstance(self.instance, Instance) @classmethod def save(cls, instance: Instance, filename: str) -> None: h5 = Hdf5Sample(filename, mode="w") - instance_pkl = pickle.dumps(instance) - h5.put_bytes("pickled", instance_pkl) + instance_pkl = np.frombuffer(pickle.dumps(instance), dtype=np.int8) + h5.put_array("pickled", instance_pkl) @overrides def create_sample(self) -> Sample: diff --git a/tests/components/test_objective.py b/tests/components/test_objective.py index 82a6a05..fc45083 100644 --- a/tests/components/test_objective.py +++ b/tests/components/test_objective.py @@ -13,6 +13,7 @@ from miplearn.components.objective import ObjectiveValueComponent from miplearn.features.sample import Sample, MemorySample from miplearn.solvers.learning import LearningSolver from miplearn.solvers.pyomo.gurobi import GurobiPyomoSolver +from miplearn.solvers.tests import assert_equals @pytest.fixture @@ -21,7 +22,7 @@ def sample() -> Sample: { "mip_lower_bound": 1.0, "mip_upper_bound": 2.0, - "lp_instance_features": [1.0, 2.0, 3.0], + "lp_instance_features": np.array([1.0, 2.0, 3.0]), }, ) return sample @@ -29,18 +30,18 @@ def sample() -> Sample: def test_sample_xy(sample: Sample) -> None: x_expected = { - "Lower bound": [[1.0, 2.0, 3.0]], - "Upper bound": [[1.0, 2.0, 3.0]], + "Lower bound": np.array([[1.0, 2.0, 3.0]]), + "Upper bound": np.array([[1.0, 2.0, 3.0]]), } y_expected = { - "Lower bound": [[1.0]], - "Upper bound": [[2.0]], + "Lower bound": np.array([[1.0]]), + "Upper bound": np.array([[2.0]]), } xy = ObjectiveValueComponent().sample_xy(None, sample) assert xy is not None x_actual, y_actual = xy - assert x_actual == x_expected - assert y_actual == y_expected + assert_equals(x_actual, x_expected) + assert_equals(y_actual, y_expected) def test_fit_xy() -> None: diff --git a/tests/components/test_static_lazy.py b/tests/components/test_static_lazy.py index cd09d16..5a38822 100644 --- a/tests/components/test_static_lazy.py +++ b/tests/components/test_static_lazy.py @@ -36,13 +36,15 @@ def sample() -> Sample: "static_constr_names": np.array(["c1", "c2", "c3", "c4", "c5"], dtype="S"), "static_instance_features": [5.0], "mip_constr_lazy_enforced": {b"c1", b"c2", b"c4"}, - "lp_constr_features": [ - [1.0, 1.0], - [1.0, 2.0], - [1.0, 3.0], - [1.0, 4.0, 0.0], - None, - ], + "lp_constr_features": np.array( + [ + [1.0, 1.0, 0.0], + [1.0, 2.0, 0.0], + [1.0, 3.0, 0.0], + [1.0, 4.0, 0.0], + [0.0, 0.0, 0.0], + ] + ), "static_constr_lazy_count": 4, }, ) @@ -216,7 +218,7 @@ def test_fit_xy() -> None: def test_sample_xy(sample: Sample) -> None: x_expected = { - b"type-a": [[5.0, 1.0, 1.0], [5.0, 1.0, 2.0], [5.0, 1.0, 3.0]], + b"type-a": [[5.0, 1.0, 1.0, 0.0], [5.0, 1.0, 2.0, 0.0], [5.0, 1.0, 3.0, 0.0]], b"type-b": [[5.0, 1.0, 4.0, 0.0]], } y_expected = { diff --git a/tests/features/test_extractor.py b/tests/features/test_extractor.py index 2c6ac37..c6527f7 100644 --- a/tests/features/test_extractor.py +++ b/tests/features/test_extractor.py @@ -61,7 +61,7 @@ def test_knapsack() -> None: np.array(["default", "default", "default", "default", ""], dtype="S"), ) assert_equals( - sample.get_vector_list("static_var_features"), + sample.get_array("static_var_features"), np.array( [ [23.0, 505.0, 1.0, 0.32899, 0.0, 505.0, 1.0], @@ -155,7 +155,7 @@ def test_knapsack() -> None: np.array([1.0, 0.923077, 1.0, 0.0, 67.0]), ) assert_equals( - sample.get_vector_list("lp_var_features"), + sample.get_array("lp_var_features"), np.array( [ [ diff --git a/tests/instance/test_file.py b/tests/instance/test_file.py index 4dfb607..4beb80a 100644 --- a/tests/instance/test_file.py +++ b/tests/instance/test_file.py @@ -18,7 +18,7 @@ def test_usage() -> None: filename = tempfile.mktemp() FileInstance.save(original, filename) sample = Hdf5Sample(filename, check_data=True) - assert len(sample.get_bytes("pickled")) > 0 + assert len(sample.get_array("pickled")) > 0 # Solve instance from disk solver = LearningSolver(solver=GurobiSolver())