Hdf5Sample: Enable compression

master
Alinson S. Xavier 4 years ago
parent 7163472cfc
commit c513515725
No known key found for this signature in database
GPG Key ID: DCA0DAD4D2F58624

@ -233,7 +233,7 @@ class Hdf5Sample(Sample):
assert isinstance( assert isinstance(
value, (bytes, bytearray) value, (bytes, bytearray)
), f"bytes expected; found: {value}" # type: ignore ), f"bytes expected; found: {value}" # type: ignore
self._put(key, np.frombuffer(value, dtype="uint8")) self._put(key, np.frombuffer(value, dtype="uint8"), compress=True)
@overrides @overrides
def put_scalar(self, key: str, value: Any) -> None: def put_scalar(self, key: str, value: Any) -> None:
@ -248,7 +248,7 @@ class Hdf5Sample(Sample):
return return
self._assert_is_vector(value) self._assert_is_vector(value)
modified = [v if v is not None else "" for v in value] modified = [v if v is not None else "" for v in value]
self._put(key, modified) self._put(key, modified, compress=True)
@overrides @overrides
def put_vector_list(self, key: str, value: VectorList) -> None: def put_vector_list(self, key: str, value: VectorList) -> None:
@ -268,12 +268,16 @@ class Hdf5Sample(Sample):
break break
if data is None: if data is None:
data = np.array(padded) data = np.array(padded)
self._put(key, data) self._put(key, data, compress=True)
def _put(self, key: str, value: Any, compress: bool = False) -> Dataset:
    """Write *value* into the underlying HDF5 file under *key*.

    Any dataset already stored under *key* is removed first, so the call
    acts as an overwrite.

    Parameters
    ----------
    key : str
        Name of the dataset inside ``self.file``.
    value : Any
        Data accepted by ``h5py.File.create_dataset``.
    compress : bool
        When ``True``, store the dataset gzip-compressed.

    Returns
    -------
    Dataset
        The newly created HDF5 dataset.
    """
    # h5py raises if a dataset name already exists, so delete it first.
    if key in self.file:
        del self.file[key]
    # Pass the compression option only when requested; omitting the kwarg
    # is equivalent to h5py's default of compression=None.
    extra = {"compression": "gzip"} if compress else {}
    return self.file.create_dataset(key, data=value, **extra)
def _pad(veclist: VectorList) -> Tuple[VectorList, List[int]]: def _pad(veclist: VectorList) -> Tuple[VectorList, List[int]]:

Loading…
Cancel
Save