Hdf5Sample: store vector-list lengths as a separate "{key}_lengths" dataset instead of a dataset attribute

This commit is contained in:
2021-07-27 11:47:26 -05:00
parent 728a6bc835
commit b6880f068c

View File

@@ -207,7 +207,7 @@ class Hdf5Sample(Sample):
if key not in self.file: if key not in self.file:
return None return None
ds = self.file[key] ds = self.file[key]
lens = ds.attrs["lengths"] lens = self.get_vector(f"{key}_lengths")
if h5py.check_string_dtype(ds.dtype): if h5py.check_string_dtype(ds.dtype):
padded = ds.asstr()[:].tolist() padded = ds.asstr()[:].tolist()
else: else:
@@ -238,6 +238,7 @@ class Hdf5Sample(Sample):
def put_vector_list(self, key: str, value: VectorList) -> None: def put_vector_list(self, key: str, value: VectorList) -> None:
self._assert_is_vector_list(value) self._assert_is_vector_list(value)
padded, lens = _pad(value) padded, lens = _pad(value)
self.put_vector(f"{key}_lengths", lens)
data = None data = None
for v in value: for v in value:
if v is None or len(v) == 0: if v is None or len(v) == 0:
@@ -251,8 +252,7 @@ class Hdf5Sample(Sample):
break break
if data is None: if data is None:
data = np.array(padded) data = np.array(padded)
ds = self._put(key, data) self._put(key, data)
ds.attrs["lengths"] = lens
def _put(self, key: str, value: Any) -> Dataset: def _put(self, key: str, value: Any) -> Dataset:
if key in self.file: if key in self.file: