|
|
@ -3,13 +3,25 @@
|
|
|
|
# Released under the modified BSD license. See COPYING.md for more details.
|
|
|
|
# Released under the modified BSD license. See COPYING.md for more details.
|
|
|
|
|
|
|
|
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
from typing import Dict, Optional, Any, Union, List
|
|
|
|
from copy import deepcopy
|
|
|
|
|
|
|
|
from typing import Dict, Optional, Any, Union, List, Tuple, cast
|
|
|
|
|
|
|
|
|
|
|
|
import h5py
|
|
|
|
import h5py
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
from overrides import overrides
|
|
|
|
from overrides import overrides
|
|
|
|
|
|
|
|
|
|
|
|
# A single storable value; None is allowed.
Scalar = Union[None, bool, str, int, float]

# A list whose elements all share one scalar type; None is allowed.
Vector = Union[None, List[bool], List[str], List[int], List[float]]

# A list of vectors. The first four alternatives contain only lists; the last
# four additionally allow individual entries to be None.
VectorList = Union[
    List[List[bool]],
    List[List[str]],
    List[List[int]],
    List[List[float]],
    List[Optional[List[bool]]],
    List[Optional[List[str]]],
    List[Optional[List[int]]],
    List[Optional[List[float]]],
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Sample(ABC):
|
|
|
|
class Sample(ABC):
|
|
|
@ -31,6 +43,14 @@ class Sample(ABC):
|
|
|
|
def put_vector(self, key: str, value: Vector) -> None:
|
|
|
|
def put_vector(self, key: str, value: Vector) -> None:
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
|
|
|
|
def get_vector_list(self, key: str) -> Optional[Any]:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
|
|
|
|
def put_vector_list(self, key: str, value: VectorList) -> None:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
@abstractmethod
|
|
|
|
def get(self, key: str) -> Optional[Any]:
|
|
|
|
def get(self, key: str) -> Optional[Any]:
|
|
|
|
pass
|
|
|
|
pass
|
|
|
@ -65,17 +85,24 @@ class Sample(ABC):
|
|
|
|
return
|
|
|
|
return
|
|
|
|
assert False, f"Value has unsupported type: {value}"
|
|
|
|
assert False, f"Value has unsupported type: {value}"
|
|
|
|
|
|
|
|
|
|
|
|
def _assert_scalar(self, value: Any) -> None:
|
|
|
|
def _assert_is_scalar(self, value: Any) -> None:
|
|
|
|
if value is None:
|
|
|
|
if value is None:
|
|
|
|
return
|
|
|
|
return
|
|
|
|
if isinstance(value, (str, bool, int, float)):
|
|
|
|
if isinstance(value, (str, bool, int, float)):
|
|
|
|
return
|
|
|
|
return
|
|
|
|
assert False, f"Scalar expected; found instead: {value}"
|
|
|
|
assert False, f"Scalar expected; found instead: {value}"
|
|
|
|
|
|
|
|
|
|
|
|
def _assert_vector(self, value: Any) -> None:
|
|
|
|
def _assert_is_vector(self, value: Any) -> None:
|
|
|
|
assert isinstance(value, list), f"List expected; found instead: {value}"
|
|
|
|
assert isinstance(value, list), f"List expected; found instead: {value}"
|
|
|
|
for v in value:
|
|
|
|
for v in value:
|
|
|
|
self._assert_scalar(v)
|
|
|
|
self._assert_is_scalar(v)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _assert_is_vector_list(self, value: Any) -> None:
|
|
|
|
|
|
|
|
assert isinstance(value, list), f"List expected; found instead: {value}"
|
|
|
|
|
|
|
|
for v in value:
|
|
|
|
|
|
|
|
if v is None:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
self._assert_is_vector(v)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MemorySample(Sample):
|
|
|
|
class MemorySample(Sample):
|
|
|
@ -94,19 +121,28 @@ class MemorySample(Sample):
|
|
|
|
return self.get(key)
|
|
|
|
return self.get(key)
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
|
def put_scalar(self, key: str, value: Scalar) -> None:
|
|
|
|
def get_vector(self, key: str) -> Optional[Any]:
|
|
|
|
self._assert_scalar(value)
|
|
|
|
return self.get(key)
|
|
|
|
self.put(key, value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
|
def get_vector(self, key: str) -> Optional[Any]:
|
|
|
|
def get_vector_list(self, key: str) -> Optional[Any]:
|
|
|
|
return self.get(key)
|
|
|
|
return self.get(key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
|
|
|
|
def put_scalar(self, key: str, value: Scalar) -> None:
|
|
|
|
|
|
|
|
self._assert_is_scalar(value)
|
|
|
|
|
|
|
|
self.put(key, value)
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
|
def put_vector(self, key: str, value: Vector) -> None:
|
|
|
|
def put_vector(self, key: str, value: Vector) -> None:
|
|
|
|
if value is None:
|
|
|
|
if value is None:
|
|
|
|
return
|
|
|
|
return
|
|
|
|
self._assert_vector(value)
|
|
|
|
self._assert_is_vector(value)
|
|
|
|
|
|
|
|
self.put(key, value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
|
|
|
|
def put_vector_list(self, key: str, value: VectorList) -> None:
|
|
|
|
|
|
|
|
self._assert_is_vector_list(value)
|
|
|
|
self.put(key, value)
|
|
|
|
self.put(key, value)
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
@ -145,23 +181,55 @@ class Hdf5Sample(Sample):
|
|
|
|
def get_vector(self, key: str) -> Optional[Any]:
    """Read the one-dimensional dataset stored under `key` as a Python list.

    Datasets with a string dtype are read through `asstr()`; any other
    dataset is read directly. Either way the result is converted with
    `tolist()`.

    Raises:
        AssertionError: if the dataset is not one-dimensional.
    """
    ds = self.file[key]
    assert len(ds.shape) == 1
    # The leftover debug `print(ds.dtype)` was removed: reading a vector
    # should not write to stdout.
    if h5py.check_string_dtype(ds.dtype):
        return ds.asstr()[:].tolist()
    return ds[:].tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
|
|
|
|
def get_vector_list(self, key: str) -> Optional[Any]:
    """Read the padded dataset under `key` and crop each row to its length.

    The per-row lengths come from the dataset's "lengths" attribute and are
    passed, with the rows, to `_crop`. String datasets are read through
    `asstr()`.
    """
    dataset = self.file[key]
    lengths = dataset.attrs["lengths"]
    is_text = h5py.check_string_dtype(dataset.dtype)
    reader = dataset.asstr() if is_text else dataset
    return _crop(reader[:].tolist(), lengths)
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
|
def put_scalar(self, key: str, value: Any) -> None:
|
|
|
|
def put_scalar(self, key: str, value: Any) -> None:
|
|
|
|
self._assert_scalar(value)
|
|
|
|
self._assert_is_scalar(value)
|
|
|
|
self.put(key, value)
|
|
|
|
self.put(key, value)
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
|
def put_vector(self, key: str, value: Vector) -> None:
|
|
|
|
def put_vector(self, key: str, value: Vector) -> None:
|
|
|
|
if value is None:
|
|
|
|
if value is None:
|
|
|
|
return
|
|
|
|
return
|
|
|
|
self._assert_vector(value)
|
|
|
|
self._assert_is_vector(value)
|
|
|
|
self.put(key, value)
|
|
|
|
self.put(key, value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
|
|
|
|
def put_vector_list(self, key: str, value: VectorList) -> None:
    """Store a list of variable-length vectors under `key`.

    The vectors are padded to a common length with `_pad` and written as a
    single dataset. The original lengths are saved in the dataset's
    "lengths" attribute, where `_pad` records -1 for None entries.

    Raises:
        AssertionError: if `value` is not a valid vector list, or if it
            contains no non-empty vector to infer the element type from.
    """
    self._assert_is_vector_list(value)
    padded, lens = _pad(value)

    # The element type is taken from the first non-empty vector.
    first = next((v for v in value if v is not None and len(v) > 0), None)
    assert first is not None
    if isinstance(first[0], str):
        data = np.array(padded, dtype="S")
    elif isinstance(first[0], bool):
        data = np.array(padded, dtype=bool)
    else:
        data = np.array(padded)

    # Drop the previous dataset only once the replacement is fully built,
    # so a failure above leaves the stored data untouched.
    if key in self.file:
        del self.file[key]
    ds = self.file.create_dataset(key, data=data)
    ds.attrs["lengths"] = lens
|
|
|
|
|
|
|
|
|
|
|
|
@overrides
|
|
|
|
@overrides
|
|
|
|
def get(self, key: str) -> Optional[Any]:
|
|
|
|
def get(self, key: str) -> Optional[Any]:
|
|
|
|
ds = self.file[key]
|
|
|
|
ds = self.file[key]
|
|
|
@ -175,3 +243,45 @@ class Hdf5Sample(Sample):
|
|
|
|
if key in self.file:
|
|
|
|
if key in self.file:
|
|
|
|
del self.file[key]
|
|
|
|
del self.file[key]
|
|
|
|
self.file.create_dataset(key, data=value)
|
|
|
|
self.file.create_dataset(key, data=value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _pad(veclist: VectorList) -> Tuple[VectorList, List[int]]:
    """Pad every vector in `veclist` to the length of the longest one.

    Works on a deep copy, so the caller's lists are never modified. None
    entries become fully padded vectors. The padding value is chosen from
    the first element of the first non-empty vector: False for bool, 0 for
    int, 0.0 for float and "" for str.

    Returns:
        A pair `(padded, lens)`. `lens[i]` is the original length of entry
        `i`, or -1 if that entry was None.

    Raises:
        AssertionError: if `veclist` has no non-empty vector, or if the
            element type is not bool, int, float or str.
    """
    veclist = deepcopy(veclist)
    lens = [len(v) if v is not None else -1 for v in veclist]
    # default=0 lets an empty veclist reach the descriptive assertion below
    # instead of failing inside max().
    maxlen = max(lens, default=0)

    # Find appropriate constant to pad the vectors
    sample = next((v for v in veclist if v is not None and len(v) > 0), None)
    assert sample is not None, "veclist must not be completely empty"
    head = sample[0]
    constant: Union[bool, int, float, str, None] = None
    if isinstance(head, bool):
        # bool is checked before int because bool is a subclass of int.
        constant = False
    elif isinstance(head, int):
        constant = 0
    elif isinstance(head, float):
        constant = 0.0
    elif isinstance(head, str):
        constant = ""
    else:
        assert False, f"Unsupported data type: {head}"

    # Pad vectors
    for i, vi in enumerate(veclist):
        if vi is None:
            vi = veclist[i] = []  # type: ignore
        assert isinstance(vi, list)
        vi.extend([constant] * (maxlen - len(vi)))

    return veclist, lens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _crop(veclist: VectorList, lens: List[int]) -> VectorList:
    """Cut each padded vector in `veclist` back to the length in `lens`.

    A negative length produces None for that position; otherwise the vector
    is sliced to its first `lens[i]` elements.

    Raises:
        AssertionError: if an entry with a non-negative length is not a list.
    """
    cropped: VectorList = []  # type: ignore
    for idx, row in enumerate(veclist):
        size = lens[idx]
        if size < 0:
            cropped.append(None)  # type: ignore
            continue
        assert isinstance(row, list)
        cropped.append(row[:size])
    return cropped
|
|
|
|