import abc
import os
import numpy as np
import h5py
import pymia.data.indexexpression as expr
class Writer(abc.ABC):
    """Abstract base class that defines the interface of a dataset writer.

    A writer can be used as a context manager: entering the ``with`` block calls :meth:`open` and
    leaving it calls :meth:`close`. :meth:`close` is also called when the instance is finalized.
    """

    def __enter__(self):
        # open the underlying resource and hand the writer itself to the ``with`` statement
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # always close; nothing is returned, hence exceptions raised in the ``with`` body propagate
        self.close()

    def __del__(self):
        # make sure the writer gets closed when the object is finalized
        self.close()

    @abc.abstractmethod
    def close(self):
        """Close the writer."""

    @abc.abstractmethod
    def open(self):
        """Open the writer."""

    @abc.abstractmethod
    def reserve(self, entry: str, shape: tuple, dtype=None):
        """Reserve space in the dataset such that it can be written to later on.

        Args:
            entry(str): The dataset entry to be created.
            shape(tuple): The shape to be reserved.
            dtype: The dtype.
        """

    @abc.abstractmethod
    def fill(self, entry: str, data, index: expr.IndexExpression = None):
        """Write data into (parts of) a previously reserved dataset entry.

        Args:
            entry(str): The dataset entry to be filled.
            data(object): The data to write.
            index(.IndexExpression): The slicing expression.
        """

    @abc.abstractmethod
    def write(self, entry: str, data, dtype=None):
        """Create a dataset entry and write the data to it.

        Args:
            entry(str): The dataset entry to be written.
            data(object): The data to write.
            dtype: The dtype.
        """
class Hdf5Writer(Writer):
    """Dataset writer for the HDF5 file type, backed by a :class:`h5py.File`.

    String data is stored with the variable-length string dtype :attr:`str_type`
    (in order not to use length limited strings).
    """

    # variable-length string dtype used for all string entries
    str_type = h5py.special_dtype(vlen=str)

    def __init__(self, file_path: str) -> None:
        """Writer class for HDF5 file type.

        Args:
            file_path(str): The path to the dataset file to write.
        """
        self.h5 = None  # type: h5py.File
        self.file_path = file_path

    @staticmethod
    def _is_str_dtype(dtype) -> bool:
        """Check whether a user-supplied dtype requests string data.

        Args:
            dtype: The dtype passed to :meth:`reserve` or :meth:`write`.

        Returns:
            bool: True if ``dtype`` is the built-in ``str``, equals ``'str'``, or is a NumPy dtype
            whose scalar type is ``np.str_``.
        """
        return bool(dtype is str or dtype == 'str' or (isinstance(dtype, np.dtype) and dtype.type == np.str_))

    def close(self):
        """see :meth:`.Writer.close`"""
        # closing is a no-op when the file has not been opened (or is already closed)
        if self.h5 is not None:
            self.h5.close()
            self.h5 = None

    def open(self):
        """see :meth:`.Writer.open`"""
        self.h5 = h5py.File(self.file_path, mode='a', libver='latest')

    def reserve(self, entry: str, shape: tuple, dtype=None):
        """see :meth:`.Writer.reserve`"""
        # special string handling (in order not to use length limited strings)
        if self._is_str_dtype(dtype):
            dtype = self.str_type
        self.h5.create_dataset(entry, shape, dtype=dtype)

    def fill(self, entry: str, data, index: expr.IndexExpression = None):
        """see :meth:`.Writer.fill`"""
        dataset = self.h5[entry]
        # special string handling (in order not to use length limited strings):
        # inspect the vlen information of the dtype instead of comparing object identity with ``str_type``.
        # The dtype of a dataset is generally a distinct object, so an identity test would not detect
        # string entries.
        if h5py.check_dtype(vlen=dataset.dtype) is str:
            data = np.asarray(data, dtype=object)

        # without an explicit index, fall back to a default-constructed index expression
        if index is None:
            index = expr.IndexExpression()

        dataset[index.expression] = data

    def write(self, entry: str, data, dtype=None):
        """see :meth:`.Writer.write`"""
        # special string handling (in order not to use length limited strings)
        if self._is_str_dtype(dtype):
            dtype = self.str_type
            data = np.asarray(data, dtype=object)

        # an already existing entry of the same name is replaced
        if entry in self.h5:
            del self.h5[entry]
        self.h5.create_dataset(entry, dtype=dtype, data=data)
def get_writer(file_path: str) -> Writer:
    """Create the dataset writer that matches the extension of a file path.

    The extension (as returned by :func:`os.path.splitext`) is looked up in the writer registry and
    the registered class is instantiated with ``file_path``.

    Args:
        file_path(str): The path of the dataset file to be written.

    Returns:
        .creation.writer.Writer: Writer corresponding to dataset file extension.

    Raises:
        ValueError: If the file extension is not contained in the writer registry.
    """
    _, extension = os.path.splitext(file_path)

    if extension in writer_registry:
        writer_cls = writer_registry[extension]
        return writer_cls(file_path)

    raise ValueError('unknown dataset file extension "{}"'.format(extension))
# both common HDF5 file extensions map to the same writer class
writer_registry = {
    '.h5': Hdf5Writer,
    '.hdf5': Hdf5Writer,
}
"""Registry defining the mapping between file extension and :class:`.Writer` class.

Alternative writers need to be added to this registry in order to use :func:`.get_writer`."""