# Source code for pymia.data.extraction.reader

import abc
import os

import h5py
import numpy as np

import pymia.data.definition as defs
import pymia.data.extraction.byte_converter as byte_converter
import pymia.data.indexexpression as expr


class Reader(abc.ABC):
    """Abstract dataset reader.

    Instances can be used as context managers: entering the context calls
    :meth:`open` and leaving it calls :meth:`close`. :meth:`close` is also
    called when the object is finalized.
    """

    def __init__(self, file_path: str) -> None:
        """Initializes a new instance.

        Args:
            file_path(str): The path to the dataset file.
        """
        super().__init__()
        self.file_path = file_path

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __del__(self):
        self.close()

    @abc.abstractmethod
    def get_subject_entries(self) -> list:
        """Get the dataset entries holding the subject's data.

        Returns:
            list: The list of subject entry strings.
        """

    @abc.abstractmethod
    def get_shape(self, subject_index: int) -> list:
        """Get the shape from an entry.

        Args:
            subject_index(int): The index of the subject.

        Returns:
            list: The shape of each dimension.
        """

    @abc.abstractmethod
    def get_subjects(self) -> list:
        """Get the subject names in the dataset.

        Returns:
            list: The list of subject names.
        """

    @abc.abstractmethod
    def read(self, entry: str, index: expr.IndexExpression = None):
        """Read a dataset entry.

        Args:
            entry(str): The dataset entry.
            index(expr.IndexExpression): The slicing expression.

        Returns:
            The read data.
        """

    @abc.abstractmethod
    def has(self, entry: str) -> bool:
        """Check whether a dataset entry exists.

        Args:
            entry(str): The dataset entry.

        Returns:
            bool: Whether the entry exists.
        """

    @abc.abstractmethod
    def open(self):
        """Open the reader."""

    @abc.abstractmethod
    def close(self):
        """Close the reader."""
class Hdf5Reader(Reader):
    """Represents the dataset reader for HDF5 files."""

    def __init__(self, file_path: str, category=defs.KEY_IMAGES) -> None:
        """Initializes a new instance.

        Args:
            file_path(str): The path to the dataset file.
            category(str): The category of an entry that defines the shape request
        """
        super().__init__(file_path)
        # The file handle stays None until open() is called and is reset to None by close().
        self.h5 = None  # type: h5py.File
        self.category = category

    def get_subject_entries(self) -> list:
        """see :meth:`.Reader.get_subject_entries`"""
        nb_subjects = len(self.get_subjects())
        return [defs.subject_index_to_str(i, nb_subjects) for i in range(nb_subjects)]

    def get_shape(self, subject_index: int) -> list:
        """see :meth:`.Reader.get_shape`"""
        # The shape entry is looked up for the category given at construction.
        shape_entry = defs.LOC_SHAPE_PLACEHOLDER.format(self.category)
        return self.read(shape_entry, expr.IndexExpression(subject_index)).tolist()

    def get_subjects(self) -> list:
        """see :meth:`.Reader.get_subjects`"""
        return byte_converter.convert_to_string(self.read(defs.LOC_SUBJECT))

    def read(self, entry: str, index: expr.IndexExpression = None):
        """see :meth:`.Reader.read`

        NumPy arrays of object dtype are returned as plain Python lists;
        everything else is returned as read from the file.
        """
        if index is None:
            # need () instead of util.IndexExpression(None) [which is equal to slice(None)]
            data = self.h5[entry][()]
        else:
            data = self.h5[entry][index.expression]

        # Compare against the builtin ``object``: the ``np.object`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24, where accessing it
        # raises AttributeError. The builtin is equivalent on every version.
        if isinstance(data, np.ndarray) and data.dtype == object:
            return data.tolist()
        return data

    def has(self, entry: str) -> bool:
        """see :meth:`.Reader.has`"""
        return entry in self.h5

    def open(self):
        """see :meth:`.Reader.open`"""
        self.h5 = h5py.File(self.file_path, mode='r', libver='latest')

    def close(self):
        """see :meth:`.Reader.close`"""
        # Resetting the handle to None makes repeated close() calls a no-op.
        if self.h5 is not None:
            self.h5.close()
            self.h5 = None
def get_reader(file_path: str, direct_open: bool = False) -> Reader:
    """Get the dataset reader corresponding to the file extension.

    The extension of ``file_path`` is looked up in ``reader_registry`` and the
    registered class is instantiated with ``file_path``.

    Args:
        file_path(str): The path to the dataset file.
        direct_open(bool): Whether the file should directly be opened.

    Returns:
        Reader: Reader corresponding to dataset file extension.

    Raises:
        ValueError: If the file extension is not in the registry.
    """
    _, file_ext = os.path.splitext(file_path)

    if file_ext in reader_registry:
        dataset_reader = reader_registry[file_ext](file_path)
        if direct_open:
            dataset_reader.open()
        return dataset_reader

    raise ValueError('unknown dataset file extension "{}"'.format(file_ext))
reader_registry = {'.h5': Hdf5Reader, '.hdf5': Hdf5Reader}
"""Registry defining the mapping between file extension and :class:`.Reader` class.

Keys are file extensions including the leading dot. Alternative readers need to be
added to this registry in order to use :func:`.get_reader`.
"""