diff --git a/kwave/kWaveSimulation_helper/save_to_disk_func.py b/kwave/kWaveSimulation_helper/save_to_disk_func.py index 4faef74e..3a7f4e7c 100644 --- a/kwave/kWaveSimulation_helper/save_to_disk_func.py +++ b/kwave/kWaveSimulation_helper/save_to_disk_func.py @@ -57,7 +57,7 @@ def save_to_disk_func( # ========================================================================= remove_z_dimension(float_variables, kgrid.dim) - save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_level, + save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_options, auto_chunk=auto_chunk) # update command line status @@ -447,12 +447,12 @@ def enforce_filename_standards(filepath): return filepath, filename_ext -def save_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk): +def save_file(filepath, integer_variables, float_variables, hdf_compression_options, auto_chunk): filepath, filename_ext = enforce_filename_standards(filepath) # save file if filename_ext == '.h5': - save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk) + save_h5_file(filepath, integer_variables, float_variables, hdf_compression_options, auto_chunk) elif filename_ext == '.mat': save_mat_file(filepath, integer_variables, float_variables) @@ -461,7 +461,7 @@ def save_file(filepath, integer_variables, float_variables, hdf_compression_leve raise NotImplementedError('unknown file extension for ''save_to_disk'' filename') -def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk): +def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_options, auto_chunk): # ---------------- # SAVE HDF5 FILE # ---------------- @@ -476,7 +476,7 @@ def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_l for key, value in float_variables.items(): # cast matrix to single precision value = np.array(value, dtype=np.float32) 
- write_matrix(filepath, value, key, hdf_compression_level, auto_chunk) + write_matrix(filepath, value, key, hdf_compression_options, auto_chunk) del value # change all the index variables to be in 64-bit unsigned integers @@ -484,7 +484,7 @@ def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_l for key, value in integer_variables.items(): # cast matrix to 64-bit unsigned integer value = np.array(value, dtype=np.uint64) - write_matrix(filepath, value, key, hdf_compression_level, auto_chunk) + write_matrix(filepath, value, key, hdf_compression_options, auto_chunk) del value # set additional file attributes diff --git a/kwave/options/simulation_options.py b/kwave/options/simulation_options.py index 5bd6bd18..c2c46f19 100644 --- a/kwave/options/simulation_options.py +++ b/kwave/options/simulation_options.py @@ -12,7 +12,7 @@ # Found here: https://adamj.eu/tech/2021/05/13/python-type-hints-how-to-fix-circular-imports/ from kwave.kgrid import kWaveGrid from kwave.utils.data import get_date_string -from kwave.utils.io import get_h5_literals +from kwave.utils.io import get_h5_literals, CompressionOption from kwave.utils.pml import get_optimal_pml_size @@ -41,6 +41,7 @@ def is_axisymmetric(self): return self == SimulationType.AXISYMMETRIC + @dataclass class SimulationOptions(object): """ @@ -70,7 +71,7 @@ class SimulationOptions(object): The saved variables can be used to run simulations using the C++ code. 
data_recast: recast the sensor data back to double precision cartesian_interp: interpolation mode for Cartesian sensor mask - hdf_compression_level: zip compression level for HDF5 input files + hdf_compression_options: either gzip compression level for HDF5 input files, or type of compression used data_cast: data cast pml_search_range: search range used when automatically determining PML size radial_symmetry: radial symmetry used in axisymmetric code @@ -102,11 +103,11 @@ class SimulationOptions(object): stream_to_disk: bool = False data_recast: Optional[bool] = False cartesian_interp: str = 'linear' - hdf_compression_level: Optional[int] = None data_cast: str = 'off' pml_search_range: List[int] = field(default_factory=lambda: [10, 40]) radial_symmetry: str = 'WSWA-FFT' multi_axial_PML_ratio: float = 0.1 + hdf_compression_options: Optional[CompressionOption] = CompressionOption.GZIP_4.value data_path: Optional[str] = field(default_factory=lambda: gettempdir()) output_filename: Optional[str] = field(default_factory=lambda: f"{get_date_string()}_kwave_input.h5") input_filename: Optional[str] = field(default_factory=lambda: f"{get_date_string()}_kwave_output.h5") @@ -130,12 +131,14 @@ def __post_init__(self): if self.data_cast == 'double': self.data_cast = 'off' - # load the HDF5 literals (for the default compression level) + # load the HDF5 literals (for the default compression settings) h5_literals = get_h5_literals() - self.hdf_compression_level = h5_literals.HDF_COMPRESSION_LEVEL + self.hdf_compression_options = h5_literals.HDF_COMPRESSION_OPTIONS # check value is an integer between 0 and 9 - assert isinstance(self.hdf_compression_level, int) and 0 <= self.hdf_compression_level <= 9, \ - "Optional input ''hdf_compression_level'' must be an integer between 0 and 9." 
+ assert ((isinstance(self.hdf_compression_options, int) and (0 <= self.hdf_compression_options <= 9)) or + (isinstance(self.hdf_compression_options, str) and ((self.hdf_compression_options.lower() == 'lzf') or + (self.hdf_compression_options.lower() == 'szip')))), \ + "Optional input ''hdf_compression_options'' must be an integer between 0 and 9, or either 'lzf' or 'szip'" assert np.isscalar(self.multi_axial_PML_ratio) and self.multi_axial_PML_ratio >= 0, \ "Optional input ''multi_axial_PML_ratio'' must be a single positive value." @@ -206,9 +209,11 @@ def option_factory(kgrid: "kWaveGrid", options: SimulationOptions): * data_recast: Boolean controlling whether the output data is cast back to double precision. If set to false, sensor_data will be returned in the data format set using the 'data_cast' option. - * hdf_compression_level: Compression level used for writing the input HDF5 file when using + * hdf_compression_options: Compression level used for writing the input HDF5 file when using 'save_to_disk' or kspaceFirstOrder3DC. Can be set to an integer - between 0 (no compression, the default) and 9 (maximum compression). + between 0 (no compression, the default) and 9 (maximum compression) for gzip + compression or as a string for lzf or szip compression. + Note that szip compression requires additional libraries to be installed. The compression is lossless. Increasing the compression level will reduce the file size if there are portions of the medium that are homogeneous, but will also increase the time to create the HDF5 file. 
diff --git a/kwave/utils/io.py b/kwave/utils/io.py index f99009ea..47f11e4d 100644 --- a/kwave/utils/io.py +++ b/kwave/utils/io.py @@ -2,6 +2,7 @@ import os import platform import socket +from enum import Enum from datetime import datetime from typing import Optional @@ -14,6 +15,25 @@ from .data import get_date_string from .dotdictionary import dotdict +# from kwave.options.simulation_options import CompressionOption + +class CompressionOption(Enum): + """ + Enum for the compression options + """ + GZIP_0 = 0 + GZIP_1 = 1 + GZIP_2 = 2 + GZIP_3 = 3 + GZIP_4 = 4 + GZIP_5 = 5 + GZIP_6 = 6 + GZIP_7 = 7 + GZIP_8 = 8 + GZIP_9 = 9 + LZF = 'lzf' + SZIP = 'szip' + def get_h5_literals(): literals = dotdict({ @@ -46,21 +66,21 @@ def get_h5_literals(): 'HDF_FILE_MAJOR_VERSION': '1', 'HDF_FILE_MINOR_VERSION': '2', - # compression level - 'HDF_COMPRESSION_LEVEL': 0 + # compression level: set to be same as default h5py + 'HDF_COMPRESSION_OPTIONS': CompressionOption.GZIP_4.value }) return literals -def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_level:int =None, auto_chunk: bool =True): +def write_matrix(filename, matrix: np.ndarray, + matrix_name: str, + compression_options: Optional[CompressionOption] = get_h5_literals().HDF_COMPRESSION_OPTIONS, + auto_chunk: bool = True): # get literals h5_literals = get_h5_literals() assert isinstance(auto_chunk, bool), "auto_chunk must be a boolean." 
- if compression_level is None: - compression_level = h5_literals.HDF_COMPRESSION_LEVEL - # dims = num_dim(matrix) dims = len(matrix.shape) @@ -78,7 +98,7 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_lev else: Nx, Ny, Nz = 1, 1, 1 - # check size of matrix and set chunk size and compression level + # check size of matrix and set chunk size and compression options if dims == 3: # set chunk size to Nx * Ny chunk_size = [Nx, Ny, 1] @@ -99,7 +119,7 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_lev else: # set no compression - compression_level = 0 + compression_options = CompressionOption.GZIP_0.value # set chunk size to grid size if matrix.size == 1: @@ -186,9 +206,9 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_lev 'chunks': auto_chunk if auto_chunk is True else tuple(chunk_size) } - if compression_level != 0: + if compression_options != CompressionOption.GZIP_0.value: # use compression - opts['compression'] = compression_level + opts['compression'] = compression_options # write the matrix into the file with h5py.File(filename, "a") as f: diff --git a/tests/matlab_test_data_collectors/python_testers/h5io_test.py b/tests/matlab_test_data_collectors/python_testers/h5io_test.py index 297103aa..8e7e0c48 100644 --- a/tests/matlab_test_data_collectors/python_testers/h5io_test.py +++ b/tests/matlab_test_data_collectors/python_testers/h5io_test.py @@ -26,12 +26,13 @@ def compare_h5_values(local_h5_path, ref_path): def test_write_matrix(tmp_path_factory): + compression_options = range(1,9) idx = 0 for dim in range(1, 3): - for compression_level in range(1, 9): + for compression_option in compression_options: tmp_path = tmp_path_factory.mktemp("matrix") / f"{idx}.h5" matrix = np.single(10.0 * np.ones([1, dim])) - write_matrix(tmp_path, matrix=matrix, matrix_name='test') + write_matrix(tmp_path, matrix=matrix, matrix_name='test', compression_options=compression_option) ref_path 
= os.path.join(Path(__file__).parent, f"collectedValues/writeMatrix/{idx}.h5") compare_h5_values(tmp_path, ref_path) idx = idx + 1 @@ -85,3 +86,18 @@ def test_write_grid(tmp_path_factory): compare_h5_values(tmp_path, ref_path) idx = idx + 1 pass + + +def test_write_matrix_lzf(tmp_path_factory): + """ + Tests the compression option `lzf`, which is not an option for the matlab h5create function, by + comparing written data to a reference matrix + """ + compression_option = 'lzf' + for idx, dim in enumerate(range(2, 3)): + tmp_path = tmp_path_factory.mktemp("matrix") / f"{idx}.h5" + matrix = np.single(10.0 * np.ones([1, dim])) + write_matrix(tmp_path, matrix=matrix, matrix_name='test', compression_options=compression_option) + tmp_h5 = h5py.File(tmp_path, 'r') + assert np.isclose(tmp_h5['test'], matrix).all() + pass \ No newline at end of file