Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend HDF5 compression options #332

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions kwave/kWaveSimulation_helper/save_to_disk_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def save_to_disk_func(
# =========================================================================

remove_z_dimension(float_variables, kgrid.dim)
save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_level,
save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_options,
auto_chunk=auto_chunk)

# update command line status
Expand Down Expand Up @@ -447,12 +447,12 @@ def enforce_filename_standards(filepath):
return filepath, filename_ext


def save_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk):
def save_file(filepath, integer_variables, float_variables, hdf_compression_options, auto_chunk):
filepath, filename_ext = enforce_filename_standards(filepath)

# save file
if filename_ext == '.h5':
save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk)
save_h5_file(filepath, integer_variables, float_variables, hdf_compression_options, auto_chunk)

elif filename_ext == '.mat':
save_mat_file(filepath, integer_variables, float_variables)
Expand All @@ -461,7 +461,7 @@ def save_file(filepath, integer_variables, float_variables, hdf_compression_leve
raise NotImplementedError('unknown file extension for ''save_to_disk'' filename')


def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk):
def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_options, auto_chunk):
# ----------------
# SAVE HDF5 FILE
# ----------------
Expand All @@ -476,15 +476,15 @@ def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_l
for key, value in float_variables.items():
# cast matrix to single precision
value = np.array(value, dtype=np.float32)
write_matrix(filepath, value, key, hdf_compression_level, auto_chunk)
write_matrix(filepath, value, key, hdf_compression_options, auto_chunk)
del value

# change all the index variables to be in 64-bit unsigned integers
# (long in C++), then add to HDF5 file
for key, value in integer_variables.items():
# cast matrix to 64-bit unsigned integer
value = np.array(value, dtype=np.uint64)
write_matrix(filepath, value, key, hdf_compression_level, auto_chunk)
write_matrix(filepath, value, key, hdf_compression_options, auto_chunk)
del value

# set additional file attributes
Expand Down
22 changes: 13 additions & 9 deletions kwave/options/simulation_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dataclasses import dataclass, field
from enum import Enum
from tempfile import gettempdir
from typing import List, Optional, TYPE_CHECKING
from typing import List, Union, Optional, TYPE_CHECKING

import numpy as np

Expand Down Expand Up @@ -70,7 +70,7 @@ class SimulationOptions(object):
The saved variables can be used to run simulations using the C++ code.
data_recast: recast the sensor data back to double precision
cartesian_interp: interpolation mode for Cartesian sensor mask
hdf_compression_level: zip compression level for HDF5 input files
hdf_compression_options: either a gzip compression level (an integer, 0-9) for HDF5 input files, or the name of the compression filter to use ('lzf' or 'szip')
data_cast: data cast
pml_search_range: search range used when automatically determining PML size
radial_symmetry: radial symmetry used in axisymmetric code
Expand Down Expand Up @@ -102,7 +102,7 @@ class SimulationOptions(object):
stream_to_disk: bool = False
data_recast: Optional[bool] = False
cartesian_interp: str = 'linear'
hdf_compression_level: Optional[int] = None
hdf_compression_options: Optional[Union[int, str]] = None
djps marked this conversation as resolved.
Show resolved Hide resolved
data_cast: str = 'off'
pml_search_range: List[int] = field(default_factory=lambda: [10, 40])
radial_symmetry: str = 'WSWA-FFT'
Expand Down Expand Up @@ -130,12 +130,14 @@ def __post_init__(self):
if self.data_cast == 'double':
self.data_cast = 'off'

# load the HDF5 literals (for the default compression level)
# load the HDF5 literals (for the default compression settings)
h5_literals = get_h5_literals()
self.hdf_compression_level = h5_literals.HDF_COMPRESSION_LEVEL
self.hdf_compression_options = h5_literals.HDF_COMPRESSION_OPTIONS
# check value is an integer between 0 and 9
assert isinstance(self.hdf_compression_level, int) and 0 <= self.hdf_compression_level <= 9, \
"Optional input ''hdf_compression_level'' must be an integer between 0 and 9."
assert ((isinstance(self.hdf_compression_options, int) and (0 <= self.hdf_compression_options <= 9)) or
(isinstance(self.hdf_compression_options, str) and ((self.hdf_compression_options.lower() == 'lzf') or
(self.hdf_compression_options.lower() == 'szip')))), \
            "Optional input ''hdf_compression_options'' must be an integer between 0 and 9, or either 'lzf' or 'szip'."

assert np.isscalar(self.multi_axial_PML_ratio) and self.multi_axial_PML_ratio >= 0, \
"Optional input ''multi_axial_PML_ratio'' must be a single positive value."
Expand Down Expand Up @@ -206,9 +208,11 @@ def option_factory(kgrid: "kWaveGrid", options: SimulationOptions):
* data_recast: Boolean controlling whether the output data is cast back to double precision.
If set to false, sensor_data will be returned in
the data format set using the 'data_cast' option.
* hdf_compression_level: Compression level used for writing the input HDF5 file when using
* hdf_compression_options: Compression level used for writing the input HDF5 file when using
'save_to_disk' or kspaceFirstOrder3DC. Can be set to an integer
between 0 (no compression, the default) and 9 (maximum compression).
between 0 (no compression) and 9 (maximum compression) for gzip
compression (the default level is 4), or as a string ('lzf' or 'szip') to select a different compression filter.
Note that szip compression requires additional libraries to be installed.
The compression is lossless. Increasing the compression level will reduce
the file size if there are portions of the medium that are homogeneous,
but will also increase the time to create the HDF5 file.
Expand Down
20 changes: 10 additions & 10 deletions kwave/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import platform
import socket
from datetime import datetime
from typing import Optional
from typing import Optional, Union

import cv2
import h5py
Expand Down Expand Up @@ -46,20 +46,20 @@ def get_h5_literals():
'HDF_FILE_MAJOR_VERSION': '1',
'HDF_FILE_MINOR_VERSION': '2',

# compression level
'HDF_COMPRESSION_LEVEL': 0
# default compression options: gzip level 4, matching h5py's own default
'HDF_COMPRESSION_OPTIONS': 4
})
return literals


def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_level:int =None, auto_chunk: bool =True):
def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_options: Union[int, str] = None, auto_chunk: bool = True):
# get literals
h5_literals = get_h5_literals()

assert isinstance(auto_chunk, bool), "auto_chunk must be a boolean."

if compression_level is None:
compression_level = h5_literals.HDF_COMPRESSION_LEVEL
if compression_options is None:
djps marked this conversation as resolved.
Show resolved Hide resolved
compression_options = h5_literals.HDF_COMPRESSION_OPTIONS

# dims = num_dim(matrix)
dims = len(matrix.shape)
Expand All @@ -78,7 +78,7 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_lev
else:
Nx, Ny, Nz = 1, 1, 1

# check size of matrix and set chunk size and compression level
# check size of matrix and set chunk size and compression options
if dims == 3:
# set chunk size to Nx * Ny
chunk_size = [Nx, Ny, 1]
Expand All @@ -99,7 +99,7 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_lev
else:

# set no compression
compression_level = 0
compression_options = 0

# set chunk size to grid size
if matrix.size == 1:
Expand Down Expand Up @@ -186,9 +186,9 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name: str, compression_lev
'chunks': auto_chunk if auto_chunk is True else tuple(chunk_size)
}

if compression_level != 0:
if compression_options != 0:
# use compression
opts['compression'] = compression_level
opts['compression'] = compression_options

# write the matrix into the file
with h5py.File(filename, "a") as f:
Expand Down
20 changes: 18 additions & 2 deletions tests/matlab_test_data_collectors/python_testers/h5io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ def compare_h5_values(local_h5_path, ref_path):


def test_write_matrix(tmp_path_factory):
compression_options = range(1,9)
idx = 0
for dim in range(1, 3):
for compression_level in range(1, 9):
for compression_option in compression_options:
tmp_path = tmp_path_factory.mktemp("matrix") / f"{idx}.h5"
matrix = np.single(10.0 * np.ones([1, dim]))
write_matrix(tmp_path, matrix=matrix, matrix_name='test')
write_matrix(tmp_path, matrix=matrix, matrix_name='test', compression_options=compression_option)
ref_path = os.path.join(Path(__file__).parent, f"collectedValues/writeMatrix/{idx}.h5")
compare_h5_values(tmp_path, ref_path)
idx = idx + 1
Expand Down Expand Up @@ -85,3 +86,18 @@ def test_write_grid(tmp_path_factory):
compare_h5_values(tmp_path, ref_path)
idx = idx + 1
pass


def test_write_matrix_lzf(tmp_path_factory):
    """
    Test the `lzf` compression option, which is not available in the MATLAB
    h5create function, by writing a matrix with write_matrix and reading it
    back to confirm the round-tripped data matches the original.
    """
    compression_option = 'lzf'
    for idx, dim in enumerate(range(2, 3)):
        tmp_path = tmp_path_factory.mktemp("matrix") / f"{idx}.h5"
        matrix = np.single(10.0 * np.ones([1, dim]))
        write_matrix(tmp_path, matrix=matrix, matrix_name='test', compression_options=compression_option)
        # use a context manager so the file handle is closed even if the assert fails
        with h5py.File(tmp_path, 'r') as tmp_h5:
            # [()] reads the full dataset into a NumPy array before comparison
            assert np.isclose(tmp_h5['test'][()], matrix).all()
Loading