From 67ede4c6b4f1707f1da18351945e10904a5572de Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sun, 15 Jan 2023 13:02:57 -0500 Subject: [PATCH] implement fletcher32 (#412) * implement fletcher32 * Update numcodecs/fletcher32.pyx Co-authored-by: Ryan Abernathey * Add docstring and error test * Use HDF C impl * Remove unused, add docstrings * to runtime and int test * to cython * Update numcodecs/fletcher32.pyx Co-authored-by: Ryan Abernathey * Add docs Co-authored-by: Ryan Abernathey --- docs/checksum32.rst | 11 ++++ docs/release.rst | 3 +- numcodecs/__init__.py | 3 ++ numcodecs/fletcher32.pyx | 85 ++++++++++++++++++++++++++++++ numcodecs/tests/test_fletcher32.py | 42 +++++++++++++++ setup.py | 28 +++++++++- 6 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 numcodecs/fletcher32.pyx create mode 100644 numcodecs/tests/test_fletcher32.py diff --git a/docs/checksum32.rst b/docs/checksum32.rst index 1d5522e2..5e682afc 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -22,3 +22,14 @@ Adler32 .. automethod:: decode .. automethod:: get_config .. automethod:: from_config + + +Fletcher32 +---------- + +.. autoclass:: numcodecs.fletcher32.Fletcher32 + + .. autoattribute:: codec_id + .. automethod:: encode + .. automethod:: decode + diff --git a/docs/release.rst b/docs/release.rst index 6f176b8c..90d62750 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -15,7 +15,8 @@ Unreleased Enhancements ~~~~~~~~~~~~ -* +* Add ``fletcher32`` checksum codec + By :user:`Martin Durant <martindurant>`, :issue:`410`. 
Fix ~~~ diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index 53f3e795..1e3c8536 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -111,3 +111,6 @@ register_codec(VLenUTF8) register_codec(VLenBytes) register_codec(VLenArray) + +from numcodecs.fletcher32 import Fletcher32 +register_codec(Fletcher32) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx new file mode 100644 index 00000000..02f9319c --- /dev/null +++ b/numcodecs/fletcher32.pyx @@ -0,0 +1,85 @@ +# cython: language_level=3 +# cython: overflowcheck=False +# cython: cdivision=True +import struct + +from numcodecs.abc import Codec +from numcodecs.compat import ensure_contiguous_ndarray + +from libc.stdint cimport uint8_t, uint16_t, uint32_t + + +cdef uint32_t _fletcher32(const uint8_t[::1] _data): + # converted from + # https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109 + cdef: + const uint8_t *data = &_data[0] + size_t _len = _data.shape[0] + size_t len = _len / 2 + size_t tlen + uint32_t sum1 = 0, sum2 = 0; + + + while len: + tlen = 360 if len > 360 else len + len -= tlen + while True: + sum1 += ((data[0]) << 8) | (data[1]) + data += 2 + sum2 += sum1 + tlen -= 1 + if tlen < 1: + break + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + if _len % 2: + sum1 += (((data[0])) << 8) + sum2 += sum1 + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + return (sum2 << 16) | sum1 + + +class Fletcher32(Codec): + """The fletcher checksum with 16-bit words and 32-bit output + + This is the netCDF4/HDF5 implementation, which is not equivalent + to the one in Wikipedia + https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. 
At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent. + """ + + codec_id = "fletcher32" + + def encode(self, buf): + """Return buffer plus 4-byte fletcher checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + cdef const uint8_t[::1] b_ptr = buf + val = _fletcher32(b_ptr) + return buf.tobytes() + struct.pack("