-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* implement fletcher32 * Update numcodecs/fletcher32.pyx Co-authored-by: Ryan Abernathey <[email protected]> * Add docstring and error test * Use HDF C impl * Remove unused, add docstrings * to runtime and int test * to cython * Update numcodecs/fletcher32.pyx Co-authored-by: Ryan Abernathey <[email protected]> * Add docs Co-authored-by: Ryan Abernathey <[email protected]>
- Loading branch information
1 parent
4f2a2e3
commit 67ede4c
Showing
6 changed files
with
170 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# cython: language_level=3 | ||
# cython: overflowcheck=False | ||
# cython: cdivision=True | ||
import struct | ||
|
||
from numcodecs.abc import Codec | ||
from numcodecs.compat import ensure_contiguous_ndarray | ||
|
||
from libc.stdint cimport uint8_t, uint16_t, uint32_t | ||
|
||
|
||
cdef uint32_t _fletcher32(const uint8_t[::1] _data):
    """Compute the netCDF4/HDF5 flavour of the Fletcher-32 checksum.

    Converted from
    https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109

    The bytes are consumed as 16-bit words with the first byte of each
    pair in the high position; a trailing odd byte is used as the high
    byte of one final word.

    Parameters
    ----------
    _data : const uint8_t[::1]
        Contiguous buffer of bytes to checksum.

    Returns
    -------
    uint32_t
        ``(sum2 << 16) | sum1`` after folding both running sums; ``0``
        when ``_data`` is empty.
    """
    cdef:
        const uint8_t *data
        size_t nbytes = _data.shape[0]
        size_t nwords = nbytes / 2
        size_t chunk
        uint32_t sum1 = 0, sum2 = 0

    # Element 0 of an empty memoryview does not exist, so its address
    # must not be taken. With no bytes both sums stay at zero.
    if nbytes == 0:
        return 0
    data = &_data[0]

    while nwords:
        # Process at most 360 words between folds (block size inherited
        # from the C code this was converted from; presumably it keeps
        # the 32-bit sums from overflowing -- TODO confirm).
        chunk = 360 if nwords > 360 else nwords
        nwords -= chunk
        # chunk is always >= 1 here, so this counted loop runs the same
        # iterations as a do/while would.
        while chunk:
            sum1 += <uint32_t>((<uint16_t>data[0]) << 8) | (<uint16_t>data[1])
            data += 2
            sum2 += sum1
            chunk -= 1
        # Fold the carries above bit 15 back into the low 16 bits.
        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
        sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    if nbytes % 2:
        # Odd length: the last byte becomes the high byte of a final word.
        sum1 += <uint32_t>((<uint16_t>(data[0])) << 8)
        sum2 += sum1
        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
        sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    # Second fold so each sum fits in 16 bits before they are packed.
    sum1 = (sum1 & 0xffff) + (sum1 >> 16)
    sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    return (sum2 << 16) | sum1
|
||
|
||
class Fletcher32(Codec):
    """The fletcher checksum with 16-bit words and 32-bit output

    This is the netCDF4/HDF5 implementation, which is not equivalent
    to the one in wikipedia
    https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.
    """

    codec_id = "fletcher32"

    def encode(self, buf):
        """Return buffer plus 4-byte fletcher checksum

        Parameters
        ----------
        buf : buffer-like
            Data to protect; it is viewed as a flat sequence of bytes.

        Returns
        -------
        bytes
            The bytes of ``buf`` followed by the checksum packed as a
            little-endian unsigned 32-bit integer.
        """
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        cdef const uint8_t[::1] b_ptr = buf
        val = _fletcher32(b_ptr)
        return buf.tobytes() + struct.pack("<I", val)

    def decode(self, buf, out=None):
        """Check fletcher checksum, and return buffer without it

        Parameters
        ----------
        buf : buffer-like
            Encoded data: payload bytes followed by the 4-byte checksum.
        out : buffer-like, optional
            Writeable buffer that receives the payload bytes. When given,
            it is filled and returned instead of a new view.

        Returns
        -------
        memoryview or the ``out`` object
            The payload without the trailing checksum.

        Raises
        ------
        ValueError
            If ``buf`` is too short to contain the 4-byte checksum.
        RuntimeError
            If the stored checksum does not match the one computed from
            the payload.
        """
        cdef const uint8_t[::1] b_ptr
        b = ensure_contiguous_ndarray(buf).view('uint8')
        if b.shape[0] < 4:
            # Without this the slicing below yields an obscure failure.
            raise ValueError(
                "buffer is too short to contain a fletcher32 checksum"
            )
        payload = b[:-4]
        b_ptr = payload
        val = _fletcher32(b_ptr)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The fletcher32 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        # Compare against None: the truth value of a multi-element array
        # is ambiguous and raises, and a valid buffer may be falsy.
        if out is not None:
            # Go through ensure_contiguous_ndarray so any writeable
            # buffer (not only an ndarray) can be filled in place.
            ensure_contiguous_ndarray(out).view("uint8")[:] = payload
            return out
        return memoryview(payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from numcodecs.fletcher32 import Fletcher32 | ||
|
||
|
||
@pytest.mark.parametrize("dtype", ["uint8", "int32", "float32"])
def test_with_data(dtype):
    """An encode/decode round trip returns the original values."""
    original = np.arange(100, dtype=dtype)
    codec = Fletcher32()
    encoded = codec.encode(original)
    decoded = np.frombuffer(codec.decode(encoded), dtype=dtype)
    assert np.all(decoded == original)
|
||
|
||
def test_error():
    """Altering the first encoded byte makes decode raise RuntimeError."""
    codec = Fletcher32()
    corrupted = bytearray(codec.encode(np.arange(100)))
    corrupted[0] += 1
    with pytest.raises(RuntimeError) as excinfo:
        codec.decode(corrupted)
    # Checked after the block: code following the raising call inside
    # the ``with`` body would never execute.
    assert "fletcher32 checksum" in str(excinfo.value)
|
||
|
||
def test_known():
    """A fixed payload with its stored checksum decodes to known int64s."""
    encoded = (
        b'w\x07\x00\x00\x00\x00\x00\x00\x85\xf6\xff\xff\xff\xff\xff\xff'
        b'i\x07\x00\x00\x00\x00\x00\x00\x94\xf6\xff\xff\xff\xff\xff\xff'
        b'\x88\t\x00\x00\x00\x00\x00\x00i\x03\x00\x00\x00\x00\x00\x00'
        b'\x93\xfd\xff\xff\xff\xff\xff\xff\xc3\xfc\xff\xff\xff\xff\xff\xff'
        b"'\x02\x00\x00\x00\x00\x00\x00\xba\xf7\xff\xff\xff\xff\xff\xff"
        b'\xfd%\x86d'
    )
    expected = [
        1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118,
    ]
    # The last four bytes are the checksum; decode strips them.
    payload = Fletcher32().decode(encoded)
    values = np.frombuffer(payload, dtype="<i8")
    assert values.tolist() == expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters