Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: update snappy frame decompress #7333

Merged
merged 3 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,38 @@
import crc32c from "@chainsafe/fast-crc32c";

/**
 * Chunk type identifiers used by the snappy framing format.
 *
 * The decoder reads this value from the first byte of every chunk header.
 */
export enum ChunkType {
/** Stream identifier chunk; the decoder requires its payload to equal `IDENTIFIER`. */
IDENTIFIER = 0xff,
/** Chunk whose payload is a 4-byte checksum followed by snappy-compressed data. */
COMPRESSED = 0x00,
/** Chunk whose payload is a 4-byte checksum followed by the raw (uncompressed) data. */
UNCOMPRESSED = 0x01,
/** Padding chunk; the decoder consumes it and ignores its payload. */
PADDING = 0xfe,
/**
 * Skippable chunk; the decoder consumes it and ignores its payload.
 * `getChunkType` maps every type byte in the range 0x80..0xfd to this member.
 */
SKIPPABLE = 0x80,
}

/** Payload of the stream identifier chunk: the ASCII bytes "sNaPpY". */
export const IDENTIFIER = Buffer.from([0x73, 0x4e, 0x61, 0x50, 0x70, 0x59]);
/**
 * Complete stream identifier chunk: the chunk type byte (0xff), the payload length (6)
 * as three little-endian bytes, then the same six bytes as `IDENTIFIER`.
 */
export const IDENTIFIER_FRAME = Buffer.from([0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59]);

/**
 * Maximum size, in bytes, of the uncompressed data carried by a single chunk.
 *
 * As per the snappy framing format for streams, the uncompressed data in any chunk can be
 * no larger than 65536 bytes.
 *
 * From: https://github.com/google/snappy/blob/main/framing_format.txt#L90:L92
 */
export const UNCOMPRESSED_CHUNK_SIZE = 65536;

/**
 * Compute the masked CRC-32C of `value` as used by the snappy framing format.
 *
 * @param value - bytes to checksum
 * @returns a 4-byte buffer holding the masked checksum in little-endian order
 */
export function crc(value: Uint8Array): Buffer {
  // The cast only satisfies the typings; the underlying function doesn't actually need a Buffer.
  // see https://github.com/napi-rs/node-rs/blob/main/packages/crc32/index.d.ts
  const rawCrc = crc32c.calculate(value as Buffer);

  // Masking as defined in section 3 of https://github.com/google/snappy/blob/master/framing_format.txt
  // Other implementations for reference:
  // Go: https://github.com/golang/snappy/blob/2e65f85255dbc3072edf28d6b5b8efc472979f5a/snappy.go#L97
  // Python: https://github.com/andrix/python-snappy/blob/602e9c10d743f71bef0bac5e4c4dffa17340d7b3/snappy/snappy.py#L70
  //
  // Step 1: rotate right by 15 bits. The low (32 - 17) = 15 bits are masked with 0x7fff
  // before being shifted left, and the high part uses >>> so the intermediate stays 32 bit.
  const rotated = (rawCrc >>> 15) | ((rawCrc & 0x7fff) << 17);
  // Step 2: add the masking constant, then >>> 0 to wrap the sum to an unsigned 32 bit value.
  const masked = (rotated + 0xa282ead8) >>> 0;

  const out = Buffer.allocUnsafe?.(4) ?? Buffer.alloc(4);
  out.writeUInt32LE(masked, 0);
  return out;
}
Original file line number Diff line number Diff line change
@@ -1,33 +1,8 @@
import crc32c from "@chainsafe/fast-crc32c";
import snappy from "snappy";
import {ChunkType, IDENTIFIER_FRAME} from "./common.js";
import {ChunkType, IDENTIFIER_FRAME, UNCOMPRESSED_CHUNK_SIZE, crc} from "./common.js";

// The logic in this file is largely copied (in simplified form) from https://github.com/ChainSafe/node-snappy-stream/

/**
 * Maximum size, in bytes, of the uncompressed data carried by a single chunk.
 *
 * As per the snappy framing format for streams, the uncompressed data in any chunk can be
 * no larger than 65536 bytes.
 *
 * From: https://github.com/google/snappy/blob/main/framing_format.txt#L90:L92
 */
const UNCOMPRESSED_CHUNK_SIZE = 65536;

/**
 * Build the 4-byte masked CRC-32C checksum that precedes the data in a snappy frame.
 *
 * @param value - the uncompressed chunk to checksum
 * @returns the masked checksum, written little-endian into a new 4-byte buffer
 */
function checksum(value: Buffer): Buffer {
  const crcValue = crc32c.calculate(value);

  // Masking is defined in section 3 of https://github.com/google/snappy/blob/master/framing_format.txt
  // Reference implementations:
  // Go: https://github.com/golang/snappy/blob/2e65f85255dbc3072edf28d6b5b8efc472979f5a/snappy.go#L97
  // Python: https://github.com/andrix/python-snappy/blob/602e9c10d743f71bef0bac5e4c4dffa17340d7b3/snappy/snappy.py#L70
  //
  // High part: >>> keeps the 32 bit intermediate unsigned.
  const highPart = crcValue >>> 15;
  // Low part: keep only (32 - 17) = 15 bits (0x7fff) so the left shift stays within 32 bits.
  const lowPart = (crcValue & 0x7fff) << 17;
  // Final >>> 0 yields an unsigned 32 bit output.
  const maskedCrc = ((highPart | lowPart) + 0xa282ead8) >>> 0;

  const bytes = Buffer.allocUnsafe?.(4) ?? Buffer.alloc(4);
  bytes.writeUInt32LE(maskedCrc, 0);
  return bytes;
}

export async function* encodeSnappy(bytes: Buffer): AsyncGenerator<Buffer> {
yield IDENTIFIER_FRAME;

Expand All @@ -36,17 +11,13 @@ export async function* encodeSnappy(bytes: Buffer): AsyncGenerator<Buffer> {
const compressed = snappy.compressSync(chunk);
if (compressed.length < chunk.length) {
const size = compressed.length + 4;
yield Buffer.concat([
Buffer.from([ChunkType.COMPRESSED, size, size >> 8, size >> 16]),
checksum(chunk),
compressed,
]);
yield Buffer.concat([Buffer.from([ChunkType.COMPRESSED, size, size >> 8, size >> 16]), crc(chunk), compressed]);
} else {
const size = chunk.length + 4;
yield Buffer.concat([
//
Buffer.from([ChunkType.UNCOMPRESSED, size, size >> 8, size >> 16]),
checksum(chunk),
crc(chunk),
chunk,
]);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {uncompress} from "snappyjs";
import {Uint8ArrayList} from "uint8arraylist";
import {ChunkType, IDENTIFIER} from "./common.js";
import {ChunkType, IDENTIFIER, UNCOMPRESSED_CHUNK_SIZE, crc} from "./common.js";

export class SnappyFramesUncompress {
private buffer = new Uint8ArrayList();
Expand All @@ -21,32 +21,55 @@ export class SnappyFramesUncompress {
if (this.buffer.length < 4) break;

const type = getChunkType(this.buffer.get(0));

if (!this.state.foundIdentifier && type !== ChunkType.IDENTIFIER) {
throw "malformed input: must begin with an identifier";
}

const frameSize = getFrameSize(this.buffer, 1);

if (this.buffer.length - 4 < frameSize) {
break;
}

const data = this.buffer.subarray(4, 4 + frameSize);
const frame = this.buffer.subarray(4, 4 + frameSize);
this.buffer.consume(4 + frameSize);

if (!this.state.foundIdentifier && type !== ChunkType.IDENTIFIER) {
throw "malformed input: must begin with an identifier";
}
switch (type) {
case ChunkType.IDENTIFIER: {
if (!Buffer.prototype.equals.call(frame, IDENTIFIER)) {
throw "malformed input: bad identifier";
}
this.state.foundIdentifier = true;
continue;
}
case ChunkType.PADDING:
case ChunkType.SKIPPABLE:
continue;
case ChunkType.COMPRESSED: {
const checksum = frame.subarray(0, 4);
const data = frame.subarray(4);

if (type === ChunkType.IDENTIFIER) {
if (!Buffer.prototype.equals.call(data, IDENTIFIER)) {
throw "malformed input: bad identifier";
const uncompressed = uncompress(data, UNCOMPRESSED_CHUNK_SIZE);
if (crc(uncompressed).compare(checksum) !== 0) {
throw "malformed input: bad checksum";
}
result.append(uncompressed);
break;
}
this.state.foundIdentifier = true;
continue;
}
case ChunkType.UNCOMPRESSED: {
const checksum = frame.subarray(0, 4);
const uncompressed = frame.subarray(4);

if (type === ChunkType.COMPRESSED) {
result.append(uncompress(data.subarray(4)));
}
if (type === ChunkType.UNCOMPRESSED) {
result.append(data.subarray(4));
if (uncompressed.length > UNCOMPRESSED_CHUNK_SIZE) {
throw "malformed input: too large";
}
if (crc(uncompressed).compare(checksum) !== 0) {
throw "malformed input: bad checksum";
}
result.append(uncompressed);
break;
}
}
}
if (result.length === 0) {
Expand Down Expand Up @@ -82,6 +105,10 @@ function getChunkType(value: number): ChunkType {
case ChunkType.PADDING:
return ChunkType.PADDING;
default:
// https://github.com/google/snappy/blob/main/framing_format.txt#L129
if (value >= 0x80 && value <= 0xfd) {
return ChunkType.SKIPPABLE;
}
throw new Error("Unsupported snappy chunk type");
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import {pipe} from "it-pipe";
import {Uint8ArrayList} from "uint8arraylist";
import {describe, expect, it} from "vitest";
import {ChunkType, IDENTIFIER_FRAME, crc} from "../../../../../src/encodingStrategies/sszSnappy/snappyFrames/common.js";
import {encodeSnappy} from "../../../../../src/encodingStrategies/sszSnappy/snappyFrames/compress.js";
import {SnappyFramesUncompress} from "../../../../../src/encodingStrategies/sszSnappy/snappyFrames/uncompress.js";

Expand Down Expand Up @@ -56,4 +57,43 @@ describe("encodingStrategies / sszSnappy / snappy frames / uncompress", () => {

expect(decompress.uncompress(new Uint8ArrayList(Buffer.alloc(3, 1)))).toBe(null);
});

it("should detect invalid checksum", () => {
const chunks = new Uint8ArrayList();
chunks.append(IDENTIFIER_FRAME);

chunks.append(Uint8Array.from([ChunkType.UNCOMPRESSED, 0x80, 0x00, 0x00]));
// first 4 bytes are checksum
// 0xffffffff is clearly an invalid checksum
chunks.append(Uint8Array.from(Array.from({length: 0x80}, () => 0xff)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use 128 here? I'm guessing this is a snappy spec thing, but I'm curious nonetheless.

Suggested change
chunks.append(Uint8Array.from(Array.from({length: 0x80}, () => 0xff)));
chunks.append(Uint8Array.from(Array.from({length: 128}, () => 0xff)));

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an artifact from a test case I had been provided; there's nothing special about 128.


const decompress = new SnappyFramesUncompress();
expect(() => decompress.uncompress(chunks)).toThrow(/checksum/);
});

it("should detect skippable frames", () => {
const chunks = new Uint8ArrayList();
chunks.append(IDENTIFIER_FRAME);

chunks.append(Uint8Array.from([ChunkType.SKIPPABLE, 0x80, 0x00, 0x00]));
chunks.append(Uint8Array.from(Array.from({length: 0x80}, () => 0xff)));

const decompress = new SnappyFramesUncompress();
expect(decompress.uncompress(chunks)).toBeNull();
});

it("should detect large data", () => {
const chunks = new Uint8ArrayList();
chunks.append(IDENTIFIER_FRAME);

// add a chunk of size 100000
chunks.append(Uint8Array.from([ChunkType.UNCOMPRESSED, 160, 134, 1]));
const data = Uint8Array.from(Array.from({length: 100000 - 4}, () => 0xff));
const checksum = crc(data);
chunks.append(checksum);
chunks.append(data);

const decompress = new SnappyFramesUncompress();
expect(() => decompress.uncompress(chunks)).toThrow(/large/);
});
});
Loading