From 5c6171f2eb58d866e42417b8a73c41f8db2f7664 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Fri, 8 Nov 2024 21:05:58 +0100 Subject: [PATCH] Add optional support for brotli content encoding --- pom.xml | 7 +++++++ src/org/netpreserve/jwarc/BrotliUtils.java | 22 ++++++++++++++++++++++ src/org/netpreserve/jwarc/DecodedBody.java | 7 ++++++- src/org/netpreserve/jwarc/HttpMessage.java | 2 ++ 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 src/org/netpreserve/jwarc/BrotliUtils.java diff --git a/pom.xml b/pom.xml index 61994a0..051fb08 100644 --- a/pom.xml +++ b/pom.xml @@ -68,6 +68,13 @@ 4.13.2 test + + org.brotli + dec + 0.1.2 + compile + true + + diff --git a/src/org/netpreserve/jwarc/BrotliUtils.java b/src/org/netpreserve/jwarc/BrotliUtils.java new file mode 100644 index 0000000..f080a8c --- /dev/null +++ b/src/org/netpreserve/jwarc/BrotliUtils.java @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (C) 2024 National Library of Australia and the jwarc contributors + */ + +package org.netpreserve.jwarc; + +import java.io.IOException; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; + +import org.brotli.dec.BrotliInputStream; + +/** + * Utility class that wraps a channel of brotli-encoded bytes in a decoding channel, backed by BrotliInputStream from the org.brotli:dec library. 
+ */ +public final class BrotliUtils { + + public static ReadableByteChannel brotliChannel(ReadableByteChannel brotli) throws IOException { + return Channels.newChannel(new BrotliInputStream(Channels.newInputStream(brotli))); + } +} diff --git a/src/org/netpreserve/jwarc/DecodedBody.java b/src/org/netpreserve/jwarc/DecodedBody.java index 4f54e0e..6254be8 100644 --- a/src/org/netpreserve/jwarc/DecodedBody.java +++ b/src/org/netpreserve/jwarc/DecodedBody.java @@ -35,7 +35,12 @@ private DecodedBody(ReadableByteChannel channel, Encoding encoding) throws IOExc this.channel = IOUtils.gunzipChannel(channel); break; case BROTLI: - throw new IOException("Brotli encoding not yet supported"); + try { + this.channel = BrotliUtils.brotliChannel(channel); + } catch (NoClassDefFoundError e) { + throw new IOException("Brotli decoder not found, please install org.brotli:dec", e); + } + break; default: throw new IOException("Unsupported encoding"); } diff --git a/src/org/netpreserve/jwarc/HttpMessage.java b/src/org/netpreserve/jwarc/HttpMessage.java index 037466c..809a009 100644 --- a/src/org/netpreserve/jwarc/HttpMessage.java +++ b/src/org/netpreserve/jwarc/HttpMessage.java @@ -42,6 +42,8 @@ public MessageBody bodyDecoded() throws IOException { } else if (contentEncodings.get(0).equalsIgnoreCase("gzip") || contentEncodings.get(0).equalsIgnoreCase("x-gzip")) { return DecodedBody.create(payload, DecodedBody.Encoding.GZIP); + } else if (contentEncodings.get(0).equalsIgnoreCase("br")) { + return DecodedBody.create(payload, DecodedBody.Encoding.BROTLI); } else if (contentEncodings.get(0).equalsIgnoreCase("deflate")) { return DecodedBody.create(payload, DecodedBody.Encoding.DEFLATE); } else {