Skip to content

Commit

Permalink
Add dictionary API to cgo wrapper
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 698745795
  • Loading branch information
eustas authored and copybara-github committed Nov 21, 2024
1 parent 39904bd commit 5c66724
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 19 deletions.
41 changes: 41 additions & 0 deletions go/cbrotli/cbrotli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,3 +375,44 @@ func TestEncodeDecode(t *testing.T) {
}
}
}

// TestEncodeDecodeWithDictionary round-trips a buffer through Encode and
// DecodeWithRawDictionary while using the buffer itself as the raw dictionary.
// Because the dictionary equals the input, the encoded form is expected to be
// tiny.
func TestEncodeDecodeWithDictionary(t *testing.T) {
	const (
		quality = 5
		size    = 4096
		limit   = 20 // largest encoded size, in bytes, the test accepts
	)

	input := make([]byte, size)
	for i := range input {
		input[i] = byte(i*7 + i*i*5)
	}
	// use dictionary same as input
	pd := NewPreparedDictionary(input, DtRaw, quality)
	defer pd.Close()

	encoded, err := Encode(input, WriterOptions{Quality: quality, Dictionary: pd})
	if err != nil {
		// Nothing below is meaningful without encoded data, so stop here
		// instead of reporting follow-up failures.
		t.Fatalf("Encode: %v", err)
	}
	if len(encoded) > limit {
		t.Errorf("Output length exceeds expectations: %d > %d", len(encoded), limit)
	}

	decoded, err := DecodeWithRawDictionary(encoded, input)
	if err != nil {
		// A failed decode would only produce a redundant content mismatch.
		t.Fatalf("Decode: %v", err)
	}
	if !bytes.Equal(decoded, input) {
		var want string
		if len(input) > 320 {
			want = fmt.Sprintf("<%d bytes>", len(input))
		} else {
			want = fmt.Sprintf("%q", input)
		}
		t.Errorf(""+
			"Decode content:\n"+
			"%q\n"+
			"want:\n"+
			"%s",
			decoded, want)
	}
}
59 changes: 48 additions & 11 deletions go/cbrotli/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"errors"
"io"
"io/ioutil"
"runtime"
)

type decodeError C.BrotliDecoderErrorCode
Expand All @@ -49,10 +50,11 @@ var errReaderClosed = errors.New("cbrotli: Reader is closed")
// Reader implements io.ReadCloser by reading Brotli-encoded data from an
// underlying Reader.
//
// Each field is declared exactly once; the pre-change and post-change copies
// of the field list must not both be present, since duplicate field names do
// not compile.
type Reader struct {
	src    io.Reader
	state  *C.BrotliDecoderState
	buf    []byte          // scratch space for reading from src
	in     []byte          // current chunk to decode; usually aliases buf
	pinner *runtime.Pinner // raw dictionary pinner; nil when no dictionary is attached
}

// readBufSize is a "good" buffer size that avoids excessive round-trips
Expand All @@ -63,10 +65,26 @@ const readBufSize = 32 * 1024
// NewReader creates a Reader that decodes Brotli data read from src without
// any shared dictionary.
// Close MUST be called to free resources.
func NewReader(src io.Reader) *Reader {
	var noDictionary []byte // nil: no raw dictionary is attached
	return NewReaderWithRawDictionary(src, noDictionary)
}

// NewReaderWithRawDictionary initializes new Reader instance with shared dictionary.
// Close MUST be called to free resources.
//
// A nil or empty dictionary means "no dictionary". Otherwise the first byte of
// dictionary is pinned and the slice is attached to the decoder as a raw
// dictionary; Close releases the pin.
func NewReaderWithRawDictionary(src io.Reader, dictionary []byte) *Reader {
	s := C.BrotliDecoderCreateInstance(nil, nil, nil)
	var p *runtime.Pinner
	// Test the length, not nil-ness: an empty non-nil slice has no element 0,
	// so taking &dictionary[0] would panic. Also skip the attach call when no
	// decoder instance was created, so a nil state is never handed to C here.
	if s != nil && len(dictionary) != 0 {
		p = new(runtime.Pinner)
		p.Pin(&dictionary[0])
		// TODO(eustas): use return value
		C.BrotliDecoderAttachDictionary(s, C.BrotliSharedDictionaryType( /* RAW */ 0),
			C.size_t(len(dictionary)), (*C.uint8_t)(&dictionary[0]))
	}
	return &Reader{
		src:    src,
		state:  s,
		buf:    make([]byte, readBufSize),
		pinner: p,
	}
}

Expand All @@ -78,6 +96,10 @@ func (r *Reader) Close() error {
// Close despite the state; i.e. there might be some unread decoded data.
C.BrotliDecoderDestroyInstance(r.state)
r.state = nil
if r.pinner != nil {
r.pinner.Unpin()
r.pinner = nil
}
return nil
}

Expand Down Expand Up @@ -153,11 +175,26 @@ func (r *Reader) Read(p []byte) (n int, err error) {

// Decode decompresses a complete Brotli-encoded buffer without using a shared
// dictionary and returns the decoded bytes.
func Decode(encodedData []byte) ([]byte, error) {
	var noDictionary []byte // nil: decode without a raw dictionary
	return DecodeWithRawDictionary(encodedData, noDictionary)
}

// DecodeWithRawDictionary decodes Brotli encoded data with shared dictionary.
func DecodeWithRawDictionary(encodedData []byte, dictionary []byte) ([]byte, error) {
s := C.BrotliDecoderCreateInstance(nil, nil, nil)
var p *runtime.Pinner
if dictionary != nil {
p = new(runtime.Pinner)
p.Pin(&dictionary[0])
// TODO(eustas): use return value
C.BrotliDecoderAttachDictionary(s, C.BrotliSharedDictionaryType( /* RAW */ 0),
C.size_t(len(dictionary)), (*C.uint8_t)(&dictionary[0]))
}
r := &Reader{
src: bytes.NewReader(nil),
state: C.BrotliDecoderCreateInstance(nil, nil, nil),
buf: make([]byte, 4), // arbitrarily small but nonzero so that r.src.Read returns io.EOF
in: encodedData,
src: bytes.NewReader(nil),
state: s,
buf: make([]byte, 4), // arbitrarily small but nonzero so that r.src.Read returns io.EOF
in: encodedData,
pinner: p,
}
defer r.Close()
return ioutil.ReadAll(r)
Expand Down
79 changes: 71 additions & 8 deletions go/cbrotli/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,54 @@ import (
"bytes"
"errors"
"io"
"runtime"
"unsafe"
)

// PreparedDictionary is a handle to a native (C) prepared-dictionary object
// created by NewPreparedDictionary. Close must be called to release it.
type PreparedDictionary struct {
// opaque is the pointer returned by C.BrotliEncoderPrepareDictionary; Close
// destroys it and resets the field to nil.
opaque *C.BrotliEncoderPreparedDictionary
// pinner pins the first byte of the data passed to NewPreparedDictionary;
// Close unpins it.
pinner *runtime.Pinner
}

// DictionaryType identifies the format of shared dictionary data. Values are
// converted directly to C.BrotliSharedDictionaryType, so the numeric values
// are spelled out explicitly.
type DictionaryType int

const (
	// DtRaw denotes LZ77 prefix dictionary
	DtRaw = DictionaryType(0)
	// DtSerialized denotes serialized format
	DtSerialized = DictionaryType(1)
)

// NewPreparedDictionary prepares dictionary data for encoder.
// Same instance can be used for multiple encoding sessions.
// Close MUST be called to free resources.
//
// data is the dictionary content, dictionaryType selects its format and
// quality is forwarded to the native prepare call. When data is non-empty its
// first byte is pinned until Close; empty data is passed to the native call as
// a nil pointer with length 0.
func NewPreparedDictionary(data []byte, dictionaryType DictionaryType, quality int) *PreparedDictionary {
	p := new(runtime.Pinner)
	var ptr *C.uint8_t
	if len(data) != 0 {
		// Pin only when there is an element to pin: &data[0] panics on an
		// empty slice. An empty Pinner is still stored so Close can call
		// Unpin unconditionally.
		p.Pin(&data[0])
		ptr = (*C.uint8_t)(&data[0])
	}
	d := C.BrotliEncoderPrepareDictionary(C.BrotliSharedDictionaryType(dictionaryType), C.size_t(len(data)), ptr, C.int(quality), nil, nil, nil)
	return &PreparedDictionary{
		opaque: d,
		pinner: p,
	}
}

// Close frees C resources and releases the pin on the dictionary data.
// IMPORTANT: calling Close before all encoders that use that dictionary are
// closed as well will cause crash.
// Close always returns nil.
func (p *PreparedDictionary) Close() error {
	// C-Brotli tolerates `nil` pointer here.
	C.BrotliEncoderDestroyPreparedDictionary(p.opaque)
	p.opaque = nil
	// Mirror Reader.Close: tolerate a missing pinner (e.g. a zero-value
	// PreparedDictionary) and drop the reference once it is released.
	if p.pinner != nil {
		p.pinner.Unpin()
		p.pinner = nil
	}
	return nil
}

// WriterOptions configures Writer.
type WriterOptions struct {
// Quality controls the compression-speed vs compression-density trade-offs.
Expand All @@ -56,38 +101,56 @@ type WriterOptions struct {
// LGWin is the base 2 logarithm of the sliding window size.
// Range is 10 to 24. 0 indicates automatic configuration based on Quality.
LGWin int
// Prepared shared dictionary
Dictionary *PreparedDictionary
}

// Writer implements io.WriteCloser by writing Brotli-encoded data to an
// underlying Writer.
type Writer struct {
// healthy is set by NewWriter: false when the encoder instance is nil or when
// setting a parameter / attaching the dictionary was rejected. writeChunk
// fails with errWriterUnhealthy while it is false.
healthy bool
// dst is the underlying writer passed to NewWriter.
dst io.Writer
// state is the native encoder instance; writeChunk reports errWriterClosed
// when it is nil.
state *C.BrotliEncoderState
// NOTE(review): buf and encoded look like scratch buffers for pending input
// and produced output; their use is outside this view, so confirm.
buf, encoded []byte
}

// Sentinel errors reported by Writer. Each one is declared exactly once;
// keeping both the pre-change and post-change copies of a declaration is a
// redeclaration error.
var (
	// errEncode is the generic encoder failure.
	errEncode = errors.New("cbrotli: encode error")
	// errWriterClosed is returned when the Writer has no encoder state.
	errWriterClosed = errors.New("cbrotli: Writer is closed")
	// errWriterUnhealthy is returned when the Writer was not set up successfully.
	errWriterUnhealthy = errors.New("cbrotli: Writer is unhealthy")
)

// NewWriter initializes new Writer instance.
// Close MUST be called to free resources.
//
// options.Quality is always applied; options.LGWin is applied only when it is
// positive; options.Dictionary, when non-nil, is attached to the encoder. If
// the encoder instance cannot be created, or the encoder rejects any of these
// steps, the returned Writer is marked unhealthy and writeChunk fails with
// errWriterUnhealthy.
func NewWriter(dst io.Writer, options WriterOptions) *Writer {
	state := C.BrotliEncoderCreateInstance(nil, nil, nil)
	if state == nil {
		// No encoder instance: return an unhealthy Writer right away so that
		// a nil state is never handed to the C setters below.
		return &Writer{dst: dst}
	}

	// Each setting is applied once and its result is checked; a zero return
	// from the C call means the encoder rejected it.
	healthy := true
	if C.BrotliEncoderSetParameter(
		state, C.BROTLI_PARAM_QUALITY, (C.uint32_t)(options.Quality)) == 0 {
		healthy = false
	}
	if options.LGWin > 0 {
		if C.BrotliEncoderSetParameter(
			state, C.BROTLI_PARAM_LGWIN, (C.uint32_t)(options.LGWin)) == 0 {
			healthy = false
		}
	}
	if options.Dictionary != nil {
		if C.BrotliEncoderAttachPreparedDictionary(state, options.Dictionary.opaque) == 0 {
			healthy = false
		}
	}
	return &Writer{
		healthy: healthy,
		dst:     dst,
		state:   state,
	}
}

func (w *Writer) writeChunk(p []byte, op C.BrotliEncoderOperation) (n int, err error) {
if !w.healthy {
return 0, errWriterUnhealthy
}
if w.state == nil {
return 0, errWriterClosed
}
Expand Down

0 comments on commit 5c66724

Please sign in to comment.