Add util/compression package to consolidate snappy/zstd use in Prometheus. (#16156)
# Conflicts:
#	tsdb/db_test.go

Apply suggestions from code review.
Addressed comments.
Update util/compression/buffers.go.

Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
Co-authored-by: Arthur Silva Sens <arthursens2005@gmail.com>

parent e32d89af7f
commit 7a7bc65237
@@ -0,0 +1,142 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package compression

import (
    "sync"

    "github.com/klauspost/compress/zstd"
)

// EncodeBuffer is a reusable scratch buffer for Encode calls; for zstd it also
// holds a lazily initialized encoder.
type EncodeBuffer interface {
    zstdEncBuf() *zstd.Encoder
    get() []byte
    set([]byte)
}

type syncEBuffer struct {
    onceZstd sync.Once
    w        *zstd.Encoder
    buf      []byte
}

// NewSyncEncodeBuffer returns a synchronous buffer that can only be used
// by one encoding goroutine at a time. Notably, the encoded byte slice returned
// by Encode is valid only until the next Encode call.
func NewSyncEncodeBuffer() EncodeBuffer {
    return &syncEBuffer{}
}

func (b *syncEBuffer) zstdEncBuf() *zstd.Encoder {
    b.onceZstd.Do(func() {
        // Without params this never returns an error.
        b.w, _ = zstd.NewWriter(nil)
    })
    return b.w
}

func (b *syncEBuffer) get() []byte {
    return b.buf
}

func (b *syncEBuffer) set(buf []byte) {
    b.buf = buf
}

type concurrentEBuffer struct {
    onceZstd sync.Once
    w        *zstd.Encoder
}

// NewConcurrentEncodeBuffer returns a buffer that can be used concurrently.
// NOTE: For Zstd compression, a concurrency limit equal to GOMAXPROCS is implied.
func NewConcurrentEncodeBuffer() EncodeBuffer {
    return &concurrentEBuffer{}
}

func (b *concurrentEBuffer) zstdEncBuf() *zstd.Encoder {
    b.onceZstd.Do(func() {
        // Without params this never returns an error.
        b.w, _ = zstd.NewWriter(nil)
    })
    return b.w
}

// TODO(bwplotka): We could use a pool, but putting buffers back into the pool
// needs to happen on the caller side, so no pool for now.
func (b *concurrentEBuffer) get() []byte {
    return nil
}

func (b *concurrentEBuffer) set([]byte) {}

// DecodeBuffer is a reusable scratch buffer for Decode calls; for zstd it also
// holds a lazily initialized decoder.
type DecodeBuffer interface {
    zstdDecBuf() *zstd.Decoder
    get() []byte
    set([]byte)
}

type syncDBuffer struct {
    onceZstd sync.Once
    r        *zstd.Decoder
    buf      []byte
}

// NewSyncDecodeBuffer returns a synchronous buffer that can only be used
// by one decoding goroutine at a time. Notably, the decoded byte slice returned
// by Decode is valid only until the next Decode call.
func NewSyncDecodeBuffer() DecodeBuffer {
    return &syncDBuffer{}
}

func (b *syncDBuffer) zstdDecBuf() *zstd.Decoder {
    b.onceZstd.Do(func() {
        // Without params this never returns an error.
        b.r, _ = zstd.NewReader(nil)
    })
    return b.r
}

func (b *syncDBuffer) get() []byte {
    return b.buf
}

func (b *syncDBuffer) set(buf []byte) {
    b.buf = buf
}

type concurrentDBuffer struct {
    onceZstd sync.Once
    r        *zstd.Decoder
}

// NewConcurrentDecodeBuffer returns a buffer that can be used concurrently.
// NOTE: For Zstd compression, a concurrency limit equal to GOMAXPROCS is implied.
func NewConcurrentDecodeBuffer() DecodeBuffer {
    return &concurrentDBuffer{}
}

func (b *concurrentDBuffer) zstdDecBuf() *zstd.Decoder {
    b.onceZstd.Do(func() {
        // Without params this never returns an error.
        b.r, _ = zstd.NewReader(nil)
    })
    return b.r
}

func (b *concurrentDBuffer) get() []byte {
    return nil
}

func (b *concurrentDBuffer) set([]byte) {}
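The two buffer families differ in what they reuse: a sync buffer keeps a single scratch slice (so the output of Encode/Decode is only valid until the next call on that buffer), while a concurrent buffer shares only the lazily built zstd encoder/decoder and never hands back a reused slice. Below is a minimal sketch of choosing between them, assuming the package is importable as github.com/prometheus/prometheus/util/compression (an inferred path, not shown in this diff):

package main

import (
    "fmt"
    "sync"

    // Assumed import path for the package added by this PR.
    compression "github.com/prometheus/prometheus/util/compression"
)

func main() {
    // One sync buffer per goroutine: each worker owns its own encode loop
    // and benefits from scratch-slice reuse.
    var wg sync.WaitGroup
    for i := 0; i < 4; i++ {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            buf := compression.NewSyncEncodeBuffer() // must not be shared across goroutines
            out, err := compression.Encode(compression.Zstd, []byte("payload"), buf)
            if err != nil {
                panic(err)
            }
            fmt.Println(id, len(out))
        }(i)
    }

    // Alternatively, a single concurrent buffer can be shared by all
    // goroutines; it trades slice reuse for concurrency safety.
    shared := compression.NewConcurrentEncodeBuffer()
    if _, err := compression.Encode(compression.Snappy, []byte("payload"), shared); err != nil {
        panic(err)
    }
    wg.Wait()
}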
@@ -0,0 +1,122 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package compression

import (
    "errors"
    "fmt"

    "github.com/golang/snappy"
)

// Type represents a valid compression type supported by this package.
type Type = string

const (
    // None represents the no-compression case.
    // None is the default when Type is empty.
    None Type = "none"
    // Snappy represents the snappy block format.
    Snappy Type = "snappy"
    // Zstd represents the "speed" mode of Zstd (Zstandard, https://facebook.github.io/zstd/).
    // This is roughly equivalent to the default Zstandard mode (level 3).
    Zstd Type = "zstd"
)

// Types returns all supported compression types.
func Types() []Type { return []Type{None, Snappy, Zstd} }

// Encode returns the encoded form of src for the given compression type.
// For None or an empty message, encoding is not attempted.
//
// The buf argument allows passing various buffer implementations that make
// encoding more efficient. See NewSyncEncodeBuffer and NewConcurrentEncodeBuffer
// for further details. For non-zstd compression types, it is valid to pass a nil buf.
//
// Encode is concurrency-safe; however, note the concurrency limits of the
// buffer of your choice.
func Encode(t Type, src []byte, buf EncodeBuffer) (ret []byte, err error) {
    if len(src) == 0 || t == "" || t == None {
        return src, nil
    }
    if t == Snappy {
        // If MaxEncodedLen is less than 0, the record is too large to be compressed.
        if snappy.MaxEncodedLen(len(src)) < 0 {
            return src, fmt.Errorf("compression: Snappy can't encode such a large message: %v", len(src))
        }
        var b []byte
        if buf != nil {
            b = buf.get()
            defer func() {
                buf.set(ret)
            }()
        }

        // The snappy library uses `len` to calculate if we need a new buffer.
        // In order to allocate as few buffers as possible, make the length
        // equal to the capacity.
        b = b[:cap(b)]
        return snappy.Encode(b, src), nil
    }
    if t == Zstd {
        if buf == nil {
            return nil, errors.New("zstd requested but EncodeBuffer was not provided")
        }
        b := buf.get()
        defer func() {
            buf.set(ret)
        }()

        return buf.zstdEncBuf().EncodeAll(src, b[:0]), nil
    }
    return nil, fmt.Errorf("unsupported compression type: %s", t)
}

// Decode returns the decoded form of src for the given compression type.
//
// The buf argument allows passing various buffer implementations that make
// decoding more efficient. See NewSyncDecodeBuffer and NewConcurrentDecodeBuffer
// for further details. For non-zstd compression types, it is valid to pass a nil buf.
//
// Decode is concurrency-safe; however, note the concurrency limits of the
// buffer of your choice.
func Decode(t Type, src []byte, buf DecodeBuffer) (ret []byte, err error) {
    if len(src) == 0 || t == "" || t == None {
        return src, nil
    }
    if t == Snappy {
        var b []byte
        if buf != nil {
            b = buf.get()
            defer func() {
                buf.set(ret)
            }()
        }
        // The snappy library uses `len` to calculate if we need a new buffer.
        // In order to allocate as few buffers as possible, make the length
        // equal to the capacity.
        b = b[:cap(b)]
        return snappy.Decode(b, src)
    }
    if t == Zstd {
        if buf == nil {
            return nil, errors.New("zstd requested but DecodeBuffer was not provided")
        }
        b := buf.get()
        defer func() {
            buf.set(ret)
        }()
        return buf.zstdDecBuf().DecodeAll(src, b[:0])
    }
    return nil, fmt.Errorf("unsupported compression type: %s", t)
}
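As a usage sketch for the Encode/Decode pair above, here is a small round trip with a sync buffer, again assuming the inferred github.com/prometheus/prometheus/util/compression import path. The copy via bytes.Clone is there because the returned slice aliases the buffer's scratch space and is invalidated by the next call on the same buffer:

package main

import (
    "bytes"
    "fmt"

    // Assumed import path; the diff itself only shows "package compression".
    compression "github.com/prometheus/prometheus/util/compression"
)

func main() {
    encBuf := compression.NewSyncEncodeBuffer()
    decBuf := compression.NewSyncDecodeBuffer()

    src := []byte("some repetitive payload payload payload")

    enc, err := compression.Encode(compression.Snappy, src, encBuf)
    if err != nil {
        panic(err)
    }
    // The encoded slice is only valid until the next Encode call on encBuf,
    // so copy it if it has to be retained.
    retained := bytes.Clone(enc)

    dec, err := compression.Decode(compression.Snappy, retained, decBuf)
    if err != nil {
        panic(err)
    }
    fmt.Println(bytes.Equal(src, dec)) // true
}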
@@ -0,0 +1,193 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package compression

import (
    "errors"
    "fmt"
    "testing"

    "github.com/stretchr/testify/require"
)

const compressible = `ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
fsfsdfsfddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa2
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa12
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa1
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa121
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa324
ddddddddddsfpgjsdoadjgfpajdspfgjasfjapddddddddddaaaaaaaa145
`

func TestEncodeDecode(t *testing.T) {
    for _, tcase := range []struct {
        name string

        src               string
        types             []Type
        encBuf            EncodeBuffer
        decBuf            DecodeBuffer
        expectCompression bool
        expectEncErr      error
    }{
        {
            name:              "empty src; no buffers",
            types:             Types(),
            src:               "",
            expectCompression: false,
        },
        {
            name:              "empty src; sync buffers",
            types:             Types(),
            encBuf:            NewSyncEncodeBuffer(), decBuf: NewSyncDecodeBuffer(),
            src:               "",
            expectCompression: false,
        },
        {
            name:              "empty src; concurrent buffers",
            types:             Types(),
            encBuf:            NewConcurrentEncodeBuffer(), decBuf: NewConcurrentDecodeBuffer(),
            src:               "",
            expectCompression: false,
        },
        {
            name:              "no buffers",
            types:             []Type{None},
            src:               compressible,
            expectCompression: false,
        },
        {
            name:              "no buffers",
            types:             []Type{Snappy},
            src:               compressible,
            expectCompression: true,
        },
        {
            name:         "no buffers",
            types:        []Type{Zstd},
            src:          compressible,
            expectEncErr: errors.New("zstd requested but EncodeBuffer was not provided"),
        },
        {
            name:              "sync buffers",
            types:             []Type{None},
            encBuf:            NewSyncEncodeBuffer(), decBuf: NewSyncDecodeBuffer(),
            src:               compressible,
            expectCompression: false,
        },
        {
            name:              "sync buffers",
            types:             Types()[1:], // All but None.
            encBuf:            NewSyncEncodeBuffer(), decBuf: NewSyncDecodeBuffer(),
            src:               compressible,
            expectCompression: true,
        },
        {
            name:              "concurrent buffers",
            types:             []Type{None},
            encBuf:            NewConcurrentEncodeBuffer(), decBuf: NewConcurrentDecodeBuffer(),
            src:               compressible,
            expectCompression: false,
        },
        {
            name:              "concurrent buffers",
            types:             Types()[1:], // All but None.
            encBuf:            NewConcurrentEncodeBuffer(), decBuf: NewConcurrentDecodeBuffer(),
            src:               compressible,
            expectCompression: true,
        },
    } {
        require.NotEmpty(t, tcase.types, "must specify at least one type")
        for _, typ := range tcase.types {
            t.Run(fmt.Sprintf("case=%v/type=%v", tcase.name, typ), func(t *testing.T) {
                res, err := Encode(typ, []byte(tcase.src), tcase.encBuf)
                if tcase.expectEncErr != nil {
                    require.ErrorContains(t, err, tcase.expectEncErr.Error())
                    return
                }
                require.NoError(t, err)
                if tcase.expectCompression {
                    require.Less(t, len(res), len(tcase.src))
                }

                // Decode back.
                got, err := Decode(typ, res, tcase.decBuf)
                require.NoError(t, err)
                require.Equal(t, tcase.src, string(got))
            })
        }
    }
}

/*
    export bench=encode-v1 && go test ./util/compression/... \
        -run '^$' -bench '^BenchmarkEncode' \
        -benchtime 5s -count 6 -cpu 2 -timeout 999m \
        | tee ${bench}.txt
*/
func BenchmarkEncode(b *testing.B) {
    for _, typ := range Types() {
        b.Run(fmt.Sprintf("type=%v", typ), func(b *testing.B) {
            var buf EncodeBuffer
            compressible := []byte(compressible)

            b.ReportAllocs()
            b.ResetTimer()
            for i := 0; i < b.N; i++ {
                if buf == nil {
                    buf = NewSyncEncodeBuffer()
                }
                res, err := Encode(typ, compressible, buf)
                require.NoError(b, err)
                b.ReportMetric(float64(len(res)), "B")
            }
        })
    }
}

/*
    export bench=decode-v1 && go test ./util/compression/... \
        -run '^$' -bench '^BenchmarkDecode' \
        -benchtime 5s -count 6 -cpu 2 -timeout 999m \
        | tee ${bench}.txt
*/
func BenchmarkDecode(b *testing.B) {
    for _, typ := range Types() {
        b.Run(fmt.Sprintf("type=%v", typ), func(b *testing.B) {
            var buf DecodeBuffer
            res, err := Encode(typ, []byte(compressible), NewConcurrentEncodeBuffer())
            require.NoError(b, err)

            b.ReportAllocs()
            b.ResetTimer()
            for i := 0; i < b.N; i++ {
                if buf == nil {
                    buf = NewSyncDecodeBuffer()
                }
                _, err := Decode(typ, res, buf)
                require.NoError(b, err)
            }
        })
    }
}