chunks: improve readability of compression benchmarks (#7246)

**What this PR does / why we need it**:

### BenchmarkWrite

Use Go runtime facilities to report MB/s for each operation and
compression ratio.

Before:
```
BenchmarkWrite/ordered-none-4               1018           1104885 ns/op         1431962 B/op         33 allocs/op
BenchmarkWrite/ordered-gzip-4                 12          87333201 ns/op         3516699 B/op        696 allocs/op
BenchmarkWrite/ordered-lz4-64k-4              51          21483048 ns/op         3095117 B/op        649 allocs/op
```

After:
```
BenchmarkWrite/ordered-none-4                841           1202007 ns/op        1054353.53 MB/s        101.9 %compressed         1569953 B/op         34 allocs/op
BenchmarkWrite/ordered-gzip-4                 13          86299699 ns/op        3357.25 MB/s             6.891 %compressed       3778896 B/op        702 allocs/op
BenchmarkWrite/ordered-lz4-64k-4              49          22332214 ns/op        34107.61 MB/s            9.880 %compressed       3407522 B/op        661 allocs/op
```

Stop collecting all compressed chunks, as this blows up the memory of
the benchmark and creates unrealistic test conditions for the later
encoding.

Before: `4724260 maxresident KB`; after: `375804 maxresident KB`

### BenchmarkRead

Replace the hand-rolled MB/s reporting, which printed several different values for each encoding as Go hunted for the right number of benchmark iterations.

Add stats to the context for decoding, otherwise a new stats object is
created each time round the loop.

Re-order so all unsampled results come before all sampled results.

Before:
```
BenchmarkRead/none_66_kB-4                   278           4555157 ns/op          131744 B/op        717 allocs/op
BenchmarkRead/sample_none_66_kB-4            142           8238861 ns/op          129771 B/op        717 allocs/op
BenchmarkRead/gzip_66_kB-4                     6         179445187 ns/op         2100498 B/op      15010 allocs/op
BenchmarkRead/sample_gzip_66_kB-4              4         261040057 ns/op         2076030 B/op      15028 allocs/op
BenchmarkRead/lz4-64k_66_kB-4                 19          62855240 ns/op         1722979 B/op      14219 allocs/op
BenchmarkRead/sample_lz4-64k_66_kB-4                  10         115577796 ns/op         1706040 B/op      14220 allocs/op
BenchmarkRead/lz4-256k_66_kB-4                        18          66144317 ns/op         1880851 B/op      15078 allocs/op
BenchmarkRead/sample_lz4-256k_66_kB-4                  9         118405993 ns/op         1800396 B/op      15077 allocs/op
BenchmarkRead/lz4-1M_66_kB-4                          18          67832189 ns/op         1821806 B/op      15076 allocs/op
...
none_524 kB: 1828.92 MB/s
none_524 kB: 1809.33 MB/s
none_262 kB: 1790.79 MB/s
none_66 kB: 1673.84 MB/s
...
gzip_66 kB: 647.35 MB/s
...
```

After:
```
BenchmarkRead/none_66_kB-4                   286           4377585 ns/op        1725.23 MB/s       41896 B/op        600 allocs/op
BenchmarkRead/gzip_66_kB-4                     6         180770638 ns/op         655.82 MB/s      723426 B/op      13265 allocs/op
BenchmarkRead/lz4-64k_66_kB-4                 19          65380524 ns/op        1296.10 MB/s      727571 B/op      12925 allocs/op
BenchmarkRead/lz4-256k_66_kB-4                16          63737292 ns/op        1409.75 MB/s      786994 B/op      13707 allocs/op
BenchmarkRead/lz4-1M_66_kB-4                  18          65963825 ns/op        1362.17 MB/s      770441 B/op      13707 allocs/op
BenchmarkRead/lz4_66_kB-4                     18          64955415 ns/op        1383.31 MB/s     1003277 B/op      13707 allocs/op
BenchmarkRead/snappy_66_kB-4                  20          51588959 ns/op        1273.38 MB/s      361872 B/op       5020 allocs/op
BenchmarkRead/flate_66_kB-4                    6         172147502 ns/op         691.74 MB/s      715706 B/op      13242 allocs/op
BenchmarkRead/zstd_66_kB-4                     3         359473273 ns/op         390.07 MB/s    427247248 B/op     18551 allocs/op
...
BenchmarkRead/sample_none_66_kB-4            135           8533283 ns/op         885.04 MB/s       39866 B/op        600 allocs/op
BenchmarkRead/sample_gzip_66_kB-4              5         243016949 ns/op         487.84 MB/s      686358 B/op      13212 allocs/op
BenchmarkRead/sample_lz4-64k_66_kB-4                  10         114330032 ns/op         741.18 MB/s      707104 B/op      12926 allocs/op
BenchmarkRead/sample_lz4-256k_66_kB-4                  9         117294928 ns/op         766.05 MB/s      778461 B/op      13709 allocs/op
...
```

Note: "MB" in the previous code meant 2^20 bytes, while Go's benchmark output uses 10^6.

**Checklist**
- [x] Reviewed the `CONTRIBUTING.md` guide
- NA Documentation added
- [x] Tests updated
- NA `CHANGELOG.md` updated - not user-facing
- NA Changes that require user attention or interaction to upgrade are
documented in `docs/sources/upgrading/_index.md`
pull/7388/head
Bryan Boreham 3 years ago committed by GitHub
parent fb26baa5b1
commit 74d206bf28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 49
      pkg/chunkenc/memchunk_test.go

@ -675,11 +675,7 @@ func TestIteratorClose(t *testing.T) {
}
}
var result []Chunk
func BenchmarkWrite(b *testing.B) {
chunks := []Chunk{}
entry := &logproto.Entry{
Timestamp: time.Unix(0, 0),
Line: testdata.LogString(0),
@ -689,6 +685,7 @@ func BenchmarkWrite(b *testing.B) {
for _, f := range HeadBlockFmts {
for _, enc := range testEncoding {
b.Run(fmt.Sprintf("%v-%v", f, enc), func(b *testing.B) {
uncompressedBytes, compressedBytes := 0, 0
for n := 0; n < b.N; n++ {
c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
// adds until full so we trigger cut which serialize using gzip
@ -698,9 +695,11 @@ func BenchmarkWrite(b *testing.B) {
entry.Line = testdata.LogString(i)
i++
}
chunks = append(chunks, c)
uncompressedBytes += c.UncompressedSize()
compressedBytes += c.CompressedSize()
}
result = chunks
b.SetBytes(int64(uncompressedBytes) / int64(b.N))
b.ReportMetric(float64(compressedBytes)/float64(uncompressedBytes)*100, "%compressed")
})
}
}
@ -717,23 +716,17 @@ func (nomatchPipeline) ProcessString(_ int64, line string) (string, log.LabelsRe
}
func BenchmarkRead(b *testing.B) {
type res struct {
name string
speed float64
}
result := []res{}
for _, bs := range testBlockSizes {
for _, enc := range testEncoding {
name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
b.Run(name, func(b *testing.B) {
chunks, size := generateData(enc, 5, bs, testTargetSize)
_, ctx := stats.NewContext(context.Background())
b.ResetTimer()
bytesRead := uint64(0)
now := time.Now()
for n := 0; n < b.N; n++ {
for _, c := range chunks {
// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
iterator, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
iterator, err := c.Iterator(ctx, time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
if err != nil {
panic(err)
}
@ -744,24 +737,23 @@ func BenchmarkRead(b *testing.B) {
b.Fatal(err)
}
}
bytesRead += size
}
result = append(result, res{
name: name,
speed: float64(bytesRead) / time.Since(now).Seconds(),
})
b.SetBytes(int64(size))
})
}
}
name = fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
for _, bs := range testBlockSizes {
for _, enc := range testEncoding {
name := fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
b.Run(name, func(b *testing.B) {
chunks, size := generateData(enc, 5, bs, testTargetSize)
_, ctx := stats.NewContext(context.Background())
b.ResetTimer()
bytesRead := uint64(0)
now := time.Now()
for n := 0; n < b.N; n++ {
for _, c := range chunks {
iterator := c.SampleIterator(context.Background(), time.Unix(0, 0), time.Now(), countExtractor)
iterator := c.SampleIterator(ctx, time.Unix(0, 0), time.Now(), countExtractor)
for iterator.Next() {
_ = iterator.Sample()
}
@ -771,19 +763,10 @@ func BenchmarkRead(b *testing.B) {
}
bytesRead += size
}
result = append(result, res{
name: name,
speed: float64(bytesRead) / time.Since(now).Seconds(),
})
b.SetBytes(int64(bytesRead) / int64(b.N))
})
}
}
sort.Slice(result, func(i, j int) bool {
return result[i].speed > result[j].speed
})
for _, r := range result {
fmt.Printf("%s: %.2f MB/s\n", r.name, r.speed/1024/1024)
}
}
func BenchmarkBackwardIterator(b *testing.B) {

Loading…
Cancel
Save