chunks: improve readability of compression benchmarks (#7246)

**What this PR does / why we need it**:

### BenchmarkWrite

Use Go runtime facilities to report MB/s for each operation and
compression ratio.

Before:
```
BenchmarkWrite/ordered-none-4               1018           1104885 ns/op         1431962 B/op         33 allocs/op
BenchmarkWrite/ordered-gzip-4                 12          87333201 ns/op         3516699 B/op        696 allocs/op
BenchmarkWrite/ordered-lz4-64k-4              51          21483048 ns/op         3095117 B/op        649 allocs/op
```

After:
```
BenchmarkWrite/ordered-none-4                841           1202007 ns/op        1054353.53 MB/s        101.9 %compressed         1569953 B/op         34 allocs/op
BenchmarkWrite/ordered-gzip-4                 13          86299699 ns/op        3357.25 MB/s             6.891 %compressed       3778896 B/op        702 allocs/op
BenchmarkWrite/ordered-lz4-64k-4              49          22332214 ns/op        34107.61 MB/s            9.880 %compressed       3407522 B/op        661 allocs/op
```

Stop collecting all compressed chunks, as this blows up the memory of
the benchmark and creates unrealistic test conditions for the later
encoding.

Before: `4724260 maxresident KB`; after: `375804 maxresident KB`

### BenchmarkRead

Replace the hand-rolled MB/s reporting, which printed several different values for each encoding as Go hunted for the right number of benchmark iterations.

Add stats to the context for decoding, otherwise a new stats object is
created each time round the loop.

Re-order so all unsampled results come before all sampled results.

Before:
```
BenchmarkRead/none_66_kB-4                   278           4555157 ns/op          131744 B/op        717 allocs/op
BenchmarkRead/sample_none_66_kB-4            142           8238861 ns/op          129771 B/op        717 allocs/op
BenchmarkRead/gzip_66_kB-4                     6         179445187 ns/op         2100498 B/op      15010 allocs/op
BenchmarkRead/sample_gzip_66_kB-4              4         261040057 ns/op         2076030 B/op      15028 allocs/op
BenchmarkRead/lz4-64k_66_kB-4                 19          62855240 ns/op         1722979 B/op      14219 allocs/op
BenchmarkRead/sample_lz4-64k_66_kB-4                  10         115577796 ns/op         1706040 B/op      14220 allocs/op
BenchmarkRead/lz4-256k_66_kB-4                        18          66144317 ns/op         1880851 B/op      15078 allocs/op
BenchmarkRead/sample_lz4-256k_66_kB-4                  9         118405993 ns/op         1800396 B/op      15077 allocs/op
BenchmarkRead/lz4-1M_66_kB-4                          18          67832189 ns/op         1821806 B/op      15076 allocs/op
...
none_524 kB: 1828.92 MB/s
none_524 kB: 1809.33 MB/s
none_262 kB: 1790.79 MB/s
none_66 kB: 1673.84 MB/s
...
gzip_66 kB: 647.35 MB/s
...
```

After:
```
BenchmarkRead/none_66_kB-4                   286           4377585 ns/op        1725.23 MB/s       41896 B/op        600 allocs/op
BenchmarkRead/gzip_66_kB-4                     6         180770638 ns/op         655.82 MB/s      723426 B/op      13265 allocs/op
BenchmarkRead/lz4-64k_66_kB-4                 19          65380524 ns/op        1296.10 MB/s      727571 B/op      12925 allocs/op
BenchmarkRead/lz4-256k_66_kB-4                16          63737292 ns/op        1409.75 MB/s      786994 B/op      13707 allocs/op
BenchmarkRead/lz4-1M_66_kB-4                  18          65963825 ns/op        1362.17 MB/s      770441 B/op      13707 allocs/op
BenchmarkRead/lz4_66_kB-4                     18          64955415 ns/op        1383.31 MB/s     1003277 B/op      13707 allocs/op
BenchmarkRead/snappy_66_kB-4                  20          51588959 ns/op        1273.38 MB/s      361872 B/op       5020 allocs/op
BenchmarkRead/flate_66_kB-4                    6         172147502 ns/op         691.74 MB/s      715706 B/op      13242 allocs/op
BenchmarkRead/zstd_66_kB-4                     3         359473273 ns/op         390.07 MB/s    427247248 B/op     18551 allocs/op
...
BenchmarkRead/sample_none_66_kB-4            135           8533283 ns/op         885.04 MB/s       39866 B/op        600 allocs/op
BenchmarkRead/sample_gzip_66_kB-4              5         243016949 ns/op         487.84 MB/s      686358 B/op      13212 allocs/op
BenchmarkRead/sample_lz4-64k_66_kB-4                  10         114330032 ns/op         741.18 MB/s      707104 B/op      12926 allocs/op
BenchmarkRead/sample_lz4-256k_66_kB-4                  9         117294928 ns/op         766.05 MB/s      778461 B/op      13709 allocs/op
...
```

Note: "MB" in the previous code meant 2^20 bytes, while Go's benchmark output uses 10^6.

**Checklist**
- [x] Reviewed the `CONTRIBUTING.md` guide
- NA Documentation added
- [x] Tests updated
- NA `CHANGELOG.md` updated - not user-facing
- NA Changes that require user attention or interaction to upgrade are
documented in `docs/sources/upgrading/_index.md`
pull/7388/head
Bryan Boreham 3 years ago committed by GitHub
parent fb26baa5b1
commit 74d206bf28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 49
      pkg/chunkenc/memchunk_test.go

@ -675,11 +675,7 @@ func TestIteratorClose(t *testing.T) {
}
}
var result []Chunk
func BenchmarkWrite(b *testing.B) {
chunks := []Chunk{}
entry := &logproto.Entry{
Timestamp: time.Unix(0, 0),
Line: testdata.LogString(0),
@ -689,6 +685,7 @@ func BenchmarkWrite(b *testing.B) {
for _, f := range HeadBlockFmts {
for _, enc := range testEncoding {
b.Run(fmt.Sprintf("%v-%v", f, enc), func(b *testing.B) {
uncompressedBytes, compressedBytes := 0, 0
for n := 0; n < b.N; n++ {
c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
// adds until full so we trigger cut which serialize using gzip
@ -698,9 +695,11 @@ func BenchmarkWrite(b *testing.B) {
entry.Line = testdata.LogString(i)
i++
}
chunks = append(chunks, c)
uncompressedBytes += c.UncompressedSize()
compressedBytes += c.CompressedSize()
}
result = chunks
b.SetBytes(int64(uncompressedBytes) / int64(b.N))
b.ReportMetric(float64(compressedBytes)/float64(uncompressedBytes)*100, "%compressed")
})
}
}
@ -717,23 +716,17 @@ func (nomatchPipeline) ProcessString(_ int64, line string) (string, log.LabelsRe
}
func BenchmarkRead(b *testing.B) {
type res struct {
name string
speed float64
}
result := []res{}
for _, bs := range testBlockSizes {
for _, enc := range testEncoding {
name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
b.Run(name, func(b *testing.B) {
chunks, size := generateData(enc, 5, bs, testTargetSize)
_, ctx := stats.NewContext(context.Background())
b.ResetTimer()
bytesRead := uint64(0)
now := time.Now()
for n := 0; n < b.N; n++ {
for _, c := range chunks {
// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
iterator, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
iterator, err := c.Iterator(ctx, time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
if err != nil {
panic(err)
}
@ -744,24 +737,23 @@ func BenchmarkRead(b *testing.B) {
b.Fatal(err)
}
}
bytesRead += size
}
result = append(result, res{
name: name,
speed: float64(bytesRead) / time.Since(now).Seconds(),
})
b.SetBytes(int64(size))
})
}
}
name = fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
for _, bs := range testBlockSizes {
for _, enc := range testEncoding {
name := fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
b.Run(name, func(b *testing.B) {
chunks, size := generateData(enc, 5, bs, testTargetSize)
_, ctx := stats.NewContext(context.Background())
b.ResetTimer()
bytesRead := uint64(0)
now := time.Now()
for n := 0; n < b.N; n++ {
for _, c := range chunks {
iterator := c.SampleIterator(context.Background(), time.Unix(0, 0), time.Now(), countExtractor)
iterator := c.SampleIterator(ctx, time.Unix(0, 0), time.Now(), countExtractor)
for iterator.Next() {
_ = iterator.Sample()
}
@ -771,19 +763,10 @@ func BenchmarkRead(b *testing.B) {
}
bytesRead += size
}
result = append(result, res{
name: name,
speed: float64(bytesRead) / time.Since(now).Seconds(),
})
b.SetBytes(int64(bytesRead) / int64(b.N))
})
}
}
sort.Slice(result, func(i, j int) bool {
return result[i].speed > result[j].speed
})
for _, r := range result {
fmt.Printf("%s: %.2f MB/s\n", r.name, r.speed/1024/1024)
}
}
func BenchmarkBackwardIterator(b *testing.B) {

Loading…
Cancel
Save