mirror of https://github.com/grafana/loki
Storage memory improvement (#713)
* add benchmark for storage queries * improve iterator to load only on next * fix memory retained by lazy chunks * reverse backward lazy iterator (pull/768/head)
parent
8b06eb66bf
commit
3346ce1b40
@ -0,0 +1,114 @@ |
||||
package chunkenc |
||||
|
||||
import ( |
||||
"bufio" |
||||
"bytes" |
||||
"compress/gzip" |
||||
|
||||
"io" |
||||
"sync" |
||||
) |
||||
|
||||
// CompressionPool is a pool of CompressionWriter and CompressionReader.
// This is used by every chunk to avoid unnecessary allocations.
type CompressionPool interface {
	// GetWriter returns a CompressionWriter that compresses into the given io.Writer.
	GetWriter(io.Writer) CompressionWriter
	// PutWriter returns a CompressionWriter to the pool for reuse.
	PutWriter(CompressionWriter)
	// GetReader returns a CompressionReader that decompresses from the given io.Reader.
	GetReader(io.Reader) CompressionReader
	// PutReader returns a CompressionReader to the pool for reuse.
	PutReader(CompressionReader)
}
||||
|
||||
var (
	// Gzip is the shared gzip compression pool.
	Gzip GzipPool
	// BufReaderPool is a bufio.Reader pool; readers are created detached
	// (bufio.NewReader(nil)) and reset onto a source on Get.
	BufReaderPool = &BufioReaderPool{
		pool: sync.Pool{
			New: func() interface{} { return bufio.NewReader(nil) },
		},
	}
	// BytesBufferPool is a pool of 4096-byte bytes.Buffers used for
	// decompressed lines.
	BytesBufferPool = newBufferPoolWithSize(4096)
)
||||
|
||||
// GzipPool is a gun zip compression pool
|
||||
type GzipPool struct { |
||||
readers sync.Pool |
||||
writers sync.Pool |
||||
} |
||||
|
||||
// GetReader gets or creates a new CompressionReader and reset it to read from src
|
||||
func (pool *GzipPool) GetReader(src io.Reader) (reader CompressionReader) { |
||||
if r := pool.readers.Get(); r != nil { |
||||
reader = r.(CompressionReader) |
||||
err := reader.Reset(src) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
} else { |
||||
var err error |
||||
reader, err = gzip.NewReader(src) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
} |
||||
return reader |
||||
} |
||||
|
||||
// PutReader places back in the pool a CompressionReader
|
||||
func (pool *GzipPool) PutReader(reader CompressionReader) { |
||||
pool.readers.Put(reader) |
||||
} |
||||
|
||||
// GetWriter gets or creates a new CompressionWriter and reset it to write to dst
|
||||
func (pool *GzipPool) GetWriter(dst io.Writer) (writer CompressionWriter) { |
||||
if w := pool.writers.Get(); w != nil { |
||||
writer = w.(CompressionWriter) |
||||
writer.Reset(dst) |
||||
} else { |
||||
writer = gzip.NewWriter(dst) |
||||
} |
||||
return writer |
||||
} |
||||
|
||||
// PutWriter places back in the pool a CompressionWriter
|
||||
func (pool *GzipPool) PutWriter(writer CompressionWriter) { |
||||
pool.writers.Put(writer) |
||||
} |
||||
|
||||
// BufioReaderPool is a bufio reader that uses sync.Pool.
|
||||
type BufioReaderPool struct { |
||||
pool sync.Pool |
||||
} |
||||
|
||||
// Get returns a bufio.Reader which reads from r. The buffer size is that of the pool.
|
||||
func (bufPool *BufioReaderPool) Get(r io.Reader) *bufio.Reader { |
||||
buf := bufPool.pool.Get().(*bufio.Reader) |
||||
buf.Reset(r) |
||||
return buf |
||||
} |
||||
|
||||
// Put puts the bufio.Reader back into the pool.
|
||||
func (bufPool *BufioReaderPool) Put(b *bufio.Reader) { |
||||
bufPool.pool.Put(b) |
||||
} |
||||
|
||||
// bufferPool pools bytes.Buffers so callers reuse allocations (e.g. for
// decompressed lines) instead of allocating a fresh buffer each time.
type bufferPool struct {
	pool sync.Pool
}

// newBufferPoolWithSize builds a pool whose new buffers are empty but
// pre-grown to the given capacity in bytes.
func newBufferPoolWithSize(size int) *bufferPool {
	return &bufferPool{
		pool: sync.Pool{
			// make([]byte, 0, size) yields an empty buffer with capacity
			// size; the previous make([]byte, size) handed out buffers that
			// already "contained" size zero bytes.
			New: func() interface{} { return bytes.NewBuffer(make([]byte, 0, size)) },
		},
	}
}

// Get returns an empty buffer from the pool, allocating a new one if
// needed. The buffer is reset so recycled buffers never leak old content.
func (bp *bufferPool) Get() *bytes.Buffer {
	b := bp.pool.Get().(*bytes.Buffer)
	b.Reset()
	return b
}

// Put places the buffer back into the pool for reuse.
func (bp *bufferPool) Put(b *bytes.Buffer) {
	bp.pool.Put(b)
}
||||
@ -0,0 +1,142 @@ |
||||
package main |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"log" |
||||
"math/rand" |
||||
"os" |
||||
"sync" |
||||
"time" |
||||
|
||||
"github.com/cortexproject/cortex/pkg/chunk" |
||||
"github.com/cortexproject/cortex/pkg/chunk/local" |
||||
"github.com/cortexproject/cortex/pkg/chunk/storage" |
||||
"github.com/cortexproject/cortex/pkg/ingester/client" |
||||
"github.com/cortexproject/cortex/pkg/util/validation" |
||||
"github.com/grafana/loki/pkg/chunkenc" |
||||
"github.com/grafana/loki/pkg/logproto" |
||||
lstore "github.com/grafana/loki/pkg/storage" |
||||
"github.com/grafana/loki/pkg/util" |
||||
"github.com/prometheus/common/model" |
||||
"github.com/prometheus/prometheus/pkg/labels" |
||||
"github.com/weaveworks/common/user" |
||||
) |
||||
|
||||
var (
	// start is the fixed origin timestamp (milliseconds) for all generated data.
	start = model.Time(1523750400000)
	// ctx carries the "fake" org ID required by the multi-tenant store API.
	ctx = user.InjectOrgID(context.Background(), "fake")
	// 600 chunks (~2MB each) is about 1.2GiB of data, enough to run the benchmark.
	maxChunks = 600
)
||||
|
||||
// fill up the local filesystem store with 1gib of data to run benchmark
|
||||
func main() { |
||||
if _, err := os.Stat("/tmp/benchmark/chunks"); os.IsNotExist(err) { |
||||
if err := fillStore(); err != nil { |
||||
log.Fatal("error filling up storage:", err) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func getStore() (lstore.Store, error) { |
||||
store, err := lstore.NewStore( |
||||
storage.Config{ |
||||
BoltDBConfig: local.BoltDBConfig{Directory: "/tmp/benchmark/index"}, |
||||
FSConfig: local.FSConfig{Directory: "/tmp/benchmark/chunks"}, |
||||
}, |
||||
chunk.StoreConfig{}, |
||||
chunk.SchemaConfig{ |
||||
Configs: []chunk.PeriodConfig{ |
||||
{ |
||||
From: chunk.DayTime{Time: start}, |
||||
IndexType: "boltdb", |
||||
ObjectType: "filesystem", |
||||
Schema: "v9", |
||||
IndexTables: chunk.PeriodicTableConfig{ |
||||
Prefix: "index_", |
||||
Period: time.Hour * 168, |
||||
}, |
||||
}, |
||||
}, |
||||
}, |
||||
&validation.Overrides{}, |
||||
) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return store, nil |
||||
} |
||||
|
||||
func fillStore() error { |
||||
|
||||
store, err := getStore() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
defer store.Stop() |
||||
|
||||
var wgPush sync.WaitGroup |
||||
var flushCount int |
||||
// insert 5 streams with a random logs every nanoseconds
|
||||
// the string is randomize so chunks are big ~2mb
|
||||
// take ~1min to build 1gib of data
|
||||
for i := 0; i < 5; i++ { |
||||
wgPush.Add(1) |
||||
go func(j int) { |
||||
defer wgPush.Done() |
||||
lbs, err := util.ToClientLabels(fmt.Sprintf("{foo=\"bar\",level=\"%d\"}", j)) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
labelsBuilder := labels.NewBuilder(client.FromLabelAdaptersToLabels(lbs)) |
||||
labelsBuilder.Set(labels.MetricName, "logs") |
||||
metric := labelsBuilder.Labels() |
||||
fp := client.FastFingerprint(lbs) |
||||
chunkEnc := chunkenc.NewMemChunkSize(chunkenc.EncGZIP, 262144) |
||||
for ts := start.UnixNano(); ts < start.UnixNano()+time.Hour.Nanoseconds(); ts = ts + time.Millisecond.Nanoseconds() { |
||||
entry := &logproto.Entry{ |
||||
Timestamp: time.Unix(0, ts), |
||||
Line: randString(250), |
||||
} |
||||
if chunkEnc.SpaceFor(entry) { |
||||
_ = chunkEnc.Append(entry) |
||||
} else { |
||||
from, to := chunkEnc.Bounds() |
||||
c := chunk.NewChunk("fake", fp, metric, chunkenc.NewFacade(chunkEnc), model.TimeFromUnixNano(from.UnixNano()), model.TimeFromUnixNano(to.UnixNano())) |
||||
if err := c.Encode(); err != nil { |
||||
panic(err) |
||||
} |
||||
err := store.Put(ctx, []chunk.Chunk{c}) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
flushCount++ |
||||
log.Println("flushed ", flushCount, from.UnixNano(), to.UnixNano(), metric) |
||||
if flushCount >= maxChunks { |
||||
return |
||||
} |
||||
chunkEnc = chunkenc.NewMemChunkSize(chunkenc.EncGZIP, 262144) |
||||
} |
||||
} |
||||
|
||||
}(i) |
||||
|
||||
} |
||||
wgPush.Wait() |
||||
return nil |
||||
} |
||||
|
||||
// charset is the alphabet for random log lines: upper/lower-case ASCII
// letters plus digits.
const charset = "abcdefghijklmnopqrstuvwxyz" +
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
||||
|
||||
// randStringWithCharset returns a random string of the given length whose
// characters are drawn uniformly from charset.
func randStringWithCharset(length int, charset string) string {
	b := make([]byte, length)
	for i := range b {
		// rand.Intn(len(charset)) covers every index; the previous
		// len(charset)-1 bound could never select the last character.
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}
||||
|
||||
// randString returns a random alphanumeric string of the given length,
// drawn from the package-level charset.
func randString(length int) string {
	return randStringWithCharset(length, charset)
}
||||
@ -0,0 +1,163 @@ |
||||
package storage |
||||
|
||||
import (
	"context"
	"log"
	"net/http"
	_ "net/http/pprof"
	"runtime"
	"sync"
	"testing"
	"time"

	"github.com/cortexproject/cortex/pkg/chunk"
	"github.com/cortexproject/cortex/pkg/chunk/local"
	"github.com/cortexproject/cortex/pkg/chunk/storage"
	"github.com/cortexproject/cortex/pkg/util/validation"
	"github.com/grafana/loki/pkg/logproto"
	"github.com/prometheus/common/model"
	"github.com/weaveworks/common/user"
)
||||
|
||||
var (
	// start anchors all benchmark queries at a fixed instant so runs are comparable.
	start = model.Time(1523750400000)
	// m is the scratch MemStats filled by runtime.ReadMemStats in printHeap.
	m runtime.MemStats
	// ctx carries the "fake" org ID expected by the multi-tenant store API.
	ctx = user.InjectOrgID(context.Background(), "fake")
	// chunkStore is built once and shared by every benchmark in this file.
	chunkStore = getStore()
)
||||
|
||||
//go test -bench=. -benchmem -memprofile memprofile.out -cpuprofile profile.out
|
||||
func Benchmark_store_LazyQueryRegexBackward(b *testing.B) { |
||||
benchmarkStoreQuery(b, &logproto.QueryRequest{ |
||||
Query: "{foo=\"bar\"}", |
||||
Regex: "fuzz", |
||||
Limit: 1000, |
||||
Start: time.Unix(0, start.UnixNano()), |
||||
End: time.Unix(0, (24*time.Hour.Nanoseconds())+start.UnixNano()), |
||||
Direction: logproto.BACKWARD, |
||||
}) |
||||
} |
||||
|
||||
func Benchmark_store_LazyQueryLogQLBackward(b *testing.B) { |
||||
benchmarkStoreQuery(b, &logproto.QueryRequest{ |
||||
Query: "{foo=\"bar\"} |= \"test\" != \"toto\"", |
||||
Regex: "fuzz", |
||||
Limit: 1000, |
||||
Start: time.Unix(0, start.UnixNano()), |
||||
End: time.Unix(0, (24*time.Hour.Nanoseconds())+start.UnixNano()), |
||||
Direction: logproto.BACKWARD, |
||||
}) |
||||
} |
||||
|
||||
func Benchmark_store_LazyQueryRegexForward(b *testing.B) { |
||||
benchmarkStoreQuery(b, &logproto.QueryRequest{ |
||||
Query: "{foo=\"bar\"}", |
||||
Regex: "fuzz", |
||||
Limit: 1000, |
||||
Start: time.Unix(0, start.UnixNano()), |
||||
End: time.Unix(0, (24*time.Hour.Nanoseconds())+start.UnixNano()), |
||||
Direction: logproto.FORWARD, |
||||
}) |
||||
} |
||||
|
||||
func Benchmark_store_LazyQueryForward(b *testing.B) { |
||||
benchmarkStoreQuery(b, &logproto.QueryRequest{ |
||||
Query: "{foo=\"bar\"}", |
||||
Limit: 1000, |
||||
Start: time.Unix(0, start.UnixNano()), |
||||
End: time.Unix(0, (24*time.Hour.Nanoseconds())+start.UnixNano()), |
||||
Direction: logproto.FORWARD, |
||||
}) |
||||
} |
||||
|
||||
func Benchmark_store_LazyQueryBackward(b *testing.B) { |
||||
benchmarkStoreQuery(b, &logproto.QueryRequest{ |
||||
Query: "{foo=\"bar\"}", |
||||
Limit: 1000, |
||||
Start: time.Unix(0, start.UnixNano()), |
||||
End: time.Unix(0, (24*time.Hour.Nanoseconds())+start.UnixNano()), |
||||
Direction: logproto.BACKWARD, |
||||
}) |
||||
} |
||||
|
||||
func benchmarkStoreQuery(b *testing.B, query *logproto.QueryRequest) { |
||||
b.ReportAllocs() |
||||
// force to run gc 10x more often this can be useful to detect fast allocation vs leak.
|
||||
//debug.SetGCPercent(10)
|
||||
stop := make(chan struct{}) |
||||
go func() { |
||||
_ = http.ListenAndServe(":6060", http.DefaultServeMux) |
||||
}() |
||||
go func() { |
||||
ticker := time.NewTicker(time.Millisecond) |
||||
for { |
||||
select { |
||||
case <-ticker.C: |
||||
// print and capture the max in use heap size
|
||||
printHeap(b, false) |
||||
case <-stop: |
||||
ticker.Stop() |
||||
return |
||||
} |
||||
} |
||||
}() |
||||
for i := 0; i < b.N; i++ { |
||||
iter, err := chunkStore.LazyQuery(ctx, query) |
||||
if err != nil { |
||||
b.Fatal(err) |
||||
} |
||||
res := []logproto.Entry{} |
||||
printHeap(b, false) |
||||
j := uint32(0) |
||||
for iter.Next() { |
||||
j++ |
||||
printHeap(b, false) |
||||
res = append(res, iter.Entry()) |
||||
// limit result like the querier would do.
|
||||
if j == query.Limit { |
||||
break |
||||
} |
||||
} |
||||
iter.Close() |
||||
printHeap(b, true) |
||||
log.Println("line fetched", len(res)) |
||||
} |
||||
close(stop) |
||||
} |
||||
|
||||
var maxHeapInuse uint64 |
||||
|
||||
func printHeap(b *testing.B, show bool) { |
||||
runtime.ReadMemStats(&m) |
||||
if m.HeapInuse > maxHeapInuse { |
||||
maxHeapInuse = m.HeapInuse |
||||
} |
||||
if show { |
||||
log.Printf("Benchmark %d maxHeapInuse: %d Mbytes\n", b.N, maxHeapInuse/1024/1024) |
||||
log.Printf("Benchmark %d currentHeapInuse: %d Mbytes\n", b.N, m.HeapInuse/1024/1024) |
||||
} |
||||
} |
||||
|
||||
func getStore() Store { |
||||
store, err := NewStore(storage.Config{ |
||||
BoltDBConfig: local.BoltDBConfig{Directory: "/tmp/benchmark/index"}, |
||||
FSConfig: local.FSConfig{Directory: "/tmp/benchmark/chunks"}, |
||||
}, chunk.StoreConfig{}, chunk.SchemaConfig{ |
||||
Configs: []chunk.PeriodConfig{ |
||||
{ |
||||
From: chunk.DayTime{Time: start}, |
||||
IndexType: "boltdb", |
||||
ObjectType: "filesystem", |
||||
Schema: "v9", |
||||
IndexTables: chunk.PeriodicTableConfig{ |
||||
Prefix: "index_", |
||||
Period: time.Hour * 168, |
||||
}, |
||||
}, |
||||
}, |
||||
}, &validation.Overrides{}) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
return store |
||||
} |
||||
Loading…
Reference in new issue