mirror of https://github.com/grafana/loki
chore: refactor how distributor checks ingest-limits (#17042)
parent e90e0c41fb
commit ffa9656f7f
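The commit adds an ingestLimits helper that wraps the ExceedsLimits RPC to the limits-frontend. A minimal sketch of how a caller in the distributor package might consume the helper is below; checkStreams, the rejection map, and the fail-open decision are illustrative assumptions and are not part of this diff.

// Sketch only: one way the distributor's push path might consume the
// ingestLimits helper added in this commit. The function name and the
// rejection map are assumptions, not part of the diff.
func checkStreams(ctx context.Context, limits *ingestLimits, tenant string, streams []KeyedStream) (map[uint64][]string, error) {
	exceeds, results, err := limits.ExceedsLimits(ctx, tenant, streams)
	if err != nil {
		// The check itself failed; the caller decides whether to fail open.
		return nil, err
	}
	if !exceeds {
		return nil, nil
	}
	// Index the rejected streams by hash so each stream in the push request
	// can be matched against the reasons it exceeded the limits.
	rejected := make(map[uint64][]string, len(results))
	for _, r := range results {
		rejected[r.hash] = r.reasons
	}
	return rejected, nil
}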
@@ -0,0 +1,154 @@
package distributor

import (
	"context"
	"encoding/binary"
	"fmt"
	"hash/fnv"

	"github.com/grafana/dskit/ring"
	ring_client "github.com/grafana/dskit/ring/client"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"

	limits_frontend_client "github.com/grafana/loki/v3/pkg/limits/frontend/client"
	"github.com/grafana/loki/v3/pkg/logproto"
)

// ingestLimitsFrontendClient is used for tests.
type ingestLimitsFrontendClient interface {
	ExceedsLimits(context.Context, *logproto.ExceedsLimitsRequest) (*logproto.ExceedsLimitsResponse, error)
}

// ingestLimitsFrontendRingClient uses the ring to query ingest-limits frontends.
type ingestLimitsFrontendRingClient struct {
	ring ring.ReadRing
	pool *ring_client.Pool
}

func newIngestLimitsFrontendRingClient(ring ring.ReadRing, pool *ring_client.Pool) *ingestLimitsFrontendRingClient {
	return &ingestLimitsFrontendRingClient{
		ring: ring,
		pool: pool,
	}
}

// Implements the ingestLimitsFrontendClient interface.
func (c *ingestLimitsFrontendRingClient) ExceedsLimits(ctx context.Context, req *logproto.ExceedsLimitsRequest) (*logproto.ExceedsLimitsResponse, error) {
	// We use an FNV-1 hash of all stream hashes in the request to load balance
	// requests across limits-frontend instances.
	h := fnv.New32()
	for _, stream := range req.Streams {
		// Add the stream hash to the FNV-1 hash.
		buf := make([]byte, binary.MaxVarintLen64)
		binary.PutUvarint(buf, stream.StreamHash)
		_, _ = h.Write(buf)
	}
	// Get the limits-frontend instances from the ring.
	var descs [5]ring.InstanceDesc
	rs, err := c.ring.Get(h.Sum32(), limits_frontend_client.LimitsRead, descs[0:], nil, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to get limits-frontend instances from ring: %w", err)
	}
	var lastErr error
	// Send the request to the limits-frontend to see if it exceeds the tenant
	// limits. If the RPC fails, fail over to the next instance in the ring.
	for _, instance := range rs.Instances {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}
		c, err := c.pool.GetClientFor(instance.Addr)
		if err != nil {
			lastErr = err
			continue
		}
		client := c.(logproto.IngestLimitsFrontendClient)
		resp, err := client.ExceedsLimits(ctx, req)
		if err != nil {
			lastErr = err
			continue
		}
		return resp, nil
	}
	return nil, lastErr
}

// exceedsIngestLimitsResult contains the reasons a stream exceeds per-tenant
// ingest limits.
type exceedsIngestLimitsResult struct {
	hash    uint64
	reasons []string
}

type ingestLimits struct {
	client         ingestLimitsFrontendClient
	limitsFailures prometheus.Counter
}

func newIngestLimits(client ingestLimitsFrontendClient, r prometheus.Registerer) *ingestLimits {
	return &ingestLimits{
		client: client,
		limitsFailures: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "loki_distributor_ingest_limits_failures_total",
			Help: "The total number of failures checking ingest limits.",
		}),
	}
}

// ExceedsLimits returns true if at least one stream exceeds the per-tenant
// limits, otherwise false. It also returns a slice containing the streams
// that exceeded the per-tenant limits, and for each stream the reasons it
// exceeded the limits. This slice can be nil. An error is returned if the
// limits could not be checked.
func (l *ingestLimits) ExceedsLimits(ctx context.Context, tenant string, streams []KeyedStream) (bool, []exceedsIngestLimitsResult, error) {
	req, err := newExceedsLimitsRequest(tenant, streams)
	if err != nil {
		return false, nil, err
	}
	resp, err := l.client.ExceedsLimits(ctx, req)
	if err != nil {
		return false, nil, err
	}
	if len(resp.Results) == 0 {
		return false, nil, nil
	}
	// A stream can exceed limits for multiple reasons, for example exceeding
	// both the per-tenant stream limit and rate limits. We organize the reasons
	// for each stream into a slice, and then add that to the results.
	reasonsForHashes := make(map[uint64][]string)
	for _, result := range resp.Results {
		reasons := reasonsForHashes[result.StreamHash]
		reasons = append(reasons, result.Reason)
		reasonsForHashes[result.StreamHash] = reasons
	}
	result := make([]exceedsIngestLimitsResult, 0, len(reasonsForHashes))
	for hash, reasons := range reasonsForHashes {
		result = append(result, exceedsIngestLimitsResult{
			hash:    hash,
			reasons: reasons,
		})
	}
	return true, result, nil
}

func newExceedsLimitsRequest(tenant string, streams []KeyedStream) (*logproto.ExceedsLimitsRequest, error) {
	// The distributor sends the hashes of all streams in the request to the
	// limits-frontend. The limits-frontend is responsible for deciding if
	// the request would exceed the tenant's limits, and if so, which streams
	// from the request caused it to exceed its limits.
	streamMetadata := make([]*logproto.StreamMetadata, 0, len(streams))
	for _, stream := range streams {
		entriesSize, structuredMetadataSize := calculateStreamSizes(stream.Stream)
		streamMetadata = append(streamMetadata, &logproto.StreamMetadata{
			StreamHash:             stream.HashKeyNoShard,
			EntriesSize:            entriesSize,
			StructuredMetadataSize: structuredMetadataSize,
		})
	}
	return &logproto.ExceedsLimitsRequest{
		Tenant:  tenant,
		Streams: streamMetadata,
	}, nil
}
@@ -0,0 +1,124 @@
package distributor

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/v3/pkg/logproto"
)

// mockIngestLimitsFrontendClient mocks the RPC calls for tests.
type mockIngestLimitsFrontendClient struct {
	t               *testing.T
	expectedRequest *logproto.ExceedsLimitsRequest
	response        *logproto.ExceedsLimitsResponse
	responseErr     error
}

// Implements the ingestLimitsFrontendClient interface.
func (c *mockIngestLimitsFrontendClient) ExceedsLimits(_ context.Context, r *logproto.ExceedsLimitsRequest) (*logproto.ExceedsLimitsResponse, error) {
	require.Equal(c.t, c.expectedRequest, r)
	if c.responseErr != nil {
		return nil, c.responseErr
	}
	return c.response, nil
}

// This test asserts that when checking ingest limits the expected proto
// message is sent, and that for a given response, the result contains the
// expected streams each with their expected reasons.
func TestIngestLimits_ExceedsLimits(t *testing.T) {
	tests := []struct {
		name            string
		tenant          string
		streams         []KeyedStream
		expectedRequest *logproto.ExceedsLimitsRequest
		response        *logproto.ExceedsLimitsResponse
		responseErr     error
		expected        []exceedsIngestLimitsResult
		expectedErr     string
	}{{
		name:   "error should be returned if limits cannot be checked",
		tenant: "test",
		streams: []KeyedStream{{
			HashKeyNoShard: 1,
		}},
		expectedRequest: &logproto.ExceedsLimitsRequest{
			Tenant: "test",
			Streams: []*logproto.StreamMetadata{{
				StreamHash: 1,
			}},
		},
		responseErr: errors.New("failed to check limits"),
		expectedErr: "failed to check limits",
	}, {
		name:   "exceeds limits",
		tenant: "test",
		streams: []KeyedStream{{
			HashKeyNoShard: 1,
		}},
		expectedRequest: &logproto.ExceedsLimitsRequest{
			Tenant: "test",
			Streams: []*logproto.StreamMetadata{{
				StreamHash: 1,
			}},
		},
		response: &logproto.ExceedsLimitsResponse{
			Tenant: "test",
			Results: []*logproto.ExceedsLimitsResult{{
				StreamHash: 1,
				Reason:     "test",
			}},
		},
		expected: []exceedsIngestLimitsResult{{
			hash:    1,
			reasons: []string{"test"},
		}},
	}, {
		name:   "does not exceed limits",
		tenant: "test",
		streams: []KeyedStream{{
			HashKeyNoShard: 1,
		}},
		expectedRequest: &logproto.ExceedsLimitsRequest{
			Tenant: "test",
			Streams: []*logproto.StreamMetadata{{
				StreamHash: 1,
			}},
		},
		response: &logproto.ExceedsLimitsResponse{
			Tenant:  "test",
			Results: []*logproto.ExceedsLimitsResult{},
		},
		expected: nil,
	}}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			mockClient := mockIngestLimitsFrontendClient{
				t:               t,
				expectedRequest: test.expectedRequest,
				response:        test.response,
				responseErr:     test.responseErr,
			}
			l := newIngestLimits(&mockClient, prometheus.NewRegistry())
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer cancel()
			exceedsLimits, rejectedStreams, err := l.ExceedsLimits(ctx, test.tenant, test.streams)
			if test.expectedErr != "" {
				require.EqualError(t, err, test.expectedErr)
				require.False(t, exceedsLimits)
				require.Empty(t, rejectedStreams)
			} else {
				require.Nil(t, err)
				require.Equal(t, test.expected, rejectedStreams)
				require.Equal(t, len(test.expected) > 0, exceedsLimits)
			}
		})
	}
}
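The grouping of multiple reasons for the same stream (the reasonsForHashes map in ExceedsLimits above) is not exercised by a dedicated case in this diff. A sketch of one, assuming it sits alongside the existing test in the same file so the imports above apply; the reason strings are made up for illustration:

// Sketch only: asserts that two reasons for the same stream hash are grouped
// into a single result. Not part of this diff.
func TestIngestLimits_ExceedsLimits_GroupsReasons(t *testing.T) {
	mockClient := mockIngestLimitsFrontendClient{
		t: t,
		expectedRequest: &logproto.ExceedsLimitsRequest{
			Tenant:  "test",
			Streams: []*logproto.StreamMetadata{{StreamHash: 1}},
		},
		response: &logproto.ExceedsLimitsResponse{
			Tenant: "test",
			Results: []*logproto.ExceedsLimitsResult{
				{StreamHash: 1, Reason: "max_streams"},
				{StreamHash: 1, Reason: "rate_limited"},
			},
		},
	}
	l := newIngestLimits(&mockClient, prometheus.NewRegistry())
	exceeds, results, err := l.ExceedsLimits(context.Background(), "test", []KeyedStream{{HashKeyNoShard: 1}})
	require.NoError(t, err)
	require.True(t, exceeds)
	require.Equal(t, []exceedsIngestLimitsResult{{
		hash:    1,
		reasons: []string{"max_streams", "rate_limited"},
	}}, results)
}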