Remove call to set default resolver (#11580)

**What this PR does / why we need it**:
Updates the vendored `github.com/grafana/dskit` (pulling in newer `klauspost/compress` and an indirect `pyroscope-go/godeltaprof` dependency) to a version whose `tenant` package no longer has a configurable package-level default resolver, and removes the now-unnecessary `tenant.WithDefaultResolver(tenant.NewMultiResolver())` calls from Loki's distributor and querier initialization and from tests.

**Which issue(s) this PR fixes**:
Fixes #<issue number>

**Special notes for your reviewer**:

**Checklist**
- [ ] Reviewed the
[`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md)
guide (**required**)
- [ ] Documentation added
- [ ] Tests updated
- [ ] `CHANGELOG.md` updated
- [ ] If the change is worth mentioning in the release notes, add
`add-to-release-notes` label
- [ ] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/setup/upgrade/_index.md`
- [ ] For Helm chart changes bump the Helm chart version in
`production/helm/loki/Chart.yaml` and update
`production/helm/loki/CHANGELOG.md` and
`production/helm/loki/README.md`. [Example
PR](d10549e3ec)
- [ ] If the change is deprecating or removing a configuration option,
update the `deprecated-config.yaml` and `deleted-config.yaml` files
respectively in the `tools/deprecated-config-checker` directory.
[Example
PR](0d4416a4b0)
Branch: pull/11600/head
Commit 6c4699d8f7 (parent 599eed7c52), authored by Paul Rogers, committed via GitHub 1 year ago.
80 changed files (lines changed in parentheses):

1. go.mod (5)
2. go.sum (10)
3. pkg/bloomgateway/client_test.go (5)
4. pkg/compactor/deletion/request_handler_test.go (4)
5. pkg/loki/modules.go (8)
6. pkg/querier/http_test.go (4)
7. pkg/querier/multi_tenant_querier_test.go (12)
8. pkg/querier/querier_mock_test.go (6)
9. pkg/util/ring/ring_test.go (6)
10. tools/tsdb/bloom-tester/lib.go (1)
11. tools/tsdb/bloom-tester/metrics.go (3)
12. tools/tsdb/bloom-tester/tokenizer.go (3)
13. vendor/github.com/grafana/dskit/cancellation/error.go (37)
14. vendor/github.com/grafana/dskit/httpgrpc/server/server.go (29)
15. vendor/github.com/grafana/dskit/middleware/http_tracing.go (88)
16. vendor/github.com/grafana/dskit/ring/batch.go (15)
17. vendor/github.com/grafana/dskit/ring/client/pool.go (44)
18. vendor/github.com/grafana/dskit/ring/replication_set.go (52)
19. vendor/github.com/grafana/dskit/ring/replication_set_tracker.go (64)
20. vendor/github.com/grafana/dskit/ring/ring.go (80)
21. vendor/github.com/grafana/dskit/ring/token_generator.go (19)
22. vendor/github.com/grafana/dskit/ring/token_range.go (153)
23. vendor/github.com/grafana/dskit/server/server.go (53)
24. vendor/github.com/grafana/dskit/tenant/resolver.go (132)
25. vendor/github.com/grafana/dskit/tenant/tenant.go (64)
26. vendor/github.com/grafana/pyroscope-go/godeltaprof/.gitignore (1)
27. vendor/github.com/grafana/pyroscope-go/godeltaprof/LICENSE (203)
28. vendor/github.com/grafana/pyroscope-go/godeltaprof/README.md (98)
29. vendor/github.com/grafana/pyroscope-go/godeltaprof/block.go (119)
30. vendor/github.com/grafana/pyroscope-go/godeltaprof/heap.go (81)
31. vendor/github.com/grafana/pyroscope-go/godeltaprof/http/pprof/pprof.go (50)
32. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/delta_heap.go (118)
33. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/delta_mutex.go (59)
34. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/elf.go (109)
35. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/gzip_go16.go (18)
36. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/gzip_go17.go (19)
37. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/map.go (96)
38. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/mutex_scale_go19.go (27)
39. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/mutex_scale_go20.go (17)
40. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/proto.go (715)
41. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/protobuf.go (141)
42. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/stub.go (17)
43. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/stub_go20.go (16)
44. vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/stub_go21.go (21)
45. vendor/github.com/grafana/pyroscope-go/godeltaprof/proto.go (9)
46. vendor/github.com/klauspost/compress/.goreleaser.yml (20)
47. vendor/github.com/klauspost/compress/README.md (27)
48. vendor/github.com/klauspost/compress/flate/deflate.go (29)
49. vendor/github.com/klauspost/compress/flate/fast_encoder.go (23)
50. vendor/github.com/klauspost/compress/flate/inflate.go (66)
51. vendor/github.com/klauspost/compress/flate/inflate_gen.go (34)
52. vendor/github.com/klauspost/compress/flate/level5.go (398)
53. vendor/github.com/klauspost/compress/flate/matchlen_amd64.go (16)
54. vendor/github.com/klauspost/compress/flate/matchlen_amd64.s (68)
55. vendor/github.com/klauspost/compress/flate/matchlen_generic.go (33)
56. vendor/github.com/klauspost/compress/fse/bitwriter.go (3)
57. vendor/github.com/klauspost/compress/fse/compress.go (5)
58. vendor/github.com/klauspost/compress/gzip/gunzip.go (1)
59. vendor/github.com/klauspost/compress/gzip/gzip.go (21)
60. vendor/github.com/klauspost/compress/huff0/bitwriter.go (3)
61. vendor/github.com/klauspost/compress/huff0/compress.go (20)
62. vendor/github.com/klauspost/compress/s2/dict.go (19)
63. vendor/github.com/klauspost/compress/s2/encode.go (2)
64. vendor/github.com/klauspost/compress/s2/encode_best.go (3)
65. vendor/github.com/klauspost/compress/s2/encode_go.go (2)
66. vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s (1616)
67. vendor/github.com/klauspost/compress/s2/index.go (20)
68. vendor/github.com/klauspost/compress/zstd/bitreader.go (34)
69. vendor/github.com/klauspost/compress/zstd/bitwriter.go (3)
70. vendor/github.com/klauspost/compress/zstd/blockenc.go (29)
71. vendor/github.com/klauspost/compress/zstd/dict.go (379)
72. vendor/github.com/klauspost/compress/zstd/enc_best.go (55)
73. vendor/github.com/klauspost/compress/zstd/enc_better.go (17)
74. vendor/github.com/klauspost/compress/zstd/encoder.go (13)
75. vendor/github.com/klauspost/compress/zstd/frameenc.go (4)
76. vendor/github.com/klauspost/compress/zstd/seqdec.go (17)
77. vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s (128)
78. vendor/github.com/klauspost/compress/zstd/seqdec_generic.go (2)
79. vendor/github.com/klauspost/compress/zstd/snappy.go (5)
80. vendor/modules.txt (12)

@ -51,7 +51,7 @@ require (
github.com/gorilla/mux v1.8.0
github.com/gorilla/websocket v1.5.0
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2
github.com/grafana/dskit v0.0.0-20231120170505-765e343eda4f
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb
github.com/grafana/go-gelf/v2 v2.0.1
github.com/grafana/gomemcache v0.0.0-20231204155601-7de47a8c3cb0
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd
@ -66,7 +66,7 @@ require (
github.com/jmespath/go-jmespath v0.4.0
github.com/joncrlsn/dque v0.0.0-20211108142734-c2ef48c5192a
github.com/json-iterator/go v1.1.12
github.com/klauspost/compress v1.16.7
github.com/klauspost/compress v1.17.3
github.com/klauspost/pgzip v1.2.5
github.com/mattn/go-ieproxy v0.0.1
github.com/minio/minio-go/v7 v7.0.61
@ -235,6 +235,7 @@ require (
github.com/googleapis/enterprise-certificate-proxy v0.2.5 // indirect
github.com/googleapis/gax-go/v2 v2.12.0 // indirect
github.com/gophercloud/gophercloud v1.5.0 // indirect
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 // indirect
github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect

@ -995,8 +995,8 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 h1:qhugDMdQ4Vp68H0tp/0iN17DM2ehRo1rLEdOFe/gB8I=
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2/go.mod h1:w/aiO1POVIeXUQyl0VQSZjl5OAGDTL5aX+4v0RA1tcw=
github.com/grafana/dskit v0.0.0-20231120170505-765e343eda4f h1:gyojr97YeWZ70pKNakWv5/tKwBHuLy3icnIeCo9gQr4=
github.com/grafana/dskit v0.0.0-20231120170505-765e343eda4f/go.mod h1:8dsy5tQOkeNQyjXpm5mQsbCu3H5uzeBD35MzRQFznKU=
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb h1:AWE6+kvtE18HP+lRWNUCyvymyrFSXs6TcS2vXIXGIuw=
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb/go.mod h1:kkWM4WUV230bNG3urVRWPBnSJHs64y/0RmWjftnnn0c=
github.com/grafana/go-gelf/v2 v2.0.1 h1:BOChP0h/jLeD+7F9mL7tq10xVkDG15he3T1zHuQaWak=
github.com/grafana/go-gelf/v2 v2.0.1/go.mod h1:lexHie0xzYGwCgiRGcvZ723bSNyNI8ZRD4s0CLobh90=
github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/IENCw+oLVdZB4G21VPhkHBgwSHY=
@ -1005,6 +1005,8 @@ github.com/grafana/gomemcache v0.0.0-20231204155601-7de47a8c3cb0 h1:aLBiDMjTtXx2
github.com/grafana/gomemcache v0.0.0-20231204155601-7de47a8c3cb0/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU=
github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6/go.mod h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE=
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd h1:PpuIBO5P3e9hpqBD0O/HjhShYuM6XE0i/lbE6J94kww=
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A=
github.com/grafana/tail v0.0.0-20230510142333-77b18831edf0 h1:bjh0PVYSVVFxzINqPFYJmAmJNrWPgnVjuSdYJGHmtFU=
@ -1239,8 +1241,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA=
github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=

@ -426,3 +426,8 @@ func (*mockRing) ShuffleShardWithLookback(_ string, _ int, _ time.Duration, _ ti
func (*mockRing) CleanupShuffleShardCache(_ string) {
panic("unimplemented")
}
func (r *mockRing) GetTokenRangesForInstance(_ string) (ring.TokenRanges, error) {
tr := ring.TokenRanges{0, math.MaxUint32}
return tr, nil
}

@ -245,7 +245,7 @@ func TestCancelDeleteRequestHandler(t *testing.T) {
store.getErr = errors.New("something bad")
h := NewDeleteRequestHandler(store, 0, nil)
req := buildRequest("org id", ``, "", "")
req := buildRequest("orgid", ``, "", "")
params := req.URL.Query()
params.Set("request_id", "test-request")
req.URL.RawQuery = params.Encode()
@ -411,7 +411,7 @@ func TestGetAllDeleteRequestsHandler(t *testing.T) {
store.getAllErr = errors.New("something bad")
h := NewDeleteRequestHandler(store, 0, nil)
req := buildRequest("org id", ``, "", "")
req := buildRequest("orgid", ``, "", "")
params := req.URL.Query()
params.Set("request_id", "test-request")
req.URL.RawQuery = params.Encode()

@ -26,7 +26,6 @@ import (
"github.com/grafana/dskit/runtimeconfig"
"github.com/grafana/dskit/server"
"github.com/grafana/dskit/services"
"github.com/grafana/dskit/tenant"
"github.com/grafana/dskit/user"
gerrors "github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
@ -332,12 +331,6 @@ func (t *Loki) initDistributor() (services.Service, error) {
logproto.RegisterPusherServer(t.Server.GRPC, t.distributor)
}
// If the querier module is not part of this process we need to check if multi-tenant queries are enabled.
// If the querier module is part of this process the querier module will configure everything.
if !t.Cfg.isModuleEnabled(Querier) && t.Cfg.Querier.MultiTenantQueriesEnabled {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
}
httpPushHandlerMiddleware := middleware.Merge(
serverutil.RecoveryHTTPMiddleware,
t.HTTPAuthMiddleware,
@ -383,7 +376,6 @@ func (t *Loki) initQuerier() (services.Service, error) {
if t.Cfg.Querier.MultiTenantQueriesEnabled {
t.Querier = querier.NewMultiTenantQuerier(q, util_log.Logger)
tenant.WithDefaultResolver(tenant.NewMultiResolver())
} else {
t.Querier = q
}
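
The removal above is safe with the updated dskit: the vendored `tenant` package (see the `vendor/github.com/grafana/dskit/tenant/resolver.go` hunk further down) no longer exposes a configurable package-level resolver and always splits `|`-separated org IDs. A minimal sketch of the resulting behaviour, assuming a context with an org ID injected via `dskit/user`:

```go
package main

import (
	"context"
	"fmt"

	"github.com/grafana/dskit/tenant"
	"github.com/grafana/dskit/user"
)

func main() {
	// Inject a multi-tenant org ID, as the HTTP auth middleware would.
	ctx := user.InjectOrgID(context.Background(), "tenant-a|tenant-b")

	ids, err := tenant.TenantIDs(ctx) // splits on "|", validates and normalizes each ID
	fmt.Println(ids, err)             // [tenant-a tenant-b] <nil>

	_, err = tenant.TenantID(ctx) // requires exactly one tenant
	fmt.Println(err)              // user.ErrTooManyOrgIDs
}
```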

@ -16,15 +16,12 @@ import (
"github.com/grafana/loki/pkg/validation"
"github.com/go-kit/log"
"github.com/grafana/dskit/tenant"
"github.com/grafana/dskit/user"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
func TestTailHandler(t *testing.T) {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
defaultLimits := defaultLimitsTestConfig()
limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
@ -78,7 +75,6 @@ func (s *slowConnectionSimulator) ServeHTTP(_ http.ResponseWriter, r *http.Reque
}
func TestQueryWrapperMiddleware(t *testing.T) {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
shortestTimeout := time.Millisecond * 5
t.Run("request timeout is the shortest one", func(t *testing.T) {

@ -15,8 +15,6 @@ import (
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/grafana/dskit/tenant"
"github.com/grafana/loki/pkg/iter"
"github.com/grafana/loki/pkg/logproto"
"github.com/grafana/loki/pkg/logql"
@ -25,8 +23,6 @@ import (
)
func TestMultiTenantQuerier_SelectLogs(t *testing.T) {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
for _, tc := range []struct {
desc string
orgID string
@ -110,8 +106,6 @@ func TestMultiTenantQuerier_SelectLogs(t *testing.T) {
}
func TestMultiTenantQuerier_SelectSamples(t *testing.T) {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
for _, tc := range []struct {
desc string
orgID string
@ -295,8 +289,6 @@ func TestMultiTenantQuerier_Label(t *testing.T) {
}
}
tenant.WithDefaultResolver(tenant.NewMultiResolver())
for _, tc := range []struct {
desc string
name string
@ -354,8 +346,6 @@ func TestMultiTenantQuerier_Label(t *testing.T) {
}
func TestMultiTenantQuerierSeries(t *testing.T) {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
for _, tc := range []struct {
desc string
orgID string
@ -418,8 +408,6 @@ func TestMultiTenantQuerierSeries(t *testing.T) {
}
func TestVolume(t *testing.T) {
tenant.WithDefaultResolver(tenant.NewMultiResolver())
for _, tc := range []struct {
desc string
orgID string

@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"math"
"time"
"github.com/grafana/loki/pkg/logql/log"
@ -454,6 +455,11 @@ func (r *readRingMock) GetInstanceState(_ string) (ring.InstanceState, error) {
return 0, nil
}
func (r *readRingMock) GetTokenRangesForInstance(_ string) (ring.TokenRanges, error) {
tr := ring.TokenRanges{0, math.MaxUint32}
return tr, nil
}
func mockReadRingWithOneActiveIngester() *readRingMock {
return newReadRingMock([]ring.InstanceDesc{
{Addr: "test", Timestamp: time.Now().UnixNano(), State: ring.ACTIVE, Tokens: []uint32{1, 2, 3}},

@ -1,6 +1,7 @@
package ring
import (
"math"
"testing"
"time"
@ -89,6 +90,11 @@ func (r *readRingMock) GetInstanceState(_ string) (ring.InstanceState, error) {
return 0, nil
}
func (r *readRingMock) GetTokenRangesForInstance(_ string) (ring.TokenRanges, error) {
tr := ring.TokenRanges{0, math.MaxUint32}
return tr, nil
}
type readLifecyclerMock struct {
mock.Mock
addr string

@ -6,6 +6,7 @@ import (
"context"
"flag"
"fmt"
"github.com/grafana/loki/pkg/storage/bloom/v1/filter"
tsdbindex "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index"

@ -1,10 +1,11 @@
package main
import (
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"github.com/grafana/loki/pkg/storage/bloom/v1/filter"
)

@ -2,10 +2,11 @@ package main
import (
"context"
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"math"
"time"
v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/grafana/loki/pkg/util/constants"

@ -0,0 +1,37 @@
package cancellation
import (
"context"
"fmt"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
type cancellationError struct {
inner error
}
func NewError(err error) error {
return cancellationError{err}
}
func NewErrorf(format string, args ...any) error {
return NewError(fmt.Errorf(format, args...))
}
func (e cancellationError) Error() string {
return "context canceled: " + e.inner.Error()
}
func (e cancellationError) Is(err error) bool {
return err == context.Canceled
}
func (e cancellationError) Unwrap() error {
return e.inner
}
func (e cancellationError) GRPCStatus() *status.Status {
return status.New(codes.Canceled, e.Error())
}
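
A sketch of how this new `cancellation` helper is used elsewhere in this diff: the error is attached as a cancellation cause via `context.WithCancelCause` (Go 1.20+), still satisfies `errors.Is(err, context.Canceled)`, and maps to a gRPC `Canceled` status:

```go
package main

import (
	"context"
	"errors"
	"fmt"

	"github.com/grafana/dskit/cancellation"
	"google.golang.org/grpc/status"
)

func main() {
	ctx, cancel := context.WithCancelCause(context.Background())

	// Cancel with a descriptive cause, as replication_set.go now does.
	cancel(cancellation.NewErrorf("quorum reached, result not required from this instance"))

	cause := context.Cause(ctx)
	fmt.Println(cause)                              // context canceled: quorum reached, ...
	fmt.Println(errors.Is(cause, context.Canceled)) // true, via the Is method above
	fmt.Println(status.Code(cause))                 // Canceled, via the GRPCStatus method
}
```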

@ -32,17 +32,29 @@ var (
DoNotLogErrorHeaderKey = http.CanonicalHeaderKey("X-DoNotLogError")
)
type Option func(*Server)
func WithReturn4XXErrors(s *Server) {
s.return4XXErrors = true
}
func applyServerOptions(s *Server, opts ...Option) *Server {
for _, opt := range opts {
opt(s)
}
return s
}
// Server implements HTTPServer. HTTPServer is a generated interface that gRPC
// servers must implement.
type Server struct {
handler http.Handler
handler http.Handler
return4XXErrors bool
}
// NewServer makes a new Server.
func NewServer(handler http.Handler) *Server {
return &Server{
handler: handler,
}
func NewServer(handler http.Handler, opts ...Option) *Server {
return applyServerOptions(&Server{handler: handler}, opts...)
}
// Handle implements HTTPServer.
@ -67,7 +79,7 @@ func (s Server) Handle(ctx context.Context, r *httpgrpc.HTTPRequest) (*httpgrpc.
Headers: httpgrpc.FromHeader(header),
Body: recorder.Body.Bytes(),
}
if recorder.Code/100 == 5 {
if s.shouldReturnError(resp) {
err := httpgrpc.ErrorFromHTTPResponse(resp)
if doNotLogError {
err = middleware.DoNotLogError{Err: err}
@ -77,6 +89,11 @@ func (s Server) Handle(ctx context.Context, r *httpgrpc.HTTPRequest) (*httpgrpc.
return resp, nil
}
func (s Server) shouldReturnError(resp *httpgrpc.HTTPResponse) bool {
mask := resp.GetCode() / 100
return mask == 5 || (s.return4XXErrors && mask == 4)
}
// Client is a http.Handler that forwards the request over gRPC.
type Client struct {
client httpgrpc.HTTPClient

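A short sketch of the new functional option, assuming a plain `http.Handler`: by default only 5xx responses are turned into gRPC errors; with `WithReturn4XXErrors`, 4xx responses are converted as well (this is what `Config.ReportHTTP4XXCodesInInstrumentationLabel` in the dskit `server/server.go` hunk below switches on).

```go
package example

import (
	"net/http"

	httpgrpc_server "github.com/grafana/dskit/httpgrpc/server"
)

func newHandlers(h http.Handler) (plain, with4xx *httpgrpc_server.Server) {
	// Default behaviour: only responses with code/100 == 5 become gRPC errors.
	plain = httpgrpc_server.NewServer(h)

	// Opt-in: 4xx responses are also surfaced as gRPC errors, so instrumentation
	// middleware can label them by status code.
	with4xx = httpgrpc_server.NewServer(h, httpgrpc_server.WithReturn4XXErrors)
	return plain, with4xx
}
```
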
@ -5,13 +5,17 @@
package middleware
import (
"context"
"fmt"
"net/http"
"github.com/grafana/dskit/httpgrpc"
"github.com/gorilla/mux"
"github.com/opentracing-contrib/go-stdlib/nethttp"
"github.com/opentracing/opentracing-go"
"github.com/opentracing/opentracing-go/ext"
"google.golang.org/grpc"
)
// Dummy dependency to enforce that we have a nethttp version newer
@ -46,27 +50,8 @@ func (t Tracer) Wrap(next http.Handler) http.Handler {
return nethttp.Middleware(opentracing.GlobalTracer(), next, options...)
}
// HTTPGRPCTracer is a middleware which traces incoming httpgrpc requests.
type HTTPGRPCTracer struct {
RouteMatcher RouteMatcher
}
// InitHTTPGRPCMiddleware initializes gorilla/mux-compatible HTTP middleware
//
// HTTPGRPCTracer is specific to the server-side handling of HTTP requests which were
// wrapped into gRPC requests and routed through the httpgrpc.HTTP/Handle gRPC.
//
// HTTPGRPCTracer.Wrap must be attached to the same mux.Router assigned to dskit/server.Config.Router
// but it does not need to be attached to dskit/server.Config.HTTPMiddleware.
// dskit/server.Config.HTTPMiddleware is applied to direct HTTP requests not routed through gRPC;
// the server utilizes the default http middleware Tracer.Wrap for those standard http requests.
func InitHTTPGRPCMiddleware(router *mux.Router) *mux.Router {
middleware := HTTPGRPCTracer{RouteMatcher: router}
router.Use(middleware.Wrap)
return router
}
// Wrap creates and decorates server-side tracing spans for httpgrpc requests
// HTTPGRPCTracingInterceptor adds additional information about the encapsulated HTTP request
// to httpgrpc trace spans.
//
// The httpgrpc client wraps HTTP requests up into a generic httpgrpc.HTTP/Handle gRPC method.
// The httpgrpc server unwraps httpgrpc.HTTP/Handle gRPC requests into HTTP requests
@ -80,39 +65,52 @@ func InitHTTPGRPCMiddleware(router *mux.Router) *mux.Router {
// and attaches the HTTP server span tags to the parent httpgrpc.HTTP/Handle gRPC span, allowing
// tracing tooling to differentiate the HTTP requests represented by the httpgrpc.HTTP/Handle spans.
//
// opentracing-contrib/go-stdlib/nethttp.Middleware could not be used here
// as it does not expose options to access and tag the incoming parent span.
func (hgt HTTPGRPCTracer) Wrap(next http.Handler) http.Handler {
httpOperationNameFunc := makeHTTPOperationNameFunc(hgt.RouteMatcher)
fn := func(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
tracer := opentracing.GlobalTracer()
// Note that we cannot do this in the httpgrpc Server implementation, as some applications (eg.
// Mimir's queriers) call Server.Handle() directly, which means we'd attach HTTP-request related
// span tags to whatever parent span is active in the caller, rather than the /httpgrpc.HTTP/Handle
// span created by the tracing middleware for requests that arrive over the network.
func HTTPGRPCTracingInterceptor(router *mux.Router) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp any, err error) {
if info.FullMethod != "/httpgrpc.HTTP/Handle" {
return handler(ctx, req)
}
httpgrpcRequest, ok := req.(*httpgrpc.HTTPRequest)
if !ok {
return handler(ctx, req)
}
httpRequest, err := httpgrpc.ToHTTPRequest(ctx, httpgrpcRequest)
if err != nil {
return handler(ctx, req)
}
tracer := opentracing.GlobalTracer()
parentSpan := opentracing.SpanFromContext(ctx)
// extract relevant span & tag data from request
method := r.Method
matchedRoute := getRouteName(hgt.RouteMatcher, r)
urlPath := r.URL.Path
userAgent := r.Header.Get("User-Agent")
method := httpRequest.Method
routeName := getRouteName(router, httpRequest)
urlPath := httpRequest.URL.Path
userAgent := httpRequest.Header.Get("User-Agent")
// tag parent httpgrpc.HTTP/Handle server span, if it exists
if parentSpan != nil {
parentSpan.SetTag(string(ext.HTTPUrl), urlPath)
parentSpan.SetTag(string(ext.HTTPMethod), method)
parentSpan.SetTag("http.route", matchedRoute)
parentSpan.SetTag("http.route", routeName)
parentSpan.SetTag("http.user_agent", userAgent)
}
// create and start child HTTP span
// mirroring opentracing-contrib/go-stdlib/nethttp.Middleware span name and tags
childSpanName := httpOperationNameFunc(r)
childSpanName := getOperationName(routeName, httpRequest)
startSpanOpts := []opentracing.StartSpanOption{
ext.SpanKindRPCServer,
opentracing.Tag{Key: string(ext.Component), Value: "net/http"},
opentracing.Tag{Key: string(ext.HTTPUrl), Value: urlPath},
opentracing.Tag{Key: string(ext.HTTPMethod), Value: method},
opentracing.Tag{Key: "http.route", Value: matchedRoute},
opentracing.Tag{Key: "http.route", Value: routeName},
opentracing.Tag{Key: "http.user_agent", Value: userAgent},
}
if parentSpan != nil {
@ -127,19 +125,21 @@ func (hgt HTTPGRPCTracer) Wrap(next http.Handler) http.Handler {
childSpan := tracer.StartSpan(childSpanName, startSpanOpts...)
defer childSpan.Finish()
r = r.WithContext(opentracing.ContextWithSpan(r.Context(), childSpan))
next.ServeHTTP(w, r)
ctx = opentracing.ContextWithSpan(ctx, childSpan)
return handler(ctx, req)
}
return http.HandlerFunc(fn)
}
func makeHTTPOperationNameFunc(routeMatcher RouteMatcher) func(r *http.Request) string {
return func(r *http.Request) string {
op := getRouteName(routeMatcher, r)
if op == "" {
return "HTTP " + r.Method
}
return fmt.Sprintf("HTTP %s - %s", r.Method, op)
routeName := getRouteName(routeMatcher, r)
return getOperationName(routeName, r)
}
}
func getOperationName(routeName string, r *http.Request) string {
if routeName == "" {
return "HTTP " + r.Method
}
return fmt.Sprintf("HTTP %s - %s", r.Method, routeName)
}
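
A sketch of how the new interceptor is wired, mirroring the dskit `server/server.go` hunk later in this diff; it must come after the OpenTracing server interceptor so the `/httpgrpc.HTTP/Handle` parent span already exists when the HTTP tags are attached. `router` is assumed to be the same `mux.Router` used by the HTTP server:

```go
package example

import (
	"github.com/gorilla/mux"
	"github.com/grafana/dskit/middleware"
	otgrpc "github.com/opentracing-contrib/go-grpc"
	"github.com/opentracing/opentracing-go"
	"google.golang.org/grpc"
)

func newGRPCServer(router *mux.Router) *grpc.Server {
	return grpc.NewServer(grpc.ChainUnaryInterceptor(
		// Creates the /httpgrpc.HTTP/Handle server span first...
		otgrpc.OpenTracingServerInterceptor(opentracing.GlobalTracer()),
		// ...then tags it (and a child HTTP span) with method, route, URL and user agent.
		middleware.HTTPGRPCTracingInterceptor(router),
	))
}
```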

@ -111,6 +111,15 @@ func DoBatchWithOptions(ctx context.Context, op Operation, r ReadRing, keys []ui
bufZones [GetBufferSize]string
)
for i, key := range keys {
// Get call below takes ~1 microsecond for ~500 instances.
// Checking every 10K calls would be every 10ms.
if i%10e3 == 0 {
if err := ctx.Err(); err != nil {
o.Cleanup()
return err
}
}
replicationSet, err := r.Get(key, op, bufDescs[:0], bufHosts[:0], bufZones[:0])
if err != nil {
o.Cleanup()
@ -134,6 +143,12 @@ func DoBatchWithOptions(ctx context.Context, op Operation, r ReadRing, keys []ui
}
}
// One last check before calling the callbacks: it doesn't make sense if context is canceled.
if err := ctx.Err(); err != nil {
o.Cleanup()
return err
}
tracker := batchTracker{
done: make(chan struct{}, 1),
err: make(chan error, 1),

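The comment above rests on simple arithmetic: at roughly 1 µs per `r.Get` call, sampling `ctx.Err()` every 10,000 iterations means cancellation is noticed within about 10 ms, without paying for the check on every key. The same hot-loop pattern in isolation, as a hypothetical sketch:

```go
package example

import "context"

// processKeys illustrates the periodic cancellation check used in DoBatchWithOptions:
// checking ctx.Err() on every iteration is unnecessary, so it is sampled every 10k keys.
func processKeys(ctx context.Context, keys []uint32, handle func(uint32) error) error {
	for i, key := range keys {
		if i%10_000 == 0 {
			if err := ctx.Err(); err != nil {
				return err // stop promptly once the caller cancels
			}
		}
		if err := handle(key); err != nil {
			return err
		}
	}
	return nil
}
```
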
@ -157,15 +157,43 @@ func (p *Pool) RemoveClientFor(addr string) {
client, ok := p.clients[addr]
if ok {
delete(p.clients, addr)
if p.clientsMetric != nil {
p.clientsMetric.Add(-1)
p.closeClient(addr, client)
}
}
func (p *Pool) closeClient(addr string, client PoolClient) {
if p.clientsMetric != nil {
p.clientsMetric.Add(-1)
}
// Close in the background since this operation may take awhile and we have a mutex
go func(addr string, closer PoolClient) {
if err := closer.Close(); err != nil {
level.Error(p.logger).Log("msg", fmt.Sprintf("error closing connection to %s", p.clientName), "addr", addr, "err", err)
}
// Close in the background since this operation may take awhile and we have a mutex
go func(addr string, closer PoolClient) {
if err := closer.Close(); err != nil {
level.Error(p.logger).Log("msg", fmt.Sprintf("error closing connection to %s", p.clientName), "addr", addr, "err", err)
}
}(addr, client)
}(addr, client)
}
// RemoveClient removes the client instance from the pool if it is still there and not cleaned up by health check.
// The value of client needs to be the same as returned by GetClientForInstance or GetClientFor.
// If addr is not empty and contains the same addr passed when obtaining the client, then the operation is sped up.
func (p *Pool) RemoveClient(client PoolClient, addr string) {
p.Lock()
defer p.Unlock()
if addr != "" {
if p.clients[addr] != client {
return
}
delete(p.clients, addr)
p.closeClient(addr, client)
return
}
for addr, cachedClient := range p.clients {
if cachedClient != client {
continue
}
delete(p.clients, addr)
p.closeClient(addr, client)
return
}
}
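
A usage sketch for the new method, assuming a `*client.Pool` and an address obtained earlier; `doCall` is a hypothetical stand-in for whatever RPC the caller makes. Passing the same client value back means a concurrently re-created healthy client for that address is not evicted by mistake:

```go
package example

import (
	"context"

	"github.com/grafana/dskit/ring/client"
)

func callAndEvictOnError(ctx context.Context, pool *client.Pool, addr string, doCall func(context.Context, client.PoolClient) error) error {
	c, err := pool.GetClientFor(addr)
	if err != nil {
		return err
	}
	if err := doCall(ctx, c); err != nil {
		// Remove only this client instance; if the pool has already replaced it,
		// the replacement is left untouched.
		pool.RemoveClient(c, addr)
		return err
	}
	return nil
}
```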

@ -11,6 +11,7 @@ import (
"github.com/go-kit/log/level"
"github.com/opentracing/opentracing-go/ext"
"github.com/grafana/dskit/cancellation"
"github.com/grafana/dskit/spanlogger"
)
@ -37,7 +38,7 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont
// Initialise the result tracker, which is use to keep track of successes and failures.
var tracker replicationSetResultTracker
if r.MaxUnavailableZones > 0 {
tracker = newZoneAwareResultTracker(r.Instances, r.MaxUnavailableZones, kitlog.NewNopLogger())
tracker = newZoneAwareResultTracker(r.Instances, r.MaxUnavailableZones, nil, kitlog.NewNopLogger())
} else {
tracker = newDefaultResultTracker(r.Instances, r.MaxErrors, kitlog.NewNopLogger())
}
@ -123,6 +124,18 @@ type DoUntilQuorumConfig struct {
// total response size across all instances is reached, making further requests to other
// instances would not be worthwhile.
IsTerminalError func(error) bool
// ZoneSorter orders the provided zones in preference order, for use when MinimizeRequests is true
// and DoUntilQuorum is operating in zone-aware mode. If not set, zones will be used in a
// randomly-selected order.
//
// Earlier zones will be used first.
// The function can modify the provided slice of zones in place.
// All provided zones must be returned exactly once.
//
// This can be used to prioritise zones that are more likely to succeed, or are expected to complete
// faster, for example.
ZoneSorter ZoneSorter
}
func (c DoUntilQuorumConfig) Validate() error {
@ -168,8 +181,12 @@ func (c DoUntilQuorumConfig) Validate() error {
// r.MaxUnavailableZones is 1 and there are three zones, DoUntilQuorum will initially only call f for instances in two
// zones, and only call f for instances in the remaining zone if a request in the initial two zones fails.
//
// DoUntilQuorum will randomly select available zones / instances such that calling DoUntilQuorum multiple times with
// the same ReplicationSet should evenly distribute requests across all zones / instances.
// If cfg.ZoneSorter is non-nil and DoUntilQuorum is operating in zone-aware mode, DoUntilQuorum will initiate requests
// to zones in the order returned by the sorter.
//
// If cfg.ZoneSorter is nil, or DoUntilQuorum is operating in non-zone-aware mode, DoUntilQuorum will randomly select
// available zones / instances such that calling DoUntilQuorum multiple times with the same ReplicationSet should evenly
// distribute requests across all zones / instances.
//
// If cfg.HedgingDelay is non-zero, DoUntilQuorum will call f for an additional zone's instances (if zone-aware) / an
// additional instance (if not zone-aware) every cfg.HedgingDelay until one of the termination conditions above is
@ -197,7 +214,7 @@ func DoUntilQuorum[T any](ctx context.Context, r ReplicationSet, cfg DoUntilQuor
ctx, cancel := context.WithCancel(ctx)
defer cancel()
wrappedF := func(ctx context.Context, desc *InstanceDesc, _ context.CancelFunc) (T, error) {
wrappedF := func(ctx context.Context, desc *InstanceDesc, _ context.CancelCauseFunc) (T, error) {
return f(ctx, desc)
}
@ -216,7 +233,7 @@ func DoUntilQuorum[T any](ctx context.Context, r ReplicationSet, cfg DoUntilQuor
// DoUntilQuorumWithoutSuccessfulContextCancellation
//
// Failing to do this may result in a memory leak.
func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Context, r ReplicationSet, cfg DoUntilQuorumConfig, f func(context.Context, *InstanceDesc, context.CancelFunc) (T, error), cleanupFunc func(T)) ([]T, error) {
func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Context, r ReplicationSet, cfg DoUntilQuorumConfig, f func(context.Context, *InstanceDesc, context.CancelCauseFunc) (T, error), cleanupFunc func(T)) ([]T, error) {
if err := cfg.Validate(); err != nil {
return nil, err
}
@ -249,7 +266,7 @@ func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Contex
var resultTracker replicationSetResultTracker
var contextTracker replicationSetContextTracker
if r.MaxUnavailableZones > 0 || r.ZoneAwarenessEnabled {
resultTracker = newZoneAwareResultTracker(r.Instances, r.MaxUnavailableZones, logger)
resultTracker = newZoneAwareResultTracker(r.Instances, r.MaxUnavailableZones, cfg.ZoneSorter, logger)
contextTracker = newZoneAwareContextTracker(ctx, r.Instances)
} else {
resultTracker = newDefaultResultTracker(r.Instances, r.MaxErrors, logger)
@ -293,12 +310,12 @@ func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Contex
}
}
terminate := func(err error) ([]T, error) {
if cfg.Logger != nil {
terminate := func(err error, cause string) ([]T, error) {
if cfg.Logger != nil && !errors.Is(err, context.Canceled) { // Cancellation is not an error.
ext.Error.Set(cfg.Logger.Span, true)
}
contextTracker.cancelAllContexts()
contextTracker.cancelAllContexts(cancellation.NewErrorf(cause))
cleanupResultsAlreadyReceived()
return nil, err
}
@ -314,12 +331,13 @@ func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Contex
for !resultTracker.succeeded() {
select {
case <-ctx.Done():
level.Debug(logger).Log("msg", "parent context done, returning", "err", ctx.Err())
err := context.Cause(ctx)
level.Debug(logger).Log("msg", "parent context done, returning", "err", err)
// No need to cancel individual instance contexts, as they inherit the cancellation from ctx.
cleanupResultsAlreadyReceived()
return nil, ctx.Err()
return nil, err
case <-hedgingTrigger:
resultTracker.startAdditionalRequests()
case result := <-resultsChan:
@ -328,7 +346,7 @@ func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Contex
if result.err != nil && cfg.IsTerminalError != nil && cfg.IsTerminalError(result.err) {
level.Warn(logger).Log("msg", "cancelling all outstanding requests because a terminal error occurred", "err", result.err)
// We must return before calling resultTracker.done() below, otherwise done() might start further requests if request minimisation is enabled.
return terminate(result.err)
return terminate(result.err, "a terminal error occurred")
}
resultTracker.done(result.instance, result.err)
@ -336,11 +354,11 @@ func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Contex
if result.err == nil {
resultsMap[result.instance] = result.result
} else {
contextTracker.cancelContextFor(result.instance)
contextTracker.cancelContextFor(result.instance, cancellation.NewErrorf("this instance returned an error: %w", result.err))
if resultTracker.failed() {
level.Error(logger).Log("msg", "cancelling all requests because quorum cannot be reached")
return terminate(result.err)
level.Error(logger).Log("msg", "cancelling all outstanding requests because quorum cannot be reached")
return terminate(result.err, "quorum cannot be reached")
}
}
}
@ -358,12 +376,12 @@ func DoUntilQuorumWithoutSuccessfulContextCancellation[T any](ctx context.Contex
if resultTracker.shouldIncludeResultFrom(instance) {
results = append(results, result)
} else {
contextTracker.cancelContextFor(instance)
contextTracker.cancelContextFor(instance, cancellation.NewErrorf("quorum reached, result not required from this instance"))
cleanupFunc(result)
}
} else {
// Nothing to clean up (yet) - this will be handled by deferred call above.
contextTracker.cancelContextFor(instance)
contextTracker.cancelContextFor(instance, cancellation.NewErrorf("quorum reached, result not required from this instance"))
}
}
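
A sketch of a `ZoneSorter` that prefers a configured local zone, following the contract documented above (every provided zone returned exactly once, in-place modification allowed); `preferLocalZone` and the zone names are hypothetical:

```go
package example

import (
	"sort"

	"github.com/grafana/dskit/ring"
)

// preferLocalZone returns a ring.ZoneSorter that places localZone first and keeps
// the remaining zones in a stable (lexicographic) order.
func preferLocalZone(localZone string) ring.ZoneSorter {
	return func(zones []string) []string {
		sort.Slice(zones, func(i, j int) bool {
			if zones[i] == localZone {
				return true
			}
			if zones[j] == localZone {
				return false
			}
			return zones[i] < zones[j]
		})
		return zones
	}
}

// Usage sketch:
//   cfg := ring.DoUntilQuorumConfig{MinimizeRequests: true, ZoneSorter: preferLocalZone("zone-a")}
```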

@ -63,15 +63,15 @@ type replicationSetContextTracker interface {
// The context.CancelFunc will only cancel the context for this instance (ie. if this tracker
// is zone-aware, calling the context.CancelFunc should not cancel contexts for other instances
// in the same zone).
contextFor(instance *InstanceDesc) (context.Context, context.CancelFunc)
contextFor(instance *InstanceDesc) (context.Context, context.CancelCauseFunc)
// Cancels the context for instance previously obtained with contextFor.
// This method may cancel the context for other instances if those other instances are part of
// the same zone and this tracker is zone-aware.
cancelContextFor(instance *InstanceDesc)
cancelContextFor(instance *InstanceDesc, cause error)
// Cancels all contexts previously obtained with contextFor.
cancelAllContexts()
cancelAllContexts(cause error)
}
var errResultNotNeeded = errors.New("result from this instance is not needed")
@ -196,7 +196,7 @@ func (t *defaultResultTracker) startAllRequests() {
func (t *defaultResultTracker) awaitStart(ctx context.Context, instance *InstanceDesc) error {
select {
case <-ctx.Done():
return ctx.Err()
return context.Cause(ctx)
case _, ok := <-t.instanceRelease[instance]:
if ok {
return nil
@ -208,32 +208,32 @@ func (t *defaultResultTracker) awaitStart(ctx context.Context, instance *Instanc
type defaultContextTracker struct {
ctx context.Context
cancelFuncs map[*InstanceDesc]context.CancelFunc
cancelFuncs map[*InstanceDesc]context.CancelCauseFunc
}
func newDefaultContextTracker(ctx context.Context, instances []InstanceDesc) *defaultContextTracker {
return &defaultContextTracker{
ctx: ctx,
cancelFuncs: make(map[*InstanceDesc]context.CancelFunc, len(instances)),
cancelFuncs: make(map[*InstanceDesc]context.CancelCauseFunc, len(instances)),
}
}
func (t *defaultContextTracker) contextFor(instance *InstanceDesc) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithCancel(t.ctx)
func (t *defaultContextTracker) contextFor(instance *InstanceDesc) (context.Context, context.CancelCauseFunc) {
ctx, cancel := context.WithCancelCause(t.ctx)
t.cancelFuncs[instance] = cancel
return ctx, cancel
}
func (t *defaultContextTracker) cancelContextFor(instance *InstanceDesc) {
func (t *defaultContextTracker) cancelContextFor(instance *InstanceDesc, cause error) {
if cancel, ok := t.cancelFuncs[instance]; ok {
cancel()
cancel(cause)
delete(t.cancelFuncs, instance)
}
}
func (t *defaultContextTracker) cancelAllContexts() {
func (t *defaultContextTracker) cancelAllContexts(cause error) {
for instance, cancel := range t.cancelFuncs {
cancel()
cancel(cause)
delete(t.cancelFuncs, instance)
}
}
@ -248,14 +248,18 @@ type zoneAwareResultTracker struct {
zoneRelease map[string]chan struct{}
zoneShouldStart map[string]*atomic.Bool
pendingZones []string
zoneSorter ZoneSorter
logger log.Logger
}
func newZoneAwareResultTracker(instances []InstanceDesc, maxUnavailableZones int, logger log.Logger) *zoneAwareResultTracker {
type ZoneSorter func(zones []string) []string
func newZoneAwareResultTracker(instances []InstanceDesc, maxUnavailableZones int, zoneSorter ZoneSorter, logger log.Logger) *zoneAwareResultTracker {
t := &zoneAwareResultTracker{
waitingByZone: make(map[string]int),
failuresByZone: make(map[string]int),
maxUnavailableZones: maxUnavailableZones,
zoneSorter: zoneSorter,
logger: logger,
}
@ -269,9 +273,21 @@ func newZoneAwareResultTracker(instances []InstanceDesc, maxUnavailableZones int
t.minSuccessfulZones = 0
}
if t.zoneSorter == nil {
t.zoneSorter = defaultZoneSorter
}
return t
}
func defaultZoneSorter(zones []string) []string {
rand.Shuffle(len(zones), func(i, j int) {
zones[i], zones[j] = zones[j], zones[i]
})
return zones
}
func (t *zoneAwareResultTracker) done(instance *InstanceDesc, err error) {
t.waitingByZone[instance.Zone]--
@ -338,9 +354,7 @@ func (t *zoneAwareResultTracker) startMinimumRequests() {
allZones = append(allZones, zone)
}
rand.Shuffle(len(allZones), func(i, j int) {
allZones[i], allZones[j] = allZones[j], allZones[i]
})
allZones = t.zoneSorter(allZones)
for i := 0; i < t.minSuccessfulZones; i++ {
level.Debug(t.logger).Log("msg", "starting requests to zone", "reason", "initial requests", "zone", allZones[i])
@ -396,7 +410,7 @@ func (t *zoneAwareResultTracker) releaseZone(zone string, shouldStart bool) {
func (t *zoneAwareResultTracker) awaitStart(ctx context.Context, instance *InstanceDesc) error {
select {
case <-ctx.Done():
return ctx.Err()
return context.Cause(ctx)
case <-t.zoneRelease[instance.Zone]:
if t.zoneShouldStart[instance.Zone].Load() {
return nil
@ -408,18 +422,18 @@ func (t *zoneAwareResultTracker) awaitStart(ctx context.Context, instance *Insta
type zoneAwareContextTracker struct {
contexts map[*InstanceDesc]context.Context
cancelFuncs map[*InstanceDesc]context.CancelFunc
cancelFuncs map[*InstanceDesc]context.CancelCauseFunc
}
func newZoneAwareContextTracker(ctx context.Context, instances []InstanceDesc) *zoneAwareContextTracker {
t := &zoneAwareContextTracker{
contexts: make(map[*InstanceDesc]context.Context, len(instances)),
cancelFuncs: make(map[*InstanceDesc]context.CancelFunc, len(instances)),
cancelFuncs: make(map[*InstanceDesc]context.CancelCauseFunc, len(instances)),
}
for i := range instances {
instance := &instances[i]
ctx, cancel := context.WithCancel(ctx)
ctx, cancel := context.WithCancelCause(ctx)
t.contexts[instance] = ctx
t.cancelFuncs[instance] = cancel
}
@ -427,26 +441,26 @@ func newZoneAwareContextTracker(ctx context.Context, instances []InstanceDesc) *
return t
}
func (t *zoneAwareContextTracker) contextFor(instance *InstanceDesc) (context.Context, context.CancelFunc) {
func (t *zoneAwareContextTracker) contextFor(instance *InstanceDesc) (context.Context, context.CancelCauseFunc) {
return t.contexts[instance], t.cancelFuncs[instance]
}
func (t *zoneAwareContextTracker) cancelContextFor(instance *InstanceDesc) {
func (t *zoneAwareContextTracker) cancelContextFor(instance *InstanceDesc, cause error) {
// Why not create a per-zone parent context to make this easier?
// If we create a per-zone parent context, we'd need to have some way to cancel the per-zone context when the last of the individual
// contexts in a zone are cancelled using the context.CancelFunc returned from contextFor.
for i, cancel := range t.cancelFuncs {
if i.Zone == instance.Zone {
cancel()
cancel(cause)
delete(t.contexts, i)
delete(t.cancelFuncs, i)
}
}
}
func (t *zoneAwareContextTracker) cancelAllContexts() {
func (t *zoneAwareContextTracker) cancelAllContexts(cause error) {
for instance, cancel := range t.cancelFuncs {
cancel()
cancel(cause)
delete(t.contexts, instance)
delete(t.cancelFuncs, instance)
}

@ -75,6 +75,9 @@ type ReadRing interface {
// CleanupShuffleShardCache should delete cached shuffle-shard subrings for given identifier.
CleanupShuffleShardCache(identifier string)
// GetTokenRangesForInstance returns the token ranges owned by an instance in the ring
GetTokenRangesForInstance(instanceID string) (TokenRanges, error)
}
var (
@ -360,6 +363,26 @@ func (r *Ring) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts,
return ReplicationSet{}, ErrEmptyRing
}
instances, err := r.findInstancesForKey(key, op, bufDescs, bufHosts, bufZones, nil)
if err != nil {
return ReplicationSet{}, err
}
healthyInstances, maxFailure, err := r.strategy.Filter(instances, op, r.cfg.ReplicationFactor, r.cfg.HeartbeatTimeout, r.cfg.ZoneAwarenessEnabled)
if err != nil {
return ReplicationSet{}, err
}
return ReplicationSet{
Instances: healthyInstances,
MaxErrors: maxFailure,
}, nil
}
// Returns instances for given key and operation. Instances are not filtered through ReplicationStrategy.
// InstanceFilter can ignore uninteresting instances that would otherwise be part of the output, and can also stop search early.
// This function needs to be called with read lock on the ring.
func (r *Ring) findInstancesForKey(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts []string, bufZones []string, instanceFilter func(instanceID string) (include, keepGoing bool)) ([]InstanceDesc, error) {
var (
n = r.cfg.ReplicationFactor
instances = bufDescs[:0]
@ -382,7 +405,7 @@ func (r *Ring) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts,
info, ok := r.ringInstanceByToken[token]
if !ok {
// This should never happen unless a bug in the ring code.
return ReplicationSet{}, ErrInconsistentTokensInfo
return nil, ErrInconsistentTokensInfo
}
// We want n *distinct* instances && distinct zones.
@ -410,18 +433,18 @@ func (r *Ring) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts,
distinctZones = append(distinctZones, info.Zone)
}
instances = append(instances, instance)
}
healthyInstances, maxFailure, err := r.strategy.Filter(instances, op, r.cfg.ReplicationFactor, r.cfg.HeartbeatTimeout, r.cfg.ZoneAwarenessEnabled)
if err != nil {
return ReplicationSet{}, err
include, keepGoing := true, true
if instanceFilter != nil {
include, keepGoing = instanceFilter(info.InstanceID)
}
if include {
instances = append(instances, instance)
}
if !keepGoing {
break
}
}
return ReplicationSet{
Instances: healthyInstances,
MaxErrors: maxFailure,
}, nil
return instances, nil
}
// GetAllHealthy implements ReadRing.
@ -1078,3 +1101,36 @@ func (op Operation) ShouldExtendReplicaSetOnState(s InstanceState) bool {
// All states are healthy, no states extend replica set.
var allStatesRingOperation = Operation(0x0000ffff)
// numberOfKeysOwnedByInstance returns how many of the supplied keys are owned by given instance.
func (r *Ring) numberOfKeysOwnedByInstance(keys []uint32, op Operation, instanceID string, bufDescs []InstanceDesc, bufHosts []string, bufZones []string) (int, error) {
r.mtx.RLock()
defer r.mtx.RUnlock()
if r.ringDesc == nil || len(r.ringTokens) == 0 {
return 0, ErrEmptyRing
}
// Instance is not in this ring, it can't own any key.
if _, ok := r.ringDesc.Ingesters[instanceID]; !ok {
return 0, nil
}
owned := 0
for _, tok := range keys {
i, err := r.findInstancesForKey(tok, op, bufDescs, bufHosts, bufZones, func(foundInstanceID string) (include, keepGoing bool) {
if foundInstanceID == instanceID {
// If we've found our instance, we can stop.
return true, false
}
return false, true
})
if err != nil {
return 0, err
}
if len(i) > 0 {
owned++
}
}
return owned, nil
}

@ -3,6 +3,7 @@ package ring
import (
"math/rand"
"sort"
"sync"
"time"
)
@ -21,10 +22,17 @@ type TokenGenerator interface {
CanJoinEnabled() bool
}
type RandomTokenGenerator struct{}
type RandomTokenGenerator struct {
m sync.Mutex
r *rand.Rand
}
func NewRandomTokenGenerator() *RandomTokenGenerator {
return &RandomTokenGenerator{}
return &RandomTokenGenerator{r: rand.New(rand.NewSource(time.Now().UnixNano()))}
}
func NewRandomTokenGeneratorWithSeed(seed int64) *RandomTokenGenerator {
return &RandomTokenGenerator{r: rand.New(rand.NewSource(seed))}
}
// GenerateTokens generates at most requestedTokensCount unique random tokens, none of which clashes with
@ -35,8 +43,6 @@ func (t *RandomTokenGenerator) GenerateTokens(requestedTokensCount int, allTaken
return []uint32{}
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
used := make(map[uint32]bool, len(allTakenTokens))
for _, v := range allTakenTokens {
used[v] = true
@ -44,7 +50,10 @@ func (t *RandomTokenGenerator) GenerateTokens(requestedTokensCount int, allTaken
tokens := make([]uint32, 0, requestedTokensCount)
for i := 0; i < requestedTokensCount; {
candidate := r.Uint32()
t.m.Lock()
candidate := t.r.Uint32()
t.m.Unlock()
if used[candidate] {
continue
}
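
With the generator now owning a seeded `rand.Rand` guarded by a mutex, tests can ask for deterministic tokens; a minimal sketch, assuming the constructors shown above:

```go
package example

import "github.com/grafana/dskit/ring"

// deterministicTokens sketches how a fixed seed yields reproducible tokens,
// e.g. for table-driven ring tests. Passing nil means there are no taken tokens to avoid.
func deterministicTokens() []uint32 {
	gen := ring.NewRandomTokenGeneratorWithSeed(42)
	return gen.GenerateTokens(4, nil) // the same 4 tokens on every run for seed 42
}
```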

@ -0,0 +1,153 @@
package ring
import (
"math"
"github.com/pkg/errors"
"golang.org/x/exp/slices" // using exp/slices until moving to go 1.21.
)
// TokenRanges describes token ranges owned by an instance.
// It consists of [start, end] pairs, where both start and end are inclusive.
// For example TokenRanges with values [5, 10, 20, 30] covers tokens [5..10] and [20..30].
type TokenRanges []uint32
func (tr TokenRanges) IncludesKey(key uint32) bool {
switch {
case len(tr) == 0:
return false
case key < tr[0]:
// key comes before the first range
return false
case key > tr[len(tr)-1]:
// key comes after the last range
return false
}
index, found := slices.BinarySearch(tr, key)
switch {
case found:
// ranges are closed
return true
case index%2 == 1:
// hash would be inserted after the start of a range (even index)
return true
default:
return false
}
}
func (tr TokenRanges) Equal(other TokenRanges) bool {
if len(tr) != len(other) {
return false
}
for i := 0; i < len(tr); i++ {
if tr[i] != other[i] {
return false
}
}
return true
}
// GetTokenRangesForInstance returns the token ranges owned by an instance in the ring.
//
// Current implementation only works with multizone setup, where number of zones is equal to replication factor.
func (r *Ring) GetTokenRangesForInstance(instanceID string) (TokenRanges, error) {
r.mtx.RLock()
defer r.mtx.RUnlock()
instance, ok := r.ringDesc.Ingesters[instanceID]
if !ok {
return nil, ErrInstanceNotFound
}
if instance.Zone == "" {
return nil, errors.New("zone not set")
}
rf := r.cfg.ReplicationFactor
numZones := len(r.ringTokensByZone)
// To simplify computation of token ranges, we currently only support case where zone-awareness is enabled,
// and replicaction factor is equal to number of zones.
if !r.cfg.ZoneAwarenessEnabled || rf != numZones {
// if zoneAwareness is disabled we need to treat the whole ring as one big zone, and we would
// need to walk the ring backwards looking for RF-1 tokens from other instances to determine the range.
return nil, errors.New("can't use ring configuration for computing token ranges")
}
// at this point zone-aware replication is enabled, and rf == numZones
// this means that we will write to one replica in each zone, so we can just consider the zonal ring for our instance
subringTokens, ok := r.ringTokensByZone[instance.Zone]
if !ok || len(subringTokens) == 0 {
return nil, errors.New("no tokens for zone")
}
// 1 range (2 values) per token + one additional if we need to split the rollover range.
ranges := make(TokenRanges, 0, 2*(len(instance.Tokens)+1))
// non-zero value means we're now looking for start of the range. Zero value means we're looking for next end of range (ie. token owned by this instance).
rangeEnd := uint32(0)
// if this instance claimed the first token, it owns the wrap-around range, which we'll break into two separate ranges
firstToken := subringTokens[0]
firstTokenInfo, ok := r.ringInstanceByToken[firstToken]
if !ok {
// This should never happen unless there's a bug in the ring code.
return nil, ErrInconsistentTokensInfo
}
if firstTokenInfo.InstanceID == instanceID {
// we'll start by looking for the beginning of the range that ends with math.MaxUint32
rangeEnd = math.MaxUint32
}
// walk the ring backwards, alternating looking for ends and starts of ranges
for i := len(subringTokens) - 1; i > 0; i-- {
token := subringTokens[i]
info, ok := r.ringInstanceByToken[token]
if !ok {
// This should never happen unless a bug in the ring code.
return nil, ErrInconsistentTokensInfo
}
if rangeEnd == 0 {
// we're looking for the end of the next range
if info.InstanceID == instanceID {
rangeEnd = token - 1
}
} else {
// we have a range end, and are looking for the start of the range
if info.InstanceID != instanceID {
ranges = append(ranges, rangeEnd, token)
rangeEnd = 0
}
}
}
// finally look at the first token again
// - if we have a range end, check if we claimed token 0
// - if we don't, we have our start
// - if we do, the start is 0
// - if we don't have a range end, check if we claimed token 0
// - if we don't, do nothing
// - if we do, add the range of [0, token-1]
// - BUT, if the token itself is 0, do nothing, because we don't own the tokens themselves (we should be covered by the already added range that ends with MaxUint32)
if rangeEnd == 0 {
if firstTokenInfo.InstanceID == instanceID && firstToken != 0 {
ranges = append(ranges, firstToken-1, 0)
}
} else {
if firstTokenInfo.InstanceID == instanceID {
ranges = append(ranges, rangeEnd, 0)
} else {
ranges = append(ranges, rangeEnd, firstToken)
}
}
// Ensure returned ranges are sorted.
slices.Sort(ranges)
return ranges, nil
}
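
A small worked example of the encoding described in the `TokenRanges` doc comment at the top of this file, using the documented ranges `[5..10]` and `[20..30]`:

```go
package example

import "github.com/grafana/dskit/ring"

func tokenRangeExamples() []bool {
	// Pairs of inclusive [start, end] bounds: covers tokens 5..10 and 20..30.
	tr := ring.TokenRanges{5, 10, 20, 30}

	return []bool{
		tr.IncludesKey(5),  // true  (range starts are inclusive)
		tr.IncludesKey(10), // true  (range ends are inclusive)
		tr.IncludesKey(15), // false (falls between the two ranges)
		tr.IncludesKey(31), // false (after the last range)
	}
}
```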

@ -17,6 +17,8 @@ import (
"strings"
"time"
_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof" // anonymous import to get godelatprof handlers registered
gokit_log "github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/gorilla/mux"
@ -92,10 +94,11 @@ type Config struct {
HTTPTLSConfig TLSConfig `yaml:"http_tls_config"`
GRPCTLSConfig TLSConfig `yaml:"grpc_tls_config"`
RegisterInstrumentation bool `yaml:"register_instrumentation"`
ReportGRPCCodesInInstrumentationLabel bool `yaml:"report_grpc_codes_in_instrumentation_label_enabled"`
ExcludeRequestInLog bool `yaml:"-"`
DisableRequestSuccessLog bool `yaml:"-"`
RegisterInstrumentation bool `yaml:"register_instrumentation"`
ReportGRPCCodesInInstrumentationLabel bool `yaml:"report_grpc_codes_in_instrumentation_label_enabled"`
ReportHTTP4XXCodesInInstrumentationLabel bool `yaml:"-"`
ExcludeRequestInLog bool `yaml:"-"`
DisableRequestSuccessLog bool `yaml:"-"`
ServerGracefulShutdownTimeout time.Duration `yaml:"graceful_shutdown_timeout"`
HTTPServerReadTimeout time.Duration `yaml:"http_server_read_timeout"`
@ -350,6 +353,22 @@ func newServer(cfg Config, metrics *Metrics) (*Server, error) {
level.Info(logger).Log("msg", "server listening on addresses", "http", httpListener.Addr(), "grpc", grpcListener.Addr())
// Setup HTTP server
var router *mux.Router
if cfg.Router != nil {
router = cfg.Router
} else {
router = mux.NewRouter()
}
if cfg.PathPrefix != "" {
// Expect metrics and pprof handlers to be prefixed with server's path prefix.
// e.g. /loki/metrics or /loki/debug/pprof
router = router.PathPrefix(cfg.PathPrefix).Subrouter()
}
if cfg.RegisterInstrumentation {
RegisterInstrumentationWithGatherer(router, gatherer)
}
// Setup gRPC server
serverLog := middleware.GRPCServerLog{
Log: logger,
@ -363,6 +382,7 @@ func newServer(cfg Config, metrics *Metrics) (*Server, error) {
grpcMiddleware := []grpc.UnaryServerInterceptor{
serverLog.UnaryServerInterceptor,
otgrpc.OpenTracingServerInterceptor(opentracing.GlobalTracer()),
middleware.HTTPGRPCTracingInterceptor(router), // This must appear after the OpenTracingServerInterceptor.
middleware.UnaryServerInstrumentInterceptor(metrics.RequestDuration, reportGRPCStatusesOptions...),
}
grpcMiddleware = append(grpcMiddleware, cfg.GRPCMiddleware...)
@ -419,22 +439,6 @@ func newServer(cfg Config, metrics *Metrics) (*Server, error) {
grpcServer := grpc.NewServer(grpcOptions...)
grpcOnHTTPServer := grpc.NewServer(grpcOptions...)
// Setup HTTP server
var router *mux.Router
if cfg.Router != nil {
router = cfg.Router
} else {
router = mux.NewRouter()
}
if cfg.PathPrefix != "" {
// Expect metrics and pprof handlers to be prefixed with server's path prefix.
// e.g. /loki/metrics or /loki/debug/pprof
router = router.PathPrefix(cfg.PathPrefix).Subrouter()
}
if cfg.RegisterInstrumentation {
RegisterInstrumentationWithGatherer(router, gatherer)
}
sourceIPs, err := middleware.NewSourceIPs(cfg.LogSourceIPsHeader, cfg.LogSourceIPsRegex)
if err != nil {
return nil, fmt.Errorf("error setting up source IP extraction: %v", err)
@ -547,9 +551,12 @@ func (s *Server) Run() error {
}
}()
// Setup gRPC server
// for HTTP over gRPC, ensure we don't double-count the middleware
httpgrpc.RegisterHTTPServer(s.GRPC, httpgrpc_server.NewServer(s.HTTP))
serverOptions := make([]httpgrpc_server.Option, 0, 1)
if s.cfg.ReportHTTP4XXCodesInInstrumentationLabel {
serverOptions = append(serverOptions, httpgrpc_server.WithReturn4XXErrors)
}
// Setup gRPC server for HTTP over gRPC, ensure we don't double-count the middleware
httpgrpc.RegisterHTTPServer(s.GRPC, httpgrpc_server.NewServer(s.HTTP, serverOptions...))
go func() {
err := s.GRPC.Serve(s.grpcListener)

@ -2,20 +2,11 @@ package tenant
import (
"context"
"errors"
"net/http"
"strings"
"github.com/grafana/dskit/user"
)
var defaultResolver Resolver = NewSingleResolver()
// WithDefaultResolver updates the resolver used for the package methods.
func WithDefaultResolver(r Resolver) {
defaultResolver = r
}
// TenantID returns exactly a single tenant ID from the context. It should be
// used when a certain endpoint should only support exactly a single
// tenant ID. It returns an error user.ErrNoOrgID if there is no tenant ID
@ -25,7 +16,16 @@ func WithDefaultResolver(r Resolver) {
//
//nolint:revive
func TenantID(ctx context.Context) (string, error) {
return defaultResolver.TenantID(ctx)
orgIDs, err := TenantIDs(ctx)
if err != nil {
return "", err
}
if len(orgIDs) > 1 {
return "", user.ErrTooManyOrgIDs
}
return orgIDs[0], nil
}
// TenantIDs returns all tenant IDs from the context. It should return
@ -36,7 +36,20 @@ func TenantID(ctx context.Context) (string, error) {
//
//nolint:revive
func TenantIDs(ctx context.Context) ([]string, error) {
return defaultResolver.TenantIDs(ctx)
//lint:ignore faillint wrapper around upstream method
orgID, err := user.ExtractOrgID(ctx)
if err != nil {
return nil, err
}
orgIDs := strings.Split(orgID, tenantIDsSeparator)
for _, id := range orgIDs {
if err := ValidTenantID(id); err != nil {
return nil, err
}
}
return NormalizeTenantIDs(orgIDs), nil
}
type Resolver interface {
@ -52,109 +65,20 @@ type Resolver interface {
TenantIDs(context.Context) ([]string, error)
}
// NewSingleResolver creates a tenant resolver, which restricts all requests to
// be using a single tenant only. This allows a wider set of characters to be
// used within the tenant ID and should not impose a breaking change.
func NewSingleResolver() *SingleResolver {
return &SingleResolver{}
}
type SingleResolver struct {
}
// containsUnsafePathSegments will return true if the string is a directory
// reference like `.` and `..` or if any path separator character like `/` and
// `\` can be found.
func containsUnsafePathSegments(id string) bool {
// handle the relative reference to current and parent path.
if id == "." || id == ".." {
return true
}
return strings.ContainsAny(id, "\\/")
}
var errInvalidTenantID = errors.New("invalid tenant ID")
func (t *SingleResolver) TenantID(ctx context.Context) (string, error) {
//lint:ignore faillint wrapper around upstream method
id, err := user.ExtractOrgID(ctx)
if err != nil {
return "", err
}
if containsUnsafePathSegments(id) {
return "", errInvalidTenantID
}
return id, nil
}
func (t *SingleResolver) TenantIDs(ctx context.Context) ([]string, error) {
orgID, err := t.TenantID(ctx)
if err != nil {
return nil, err
}
return []string{orgID}, err
}
type MultiResolver struct {
}
type MultiResolver struct{}
// NewMultiResolver creates a tenant resolver, which allows request to have
// multiple tenant ids submitted separated by a '|' character. This enforces
// further limits on the character set allowed within tenants as detailed here:
// https://cortexmetrics.io/docs/guides/limitations/#tenant-id-naming)
// https://grafana.com/docs/mimir/latest/configure/about-tenant-ids/
func NewMultiResolver() *MultiResolver {
return &MultiResolver{}
}
func (t *MultiResolver) TenantID(ctx context.Context) (string, error) {
orgIDs, err := t.TenantIDs(ctx)
if err != nil {
return "", err
}
if len(orgIDs) > 1 {
return "", user.ErrTooManyOrgIDs
}
return orgIDs[0], nil
return TenantID(ctx)
}
func (t *MultiResolver) TenantIDs(ctx context.Context) ([]string, error) {
//lint:ignore faillint wrapper around upstream method
orgID, err := user.ExtractOrgID(ctx)
if err != nil {
return nil, err
}
orgIDs := strings.Split(orgID, tenantIDsLabelSeparator)
for _, orgID := range orgIDs {
if err := ValidTenantID(orgID); err != nil {
return nil, err
}
if containsUnsafePathSegments(orgID) {
return nil, errInvalidTenantID
}
}
return NormalizeTenantIDs(orgIDs), nil
}
// ExtractTenantIDFromHTTPRequest extracts a single TenantID through a given
// resolver directly from a HTTP request.
func ExtractTenantIDFromHTTPRequest(req *http.Request) (string, context.Context, error) {
//lint:ignore faillint wrapper around upstream method
_, ctx, err := user.ExtractOrgIDFromHTTPRequest(req)
if err != nil {
return "", nil, err
}
tenantID, err := defaultResolver.TenantID(ctx)
if err != nil {
return "", nil, err
}
return tenantID, ctx, nil
return TenantIDs(ctx)
}
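
For reference, a minimal sketch of how the refactored package-level helpers behave after this change (tenant names are illustrative and error handling is trimmed; `user.InjectOrgID` and `tenant.TenantIDs`/`tenant.TenantID` are the APIs shown in this diff):

```
package main

import (
	"context"
	"fmt"

	"github.com/grafana/dskit/tenant"
	"github.com/grafana/dskit/user"
)

func main() {
	// A multi-tenant org ID uses '|' as the separator.
	ctx := user.InjectOrgID(context.Background(), "team-a|team-b")

	ids, err := tenant.TenantIDs(ctx)
	fmt.Println(ids, err) // [team-a team-b] <nil>

	// TenantID fails when more than one tenant is present.
	_, err = tenant.TenantID(ctx)
	fmt.Println(err) // user.ErrTooManyOrgIDs
}
```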

@ -4,14 +4,23 @@ import (
"context"
"errors"
"fmt"
"net/http"
"sort"
"strings"
"github.com/grafana/dskit/user"
)
const (
// MaxTenantIDLength is the max length of single tenant ID in bytes
MaxTenantIDLength = 150
tenantIDsSeparator = "|"
)
var (
errTenantIDTooLong = errors.New("tenant ID is too long: max 150 characters")
errTenantIDTooLong = fmt.Errorf("tenant ID is too long: max %d characters", MaxTenantIDLength)
errUnsafeTenantID = errors.New("tenant ID is '.' or '..'")
)
type errTenantIDUnsupportedCharacter struct {
@ -27,9 +36,7 @@ func (e *errTenantIDUnsupportedCharacter) Error() string {
)
}
const tenantIDsLabelSeparator = "|"
// NormalizeTenantIDs is creating a normalized form by sortiing and de-duplicating the list of tenantIDs
// NormalizeTenantIDs creates a normalized form by sorting and de-duplicating the list of tenantIDs
func NormalizeTenantIDs(tenantIDs []string) []string {
sort.Strings(tenantIDs)
@ -49,7 +56,7 @@ func NormalizeTenantIDs(tenantIDs []string) []string {
return tenantIDs[0:posOut]
}
// ValidTenantID
// ValidTenantID returns an error if the single tenant ID is invalid, nil otherwise
func ValidTenantID(s string) error {
// check if it contains invalid runes
for pos, r := range s {
@ -61,19 +68,49 @@ func ValidTenantID(s string) error {
}
}
if len(s) > 150 {
if len(s) > MaxTenantIDLength {
return errTenantIDTooLong
}
if containsUnsafePathSegments(s) {
return errUnsafeTenantID
}
return nil
}
// JoinTenantIDs returns all tenant IDs concatenated with the separator character `|`
func JoinTenantIDs(tenantIDs []string) string {
return strings.Join(tenantIDs, tenantIDsLabelSeparator)
return strings.Join(tenantIDs, tenantIDsSeparator)
}
// ExtractTenantIDFromHTTPRequest extracts a single tenant ID directly from a HTTP request.
func ExtractTenantIDFromHTTPRequest(req *http.Request) (string, context.Context, error) {
//lint:ignore faillint wrapper around upstream method
_, ctx, err := user.ExtractOrgIDFromHTTPRequest(req)
if err != nil {
return "", nil, err
}
tenantID, err := TenantID(ctx)
if err != nil {
return "", nil, err
}
return tenantID, ctx, nil
}
// TenantIDsFromOrgID extracts different tenants from an orgID string value
//
// ignore stutter warning
//
//nolint:revive
func TenantIDsFromOrgID(orgID string) ([]string, error) {
return TenantIDs(user.InjectOrgID(context.TODO(), orgID))
}
// this checks if a rune is supported in tenant IDs (according to
// https://cortexmetrics.io/docs/guides/limitations/#tenant-id-naming)
// https://grafana.com/docs/mimir/latest/configure/about-tenant-ids/
func isSupported(c rune) bool {
// characters
if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') {
@ -96,11 +133,8 @@ func isSupported(c rune) bool {
c == ')'
}
// TenantIDsFromOrgID extracts different tenants from an orgID string value
//
// ignore stutter warning
//
//nolint:revive
func TenantIDsFromOrgID(orgID string) ([]string, error) {
return TenantIDs(user.InjectOrgID(context.TODO(), orgID))
// containsUnsafePathSegments will return true if the string is a directory
// reference like `.` and `..`
func containsUnsafePathSegments(id string) bool {
return id == "." || id == ".."
}
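
A hedged sketch of the validation helpers defined above (tenant names are illustrative):

```
package main

import (
	"fmt"

	"github.com/grafana/dskit/tenant"
)

func main() {
	// Per-tenant validation: unsupported characters, IDs longer than
	// MaxTenantIDLength, and the path segments "." and ".." are rejected.
	fmt.Println(tenant.ValidTenantID("team-a")) // <nil>
	fmt.Println(tenant.ValidTenantID(".."))     // tenant ID is '.' or '..'

	// Splitting a raw org ID into normalized (sorted, de-duplicated) tenant IDs.
	ids, err := tenant.TenantIDsFromOrgID("team-b|team-a|team-a")
	fmt.Println(ids, err) // [team-a team-b] <nil>
}
```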

@ -0,0 +1,203 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2020 Pyroscope
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,98 @@
# godeltaprof
godeltaprof is an efficient delta profiler for memory, mutex, and block profiles.
# Why
In Go, allocation, mutex, and block profiles are cumulative. They only grow over time and show allocations that have happened since the beginning of the running program.
Not only do the values grow, but the size of the profile itself grows as well. It can reach megabytes for long-running processes; these multi-megabyte profiles are called huge profiles in this document.
In many cases, it's more useful to see the differences between two points in time.
You can use the original runtime/pprof package to obtain a so-called delta profile and see these differences.
Using the delta profile requires passing a seconds argument to the pprof endpoint query.
```
go tool pprof http://localhost:6060/debug/pprof/heap?seconds=30
```
What this does:
1. Dump profile `p0`
2. Sleep
3. Dump profile `p1`
4. Decompress and parse protobuf `p0`
5. Decompress and parse protobuf `p1`
6. Subtract `p0` from `p1`
7. Serialize protobuf and compress the result
The resulting profile is *usually* much smaller (`p0` may be megabytes, while the result is usually tens of kilobytes).
There are a number of issues with this approach:
1. The heap profile contains both allocation values and in-use values. In-use values are not cumulative, so they are corrupted by the subtraction.
**Note:** This could be fixed if the runtime/pprof package used `p0.ScaleN([]float64{-1,-1,0,0})` instead of `p0.Scale(-1)` - that would subtract the allocation values and zero out the in-use values in `p0`.
2. It requires dumping two profiles.
3. It produces a lot of allocations, putting pressure on the GC.
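
As a rough illustration of steps 4-7 above, here is a hedged sketch of subtracting two already-dumped heap profiles with the `github.com/google/pprof/profile` package (the package choice and file names are assumptions for the example, not what runtime/pprof does internally; error handling is omitted for brevity):

```
package main

import (
	"os"

	"github.com/google/pprof/profile"
)

func main() {
	// p0.pb.gz and p1.pb.gz are two previously dumped heap profiles (hypothetical file names).
	f0, _ := os.Open("p0.pb.gz")
	p0, _ := profile.Parse(f0)
	f1, _ := os.Open("p1.pb.gz")
	p1, _ := profile.Parse(f1)

	// Negate the cumulative values in p0, then merge to obtain p1 - p0.
	// ScaleN([]float64{-1, -1, 0, 0}) would keep the in-use values intact (see the note above).
	_ = p0.Scale(-1)
	delta, _ := profile.Merge([]*profile.Profile{p0, p1})

	out, _ := os.Create("delta.pb.gz")
	_ = delta.Write(out)
}
```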
## DataDog's fastdelta
DataDog's [fastdelta profiler](https://github.com/DataDog/dd-trace-go/blob/30e1406c2cb62af749df03d559853e1d1de0e3bf/profiler/internal/fastdelta/fd.go#L75) uses another approach.
It improves the runtime/pprof approach by keeping a copy of the previous profile and subtracting the current profile from it.
The fastdelta profiler uses a custom protobuf pprof parser that doesn't allocate as much memory.
This approach is more efficient, faster, and produces less garbage. It also doesn't require using two profiles.
However, the fastdelta profiler still parses huge profiles (up to megabytes), only to discard most of the data.
## godeltaprof
godeltaprof does a similar job, but slightly differently.
Delta computation happens before serializing any pprof files, using `runtime.MemProfileRecord` and `runtime.BlockProfileRecord`.
This way, huge profiles don't need to be parsed: the delta is computed on the raw records, all-zero samples are rejected, and the result is serialized and compressed. A short usage sketch follows the list below.
The source code for godeltaprof is based on (forked from) the original [runtime/pprof package](https://github.com/golang/go/tree/master/src/runtime/pprof).
godeltaprof is modified to include delta computation before serialization and to expose the new endpoints.
There are other small improvements and benefits:
- Using `github.com/klauspost/compress/gzip` instead of `compress/gzip`
- Optional lazy reading of mappings (they don't change over time for most applications)
- A separate package from runtime, so it can be updated independently
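
A hedged usage sketch (file names are illustrative): the delta is computed from the raw runtime records inside `Profile`, and only the already-subtracted, non-zero samples are serialized:

```
package main

import (
	"os"

	"github.com/grafana/pyroscope-go/godeltaprof"
)

func main() {
	hp := godeltaprof.NewHeapProfiler()

	// First call: emits the full (cumulative) profile and remembers the values per stack.
	f1, _ := os.Create("heap-delta-1.pb.gz")
	_ = hp.Profile(f1)

	// Later calls: emit only what changed since the previous call.
	f2, _ := os.Create("heap-delta-2.pb.gz")
	_ = hp.Profile(f2)
}
```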
# benchmarks
These benchmarks used memory profiles from the [pyroscope](https://github.com/grafana/pyroscope) server.
BenchmarkOG - dumps the memory profile with the runtime/pprof package
BenchmarkFastDelta - dumps the memory profile with the runtime/pprof package and computes the delta using fastdelta
BenchmarkGodeltaprof - does not dump a profile with runtime/pprof; computes the delta and outputs the result
Each benchmark also outputs the produced profile sizes.
```
BenchmarkOG
63 181862189 ns/op
profile sizes: [209117 209107 209077 209089 209095 209076 209088 209082 209090 209092]
BenchmarkFastDelta
43 273936764 ns/op
profile sizes: [169300 10815 8969 9511 9752 9376 9545 8959 10357 9536]
BenchmarkGodeltaprof
366 31148264 ns/op
profile sizes: [208898 11485 9347 9967 10291 9848 10085 9285 11033 9986]
```
Notice how the BenchmarkOG profile sizes are ~200k while the BenchmarkGodeltaprof and BenchmarkFastDelta sizes are ~10k - that is because a lot of samples
with zero values are discarded after the delta computation.
The source code of the benchmarks can be found [here](https://github.com/grafana/pyroscope/compare/godeltaprofbench?expand=1)
CPU profiles: [BenchmarkOG](https://flamegraph.com/share/a8f68312-98c7-11ee-a502-466f68d203a5), [BenchmarkFastDelta](https://flamegraph.com/share/c23821f3-98c7-11ee-a502-466f68d203a5), [BenchmarkGodeltaprof]( https://flamegraph.com/share/ea66df36-98c7-11ee-9a0d-f2c25703e557)
# upstreaming
TODO(korniltsev): create a golang issue and ask whether godeltaprof is something that could be considered for merging into the upstream golang repo
in some way (maybe not as is, maybe with different APIs)

@ -0,0 +1,119 @@
package godeltaprof
import (
"io"
"runtime"
"sort"
"sync"
"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
)
// BlockProfiler is a stateful profiler for goroutine blocking events and mutex contention in Go programs.
// Depending on the function used to create the BlockProfiler, it uses either runtime.BlockProfile or runtime.MutexProfile.
// The BlockProfiler provides similar functionality to pprof.Lookup("block").WriteTo and pprof.Lookup("mutex").WriteTo,
// but with some key differences.
//
// The BlockProfiler tracks the delta of blocking events or mutex contention since the last
// profile was written, effectively providing a snapshot of the changes
// between two points in time. This is in contrast to the
// pprof.Lookup functions, which accumulate profiling data
// and result in profiles that represent the entire lifetime of the program.
//
// The BlockProfiler is safe for concurrent use, as it serializes access to
// its internal state using a sync.Mutex. This ensures that multiple goroutines
// can call the Profile method without causing any data race issues.
type BlockProfiler struct {
impl pprof.DeltaMutexProfiler
mutex sync.Mutex
runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)
scaleProfile pprof.MutexProfileScaler
}
// NewMutexProfiler creates a new BlockProfiler instance for profiling mutex contention.
// The resulting BlockProfiler uses runtime.MutexProfile as its data source.
//
// Usage:
//
// mp := godeltaprof.NewMutexProfiler()
// ...
// err := mp.Profile(someWriter)
func NewMutexProfiler() *BlockProfiler {
return &BlockProfiler{
runtimeProfile: runtime.MutexProfile,
scaleProfile: pprof.ScalerMutexProfile,
impl: pprof.DeltaMutexProfiler{
Options: pprof.ProfileBuilderOptions{
GenericsFrames: true,
LazyMapping: true,
},
},
}
}
func NewMutexProfilerWithOptions(options ProfileOptions) *BlockProfiler {
return &BlockProfiler{
runtimeProfile: runtime.MutexProfile,
scaleProfile: pprof.ScalerMutexProfile,
impl: pprof.DeltaMutexProfiler{
Options: pprof.ProfileBuilderOptions{
GenericsFrames: options.GenericsFrames,
LazyMapping: options.LazyMappings,
},
},
}
}
// NewBlockProfiler creates a new BlockProfiler instance for profiling goroutine blocking events.
// The resulting BlockProfiler uses runtime.BlockProfile as its data source.
//
// Usage:
//
// bp := godeltaprof.NewBlockProfiler()
// ...
// err := bp.Profile(someWriter)
func NewBlockProfiler() *BlockProfiler {
return &BlockProfiler{
runtimeProfile: runtime.BlockProfile,
scaleProfile: pprof.ScalerBlockProfile,
impl: pprof.DeltaMutexProfiler{
Options: pprof.ProfileBuilderOptions{
GenericsFrames: true,
LazyMapping: true,
},
},
}
}
func NewBlockProfilerWithOptions(options ProfileOptions) *BlockProfiler {
return &BlockProfiler{
runtimeProfile: runtime.BlockProfile,
scaleProfile: pprof.ScalerBlockProfile,
impl: pprof.DeltaMutexProfiler{
Options: pprof.ProfileBuilderOptions{
GenericsFrames: options.GenericsFrames,
LazyMapping: options.LazyMappings,
},
},
}
}
func (d *BlockProfiler) Profile(w io.Writer) error {
d.mutex.Lock()
defer d.mutex.Unlock()
var p []runtime.BlockProfileRecord
n, ok := d.runtimeProfile(nil)
for {
p = make([]runtime.BlockProfileRecord, n+50)
n, ok = d.runtimeProfile(p)
if ok {
p = p[:n]
break
}
}
sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })
return d.impl.PrintCountCycleProfile(w, "contentions", "delay", d.scaleProfile, p)
}
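
A hedged usage note (not part of this file): mutex and block profiling are disabled by default in the runtime, so a caller is expected to enable them before these profilers can observe anything. A minimal sketch, assuming file output and illustrative sampling rates:

```
package main

import (
	"os"
	"runtime"

	"github.com/grafana/pyroscope-go/godeltaprof"
)

func main() {
	// Enable the underlying runtime profiles; both calls are standard library APIs.
	runtime.SetMutexProfileFraction(5)    // sample roughly 1 in 5 mutex contention events
	runtime.SetBlockProfileRate(1000000)  // aim for one sampled blocking event per 1e6 ns blocked

	mp := godeltaprof.NewMutexProfiler()
	bp := godeltaprof.NewBlockProfiler()

	fm, _ := os.Create("mutex-delta.pb.gz")
	_ = mp.Profile(fm)

	fb, _ := os.Create("block-delta.pb.gz")
	_ = bp.Profile(fb)
}
```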

@ -0,0 +1,81 @@
package godeltaprof
import (
"io"
"runtime"
"sync"
"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
)
// HeapProfiler is a stateful profiler for heap allocations in Go programs.
// It is based on runtime.MemProfile and provides similar functionality to
// pprof.WriteHeapProfile, but with some key differences.
//
// The HeapProfiler tracks the delta of heap allocations since the last
// profile was written, effectively providing a snapshot of the changes
// in heap usage between two points in time. This is in contrast to the
// pprof.WriteHeapProfile function, which accumulates profiling data
// and results in profiles that represent the entire lifetime of the program.
//
// The HeapProfiler is safe for concurrent use, as it serializes access to
// its internal state using a sync.Mutex. This ensures that multiple goroutines
// can call the Profile method without causing any data race issues.
//
// Usage:
//
// hp := godeltaprof.NewHeapProfiler()
// ...
// err := hp.Profile(someWriter)
type HeapProfiler struct {
impl pprof.DeltaHeapProfiler
mutex sync.Mutex
}
func NewHeapProfiler() *HeapProfiler {
return &HeapProfiler{
impl: pprof.DeltaHeapProfiler{
Options: pprof.ProfileBuilderOptions{
GenericsFrames: true,
LazyMapping: true,
},
}}
}
func NewHeapProfilerWithOptions(options ProfileOptions) *HeapProfiler {
return &HeapProfiler{
impl: pprof.DeltaHeapProfiler{
Options: pprof.ProfileBuilderOptions{
GenericsFrames: options.GenericsFrames,
LazyMapping: options.LazyMappings,
},
}}
}
func (d *HeapProfiler) Profile(w io.Writer) error {
d.mutex.Lock()
defer d.mutex.Unlock()
// Find out how many records there are (MemProfile(nil, true)),
// allocate that many records, and get the data.
// There's a race—more records might be added between
// the two calls—so allocate a few extra records for safety
// and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case.
var p []runtime.MemProfileRecord
n, ok := runtime.MemProfile(nil, true)
for {
// Allocate room for a slightly bigger profile,
// in case a few more entries have been added
// since the call to MemProfile.
p = make([]runtime.MemProfileRecord, n+50)
n, ok = runtime.MemProfile(p, true)
if ok {
p = p[0:n]
break
}
// Profile grew; try again.
}
return d.impl.WriteHeapProto(w, p, int64(runtime.MemProfileRate), "")
}

@ -0,0 +1,50 @@
package pprof
import (
"fmt"
"io"
"net/http"
"runtime"
"strconv"
"github.com/grafana/pyroscope-go/godeltaprof"
)
var (
deltaHeapProfiler = godeltaprof.NewHeapProfiler()
deltaBlockProfiler = godeltaprof.NewBlockProfiler()
deltaMutexProfiler = godeltaprof.NewMutexProfiler()
)
type deltaProfiler interface {
Profile(w io.Writer) error
}
func init() {
http.HandleFunc("/debug/pprof/delta_heap", Heap)
http.HandleFunc("/debug/pprof/delta_block", Block)
http.HandleFunc("/debug/pprof/delta_mutex", Mutex)
}
func Heap(w http.ResponseWriter, r *http.Request) {
gc, _ := strconv.Atoi(r.FormValue("gc"))
if gc > 0 {
runtime.GC()
}
writeDeltaProfile(deltaHeapProfiler, "heap", w)
}
func Block(w http.ResponseWriter, r *http.Request) {
writeDeltaProfile(deltaBlockProfiler, "block", w)
}
func Mutex(w http.ResponseWriter, r *http.Request) {
writeDeltaProfile(deltaMutexProfiler, "mutex", w)
}
func writeDeltaProfile(p deltaProfiler, name string, w http.ResponseWriter) {
w.Header().Set("X-Content-Type-Options", "nosniff")
w.Header().Set("Content-Type", "application/octet-stream")
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.pprof.gz"`, name))
_ = p.Profile(w)
}
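
For completeness, a hedged sketch of how these endpoints are typically exposed (the port is illustrative): a blank import runs the init above, which registers the handlers on http.DefaultServeMux:

```
package main

import (
	"log"
	"net/http"

	_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof" // registers the /debug/pprof/delta_* handlers
)

func main() {
	// The delta endpoints can then be scraped like the standard ones, e.g.
	//   go tool pprof http://localhost:6060/debug/pprof/delta_heap
	log.Println(http.ListenAndServe("localhost:6060", nil))
}
```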

@ -0,0 +1,118 @@
package pprof
import (
"io"
"math"
"runtime"
"strings"
)
type DeltaHeapProfiler struct {
m profMap
mem []memMap
Options ProfileBuilderOptions
}
// WriteHeapProto writes the current heap profile in protobuf format to w.
func (d *DeltaHeapProfiler) WriteHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error {
if d.mem == nil || !d.Options.LazyMapping {
d.mem = readMapping()
}
b := newProfileBuilder(w, d.Options, d.mem)
b.pbValueType(tagProfile_PeriodType, "space", "bytes")
b.pb.int64Opt(tagProfile_Period, rate)
b.pbValueType(tagProfile_SampleType, "alloc_objects", "count")
b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes")
b.pbValueType(tagProfile_SampleType, "inuse_objects", "count")
b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes")
if defaultSampleType != "" {
b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType))
}
values := []int64{0, 0, 0, 0}
var locs []uint64
for _, r := range p {
// do the delta
if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
// it is a fresh bucket and it will be published after the next 1-2 GC cycles
continue
}
var blockSize int64
if r.AllocObjects > 0 {
blockSize = r.AllocBytes / r.AllocObjects
}
entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
if (r.AllocObjects - entry.count.v1) < 0 {
continue
}
AllocObjects := r.AllocObjects - entry.count.v1
AllocBytes := r.AllocBytes - entry.count.v2
entry.count.v1 = r.AllocObjects
entry.count.v2 = r.AllocBytes
values[0], values[1] = scaleHeapSample(AllocObjects, AllocBytes, rate)
values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate)
if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 {
continue
}
hideRuntime := true
for tries := 0; tries < 2; tries++ {
stk := r.Stack()
// For heap profiles, all stack
// addresses are return PCs, which is
// what appendLocsForStack expects.
if hideRuntime {
for i, addr := range stk {
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
continue
}
// Found non-runtime. Show any runtime uses above it.
stk = stk[i:]
break
}
}
locs = b.appendLocsForStack(locs[:0], stk)
if len(locs) > 0 {
break
}
hideRuntime = false // try again, and show all frames next time.
}
b.pbSample(values, locs, func() {
if blockSize != 0 {
b.pbLabel(tagSample_Label, "bytes", "", blockSize)
}
})
}
b.build()
return nil
}
// scaleHeapSample adjusts the data from a heap Sample to
// account for its probability of appearing in the collected
// data. Heap profiles are a sampling of the memory allocation
// requests in a program. We estimate the unsampled value by dividing
// each collected sample by its probability of appearing in the
// profile. heap profiles rely on a poisson process to determine
// which samples to collect, based on the desired average collection
// rate R. The probability of a sample of size S to appear in that
// profile is 1-exp(-S/R).
func scaleHeapSample(count, size, rate int64) (int64, int64) {
if count == 0 || size == 0 {
return 0, 0
}
if rate <= 1 {
// if rate==1 all samples were collected so no adjustment is needed.
// if rate<1 treat as unknown and skip scaling.
return count, size
}
avgSize := float64(size) / float64(count)
scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
return int64(float64(count) * scale), int64(float64(size) * scale)
}

@ -0,0 +1,59 @@
package pprof
import (
"io"
"runtime"
)
type DeltaMutexProfiler struct {
m profMap
mem []memMap
Options ProfileBuilderOptions
}
// PrintCountCycleProfile outputs block profile records (for block or mutex profiles)
// as the pprof-proto format output. Translations from cycle count to time duration
// are done because the proto expects count and time (nanoseconds) instead of count
// and the number of cycles for block, contention profiles.
// Possible 'scaler' functions are scaleBlockProfile and scaleMutexProfile.
func (d *DeltaMutexProfiler) PrintCountCycleProfile(w io.Writer, countName, cycleName string, scaler MutexProfileScaler, records []runtime.BlockProfileRecord) error {
if d.mem == nil || !d.Options.LazyMapping {
d.mem = readMapping()
}
// Output profile in protobuf form.
b := newProfileBuilder(w, d.Options, d.mem)
b.pbValueType(tagProfile_PeriodType, countName, "count")
b.pb.int64Opt(tagProfile_Period, 1)
b.pbValueType(tagProfile_SampleType, countName, "count")
b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")
cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9
values := []int64{0, 0}
var locs []uint64
for _, r := range records {
count, nanosec := ScaleMutexProfile(scaler, r.Count, float64(r.Cycles)/cpuGHz)
inanosec := int64(nanosec)
// do the delta
entry := d.m.Lookup(r.Stack(), 0)
values[0] = count - entry.count.v1
values[1] = inanosec - entry.count.v2
entry.count.v1 = count
entry.count.v2 = inanosec
if values[0] < 0 || values[1] < 0 {
continue
}
if values[0] == 0 && values[1] == 0 {
continue
}
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
locs = b.appendLocsForStack(locs[:0], r.Stack())
b.pbSample(values, locs, nil)
}
b.build()
return nil
}

@ -0,0 +1,109 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pprof
import (
"encoding/binary"
"errors"
"fmt"
"os"
)
var (
errBadELF = errors.New("malformed ELF binary")
errNoBuildID = errors.New("no NT_GNU_BUILD_ID found in ELF binary")
)
// elfBuildID returns the GNU build ID of the named ELF binary,
// without introducing a dependency on debug/elf and its dependencies.
func elfBuildID(file string) (string, error) {
buf := make([]byte, 256)
f, err := os.Open(file)
if err != nil {
return "", err
}
defer f.Close()
if _, err := f.ReadAt(buf[:64], 0); err != nil {
return "", err
}
// ELF file begins with \x7F E L F.
if buf[0] != 0x7F || buf[1] != 'E' || buf[2] != 'L' || buf[3] != 'F' {
return "", errBadELF
}
var byteOrder binary.ByteOrder
switch buf[5] {
default:
return "", errBadELF
case 1: // little-endian
byteOrder = binary.LittleEndian
case 2: // big-endian
byteOrder = binary.BigEndian
}
var shnum int
var shoff, shentsize int64
switch buf[4] {
default:
return "", errBadELF
case 1: // 32-bit file header
shoff = int64(byteOrder.Uint32(buf[32:]))
shentsize = int64(byteOrder.Uint16(buf[46:]))
if shentsize != 40 {
return "", errBadELF
}
shnum = int(byteOrder.Uint16(buf[48:]))
case 2: // 64-bit file header
shoff = int64(byteOrder.Uint64(buf[40:]))
shentsize = int64(byteOrder.Uint16(buf[58:]))
if shentsize != 64 {
return "", errBadELF
}
shnum = int(byteOrder.Uint16(buf[60:]))
}
for i := 0; i < shnum; i++ {
if _, err := f.ReadAt(buf[:shentsize], shoff+int64(i)*shentsize); err != nil {
return "", err
}
if typ := byteOrder.Uint32(buf[4:]); typ != 7 { // SHT_NOTE
continue
}
var off, size int64
if shentsize == 40 {
// 32-bit section header
off = int64(byteOrder.Uint32(buf[16:]))
size = int64(byteOrder.Uint32(buf[20:]))
} else {
// 64-bit section header
off = int64(byteOrder.Uint64(buf[24:]))
size = int64(byteOrder.Uint64(buf[32:]))
}
size += off
for off < size {
if _, err := f.ReadAt(buf[:16], off); err != nil { // room for header + name GNU\x00
return "", err
}
nameSize := int(byteOrder.Uint32(buf[0:]))
descSize := int(byteOrder.Uint32(buf[4:]))
noteType := int(byteOrder.Uint32(buf[8:]))
descOff := off + int64(12+(nameSize+3)&^3)
off = descOff + int64((descSize+3)&^3)
if nameSize != 4 || noteType != 3 || buf[12] != 'G' || buf[13] != 'N' || buf[14] != 'U' || buf[15] != '\x00' { // want name GNU\x00 type 3 (NT_GNU_BUILD_ID)
continue
}
if descSize > len(buf) {
return "", errBadELF
}
if _, err := f.ReadAt(buf[:descSize], descOff); err != nil {
return "", err
}
return fmt.Sprintf("%x", buf[:descSize]), nil
}
}
return "", errNoBuildID
}

@ -0,0 +1,18 @@
//go:build go1.16 && !go1.17
// +build go1.16,!go1.17
package pprof
import (
"compress/gzip"
"io"
)
type gzipWriter struct {
*gzip.Writer
}
func newGzipWriter(w io.Writer) gzipWriter {
zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
return gzipWriter{zw}
}

@ -0,0 +1,19 @@
//go:build go1.17
// +build go1.17
package pprof
import (
"io"
"github.com/klauspost/compress/gzip"
)
type gzipWriter struct {
*gzip.Writer
}
func newGzipWriter(w io.Writer) gzipWriter {
zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
return gzipWriter{zw}
}

@ -0,0 +1,96 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pprof
import "unsafe"
// A profMap is a map from (stack, tag) to mapEntry.
// It grows without bound, but that's assumed to be OK.
type profMap struct {
hash map[uintptr]*profMapEntry
all *profMapEntry
last *profMapEntry
free []profMapEntry
freeStk []uintptr
}
type count struct {
// alloc_objects, alloc_bytes for heap
// mutex_count, mutex_duration for mutex
v1, v2 int64
}
// A profMapEntry is a single entry in the profMap.
type profMapEntry struct {
nextHash *profMapEntry // next in hash list
nextAll *profMapEntry // next in list of all entries
stk []uintptr
tag uintptr
count count
}
func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry {
// Compute hash of (stk, tag).
h := uintptr(0)
for _, x := range stk {
h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
h += uintptr(x) * 41
}
h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
h += uintptr(tag) * 41
// Find entry if present.
var last *profMapEntry
Search:
for e := m.hash[h]; e != nil; last, e = e, e.nextHash {
if len(e.stk) != len(stk) || e.tag != tag {
continue
}
for j := range stk {
if e.stk[j] != uintptr(stk[j]) {
continue Search
}
}
// Move to front.
if last != nil {
last.nextHash = e.nextHash
e.nextHash = m.hash[h]
m.hash[h] = e
}
return e
}
// Add new entry.
if len(m.free) < 1 {
m.free = make([]profMapEntry, 128)
}
e := &m.free[0]
m.free = m.free[1:]
e.nextHash = m.hash[h]
e.tag = tag
if len(m.freeStk) < len(stk) {
m.freeStk = make([]uintptr, 1024)
}
// Limit cap to prevent append from clobbering freeStk.
e.stk = m.freeStk[:len(stk):len(stk)]
m.freeStk = m.freeStk[len(stk):]
for j := range stk {
e.stk[j] = uintptr(stk[j])
}
if m.hash == nil {
m.hash = make(map[uintptr]*profMapEntry)
}
m.hash[h] = e
if m.all == nil {
m.all = e
m.last = e
} else {
m.last.nextAll = e
m.last = e
}
return e
}

@ -0,0 +1,27 @@
//go:build go1.16 && !go1.20
// +build go1.16,!go1.20
package pprof
import "runtime"
type MutexProfileScaler struct {
f func(cnt int64, ns float64) (int64, float64)
}
func ScaleMutexProfile(scaler MutexProfileScaler, cnt int64, ns float64) (int64, float64) {
return scaler.f(cnt, ns)
}
var ScalerMutexProfile = MutexProfileScaler{func(cnt int64, ns float64) (int64, float64) {
period := runtime.SetMutexProfileFraction(-1)
return cnt * int64(period), ns * float64(period)
}}
var ScalerBlockProfile = MutexProfileScaler{func(cnt int64, ns float64) (int64, float64) {
// Do nothing.
// The current way of block profile sampling makes it
// hard to compute the unsampled number. The legacy block
// profile parse doesn't attempt to scale or unsample.
return cnt, ns
}}

@ -0,0 +1,17 @@
//go:build go1.20
// +build go1.20
package pprof
type MutexProfileScaler struct {
}
// ScaleMutexProfile is a no-op for go1.20+.
// https://github.com/golang/go/commit/30b1af00ff142a3f1a5e2a0f32cf04a649bd5e65
func ScaleMutexProfile(_ MutexProfileScaler, cnt int64, ns float64) (int64, float64) {
return cnt, ns
}
var ScalerMutexProfile = MutexProfileScaler{}
var ScalerBlockProfile = MutexProfileScaler{}

@ -0,0 +1,715 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pprof
import (
"bytes"
"io"
"os"
"runtime"
"strconv"
"strings"
"time"
)
// lostProfileEvent is the function to which lost profiling
// events are attributed.
// (The name shows up in the pprof graphs.)
func lostProfileEvent() { lostProfileEvent() }
type ProfileBuilderOptions struct {
// for go1.21+ if true - use runtime_FrameSymbolName - produces frames with generic types, for example [go.shape.int]
// for go1.21+ if false - use runtime.Frame->Function - produces frames with generic types omitted [...]
// pre 1.21 - always use runtime.Frame->Function - produces frames with generic types omitted [...]
GenericsFrames bool
LazyMapping bool
}
// A profileBuilder writes a profile incrementally from a
// stream of profile samples delivered by the runtime.
type profileBuilder struct {
start time.Time
end time.Time
havePeriod bool
period int64
// encoding state
w io.Writer
zw gzipWriter
pb protobuf
strings []string
stringMap map[string]int
locs map[uintptr]locInfo // list of locInfo starting with the given PC.
funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
deck pcDeck
opt ProfileBuilderOptions
}
type memMap struct {
// initialized as reading mapping
start uintptr // Address at which the binary (or DLL) is loaded into memory.
end uintptr // The limit of the address range occupied by this mapping.
offset uint64 // Offset in the binary that corresponds to the first mapped address.
file string // The object this entry is loaded from.
buildID string // A string that uniquely identifies a particular program version with high probability.
funcs symbolizeFlag
fake bool // map entry was faked; /proc/self/maps wasn't available
}
// symbolizeFlag keeps track of symbolization result.
//
// 0 : no symbol lookup was performed
// 1<<0 (lookupTried) : symbol lookup was performed
// 1<<1 (lookupFailed): symbol lookup was performed but failed
type symbolizeFlag uint8
const (
lookupTried symbolizeFlag = 1 << iota
lookupFailed symbolizeFlag = 1 << iota
)
const (
// message Profile
tagProfile_SampleType = 1 // repeated ValueType
tagProfile_Sample = 2 // repeated Sample
tagProfile_Mapping = 3 // repeated Mapping
tagProfile_Location = 4 // repeated Location
tagProfile_Function = 5 // repeated Function
tagProfile_StringTable = 6 // repeated string
tagProfile_DropFrames = 7 // int64 (string table index)
tagProfile_KeepFrames = 8 // int64 (string table index)
tagProfile_TimeNanos = 9 // int64
tagProfile_DurationNanos = 10 // int64
tagProfile_PeriodType = 11 // ValueType (really optional string???)
tagProfile_Period = 12 // int64
tagProfile_Comment = 13 // repeated int64
tagProfile_DefaultSampleType = 14 // int64
// message ValueType
tagValueType_Type = 1 // int64 (string table index)
tagValueType_Unit = 2 // int64 (string table index)
// message Sample
tagSample_Location = 1 // repeated uint64
tagSample_Value = 2 // repeated int64
tagSample_Label = 3 // repeated Label
// message Label
tagLabel_Key = 1 // int64 (string table index)
tagLabel_Str = 2 // int64 (string table index)
tagLabel_Num = 3 // int64
// message Mapping
tagMapping_ID = 1 // uint64
tagMapping_Start = 2 // uint64
tagMapping_Limit = 3 // uint64
tagMapping_Offset = 4 // uint64
tagMapping_Filename = 5 // int64 (string table index)
tagMapping_BuildID = 6 // int64 (string table index)
tagMapping_HasFunctions = 7 // bool
tagMapping_HasFilenames = 8 // bool
tagMapping_HasLineNumbers = 9 // bool
tagMapping_HasInlineFrames = 10 // bool
// message Location
tagLocation_ID = 1 // uint64
tagLocation_MappingID = 2 // uint64
tagLocation_Address = 3 // uint64
tagLocation_Line = 4 // repeated Line
// message Line
tagLine_FunctionID = 1 // uint64
tagLine_Line = 2 // int64
// message Function
tagFunction_ID = 1 // uint64
tagFunction_Name = 2 // int64 (string table index)
tagFunction_SystemName = 3 // int64 (string table index)
tagFunction_Filename = 4 // int64 (string table index)
tagFunction_StartLine = 5 // int64
)
// stringIndex adds s to the string table if not already present
// and returns the index of s in the string table.
func (b *profileBuilder) stringIndex(s string) int64 {
id, ok := b.stringMap[s]
if !ok {
id = len(b.strings)
b.strings = append(b.strings, s)
b.stringMap[s] = id
}
return int64(id)
}
func (b *profileBuilder) flush() {
const dataFlush = 4096
if b.pb.nest == 0 && len(b.pb.data) > dataFlush {
b.zw.Write(b.pb.data)
b.pb.data = b.pb.data[:0]
}
}
// pbValueType encodes a ValueType message to b.pb.
func (b *profileBuilder) pbValueType(tag int, typ, unit string) {
start := b.pb.startMessage()
b.pb.int64(tagValueType_Type, b.stringIndex(typ))
b.pb.int64(tagValueType_Unit, b.stringIndex(unit))
b.pb.endMessage(tag, start)
}
// pbSample encodes a Sample message to b.pb.
func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) {
start := b.pb.startMessage()
b.pb.int64s(tagSample_Value, values)
b.pb.uint64s(tagSample_Location, locs)
if labels != nil {
labels()
}
b.pb.endMessage(tagProfile_Sample, start)
b.flush()
}
// pbLabel encodes a Label message to b.pb.
func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) {
start := b.pb.startMessage()
b.pb.int64Opt(tagLabel_Key, b.stringIndex(key))
b.pb.int64Opt(tagLabel_Str, b.stringIndex(str))
b.pb.int64Opt(tagLabel_Num, num)
b.pb.endMessage(tag, start)
}
// pbLine encodes a Line message to b.pb.
func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) {
start := b.pb.startMessage()
b.pb.uint64Opt(tagLine_FunctionID, funcID)
b.pb.int64Opt(tagLine_Line, line)
b.pb.endMessage(tag, start)
}
// pbMapping encodes a Mapping message to b.pb.
func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) {
start := b.pb.startMessage()
b.pb.uint64Opt(tagMapping_ID, id)
b.pb.uint64Opt(tagMapping_Start, base)
b.pb.uint64Opt(tagMapping_Limit, limit)
b.pb.uint64Opt(tagMapping_Offset, offset)
b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file))
b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID))
// TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs).
// Decide what to do about HasInlineFrames and HasLineNumbers.
// Also, another approach to handle the mapping entry with
// incomplete symbolization results is to duplicate the mapping
// entry (but with different Has* fields values) and use
// different entries for symbolized locations and unsymbolized locations.
if hasFuncs {
b.pb.bool(tagMapping_HasFunctions, true)
}
b.pb.endMessage(tag, start)
}
func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
// Expand this one address using CallersFrames so we can cache
// each expansion. In general, CallersFrames takes a whole
// stack, but in this case we know there will be no skips in
// the stack and we have return PCs anyway.
frames := runtime.CallersFrames([]uintptr{addr})
frame, more := frames.Next()
if frame.Function == "runtime.goexit" {
// Short-circuit if we see runtime.goexit so the loop
// below doesn't allocate a useless empty location.
return nil, 0
}
symbolizeResult := lookupTried
if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
symbolizeResult |= lookupFailed
}
if frame.PC == 0 {
// If we failed to resolve the frame, at least make up
// a reasonable call PC. This mostly happens in tests.
frame.PC = addr - 1
}
ret := []runtime.Frame{frame}
for frame.Function != "runtime.goexit" && more {
frame, more = frames.Next()
ret = append(ret, frame)
}
return ret, symbolizeResult
}
type locInfo struct {
// location id assigned by the profileBuilder
id uint64
// sequence of PCs, including the fake PCs returned by the traceback
// to represent inlined functions
// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
pcs []uintptr
// firstPCFrames and firstPCSymbolizeResult hold the results of the
// allFrames call for the first (leaf-most) PC this locInfo represents
firstPCFrames []runtime.Frame
firstPCSymbolizeResult symbolizeFlag
}
// newProfileBuilder returns a new profileBuilder.
// CPU profiling data obtained from the runtime can be added
// by calling b.addCPUData, and then the eventual profile
// can be obtained by calling b.finish.
func newProfileBuilder(w io.Writer, opt ProfileBuilderOptions, mapping []memMap) *profileBuilder {
zw := newGzipWriter(w)
b := &profileBuilder{
w: w,
zw: zw,
start: time.Now(),
strings: []string{""},
stringMap: map[string]int{"": 0},
locs: map[uintptr]locInfo{},
funcs: map[string]int{},
opt: opt,
}
b.mem = mapping
return b
}
// build completes and returns the constructed profile.
func (b *profileBuilder) build() {
b.end = time.Now()
b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano())
if b.havePeriod { // must be CPU profile
b.pbValueType(tagProfile_SampleType, "samples", "count")
b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds")
b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds())
b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds")
b.pb.int64Opt(tagProfile_Period, b.period)
}
for i, m := range b.mem {
hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed
b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions)
}
// TODO: Anything for tagProfile_DropFrames?
// TODO: Anything for tagProfile_KeepFrames?
b.pb.strings(tagProfile_StringTable, b.strings)
b.zw.Write(b.pb.data)
b.zw.Close()
}
// appendLocsForStack appends the location IDs for the given stack trace to the given
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
// It may return an empty slice even if locs is non-empty, for example if locs consists
// solely of runtime.goexit. We still count these empty stacks in profiles in order to
// get the right cumulative sample count.
//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
// The last frame might be truncated. Recover lost inline frames.
stk = runtime_expandFinalInlineFrame(stk)
for len(stk) > 0 {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
// When generating code for an inlined function, the compiler adds
// NOP instructions to the outermost function as a placeholder for
// each layer of inlining. When the runtime generates tracebacks for
// stacks that include inlined functions, it uses the addresses of
// those NOPs as "fake" PCs on the stack as if they were regular
// function call sites. But if a profiling signal arrives while the
// CPU is executing one of those NOPs, its PC will show up as a leaf
// in the profile with its own Location entry. So, always check
// whether addr is a "fake" PC in the context of the current call
// stack by trying to add it to the inlining deck before assuming
// that the deck is complete.
if len(b.deck.pcs) > 0 {
if added := b.deck.tryAdd(addr, l.firstPCFrames, l.firstPCSymbolizeResult); added {
stk = stk[1:]
continue
}
}
// first record the location if there is any pending accumulated info.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
// then, record the cached location.
locs = append(locs, l.id)
// Skip the matching pcs.
//
// Even if stk was truncated due to the stack depth
// limit, expandFinalInlineFrame above has already
// fixed the truncation, ensuring it is long enough.
stk = stk[len(l.pcs):]
continue
}
frames, symbolizeResult := allFrames(addr)
if len(frames) == 0 { // runtime.goexit.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
stk = stk[1:]
continue
}
if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
stk = stk[1:]
continue
}
// add failed because this addr is not inlined with the
// existing PCs in the deck. Flush the deck and retry handling
// this pc.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
// check cache again - previous emitLocation added a new entry
if l, ok := b.locs[addr]; ok {
locs = append(locs, l.id)
stk = stk[len(l.pcs):] // skip the matching pcs.
} else {
b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
stk = stk[1:]
}
}
if id := b.emitLocation(); id > 0 { // emit remaining location.
locs = append(locs, id)
}
return locs
}
// Here's an example of how Go 1.17 writes out inlined functions, compiled for
// linux/amd64. The disassembly of main.main shows two levels of inlining: main
// calls b, b calls a, a does some work.
//
// inline.go:9 0x4553ec 90 NOPL // func main() { b(v) }
// inline.go:6 0x4553ed 90 NOPL // func b(v *int) { a(v) }
// inline.go:5 0x4553ee 48c7002a000000 MOVQ $0x2a, 0(AX) // func a(v *int) { *v = 42 }
//
// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line
// 5), the runtime will report the stack as the MOVQ frame being called by the
// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line
// 9).
//
// The role of pcDeck is to collapse those three frames back into a single
// location at 0x4553ee, with file/line/function symbolization info representing
// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd
// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be
// for the caller of main.main. Because main.main was not inlined in its caller,
// the deck will reject the addition, and the fourth PC on the stack will get
// its own location.
// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
// The stack traces returned by runtime's traceback functions are fully
// expanded (at least for Go functions) and include the fake pcs representing
// inlined functions. The profile proto expects the inlined functions to be
// encoded in one Location message.
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
//
// Runtime does not directly expose whether a frame is for an inlined function
// and looking up debug info is not ideal, so we use a heuristic to filter
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
//
// Frame's Func is nil (note: also true for non-Go functions), and
// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive).
//
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
// until we observe the entry function frame.
type pcDeck struct {
pcs []uintptr
frames []runtime.Frame
symbolizeResult symbolizeFlag
// firstPCFrames indicates the number of frames associated with the first
// (leaf-most) PC in the deck
firstPCFrames int
// firstPCSymbolizeResult holds the results of the allFrames call for the
// first (leaf-most) PC in the deck
firstPCSymbolizeResult symbolizeFlag
}
func (d *pcDeck) reset() {
d.pcs = d.pcs[:0]
d.frames = d.frames[:0]
d.symbolizeResult = 0
d.firstPCFrames = 0
d.firstPCSymbolizeResult = 0
}
// tryAdd tries to add the pc and Frames expanded from it (most likely one,
// since the stack trace is already fully expanded) and the symbolizeResult
// to the deck. If it fails the caller needs to flush the deck and retry.
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
if existing := len(d.frames); existing > 0 {
// 'd.frames' are all expanded from one 'pc' and represent all
// inlined functions so we check only the last one.
newFrame := frames[0]
last := d.frames[existing-1]
if last.Func != nil { // the last frame can't be inlined. Flush.
return false
}
if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
return false
}
if last.Entry != newFrame.Entry { // newFrame is for a different function.
return false
}
if last.Function == newFrame.Function { // maybe recursion.
return false
}
}
d.pcs = append(d.pcs, pc)
d.frames = append(d.frames, frames...)
d.symbolizeResult |= symbolizeResult
if len(d.pcs) == 1 {
d.firstPCFrames = len(d.frames)
d.firstPCSymbolizeResult = symbolizeResult
}
return true
}
// emitLocation emits the new location and function information recorded in the deck
// and returns the location ID encoded in the profile protobuf.
// It emits to b.pb, so there must be no message encoding in progress.
// It resets the deck.
func (b *profileBuilder) emitLocation() uint64 {
if len(b.deck.pcs) == 0 {
return 0
}
defer b.deck.reset()
addr := b.deck.pcs[0]
firstFrame := b.deck.frames[0]
// We can't write out functions while in the middle of the
// Location message, so record new functions we encounter and
// write them out after the Location.
type newFunc struct {
id uint64
name, file string
startLine int64
}
newFuncs := make([]newFunc, 0, 8)
id := uint64(len(b.locs)) + 1
b.locs[addr] = locInfo{
id: id,
pcs: append([]uintptr{}, b.deck.pcs...),
firstPCSymbolizeResult: b.deck.firstPCSymbolizeResult,
firstPCFrames: append([]runtime.Frame{}, b.deck.frames[:b.deck.firstPCFrames]...),
}
start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id)
b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
for _, frame := range b.deck.frames {
// Write out each line in frame expansion.
funcID := uint64(b.funcs[frame.Function])
if funcID == 0 {
funcID = uint64(len(b.funcs)) + 1
b.funcs[frame.Function] = int(funcID)
var name string
if b.opt.GenericsFrames {
name = runtime_FrameSymbolName(&frame)
} else {
name = frame.Function
}
newFuncs = append(newFuncs, newFunc{
id: funcID,
name: name,
file: frame.File,
startLine: int64(runtime_FrameStartLine(&frame)),
})
}
b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
}
for i := range b.mem {
if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
m := b.mem[i]
m.funcs |= b.deck.symbolizeResult
b.mem[i] = m
break
}
}
b.pb.endMessage(tagProfile_Location, start)
// Write out functions we found during frame expansion.
for _, fn := range newFuncs {
start := b.pb.startMessage()
b.pb.uint64Opt(tagFunction_ID, fn.id)
b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
b.pb.int64Opt(tagFunction_StartLine, fn.startLine)
b.pb.endMessage(tagProfile_Function, start)
}
b.flush()
return id
}
func readMapping() []memMap {
data, _ := os.ReadFile("/proc/self/maps")
var mem []memMap
parseProcSelfMaps(data, func(lo, hi, offset uint64, file, buildID string) {
mem = append(mem, memMap{
start: uintptr(lo),
end: uintptr(hi),
offset: offset,
file: file,
buildID: buildID,
fake: false,
})
})
if len(mem) == 0 { // pprof expects a map entry, so fake one.
mem = []memMap{{
start: uintptr(0),
end: uintptr(0),
offset: 0,
file: "",
buildID: "",
fake: true,
}}
}
return mem
}
var space = []byte(" ")
var newline = []byte("\n")
func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
// $ cat /proc/self/maps
// 00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat
// 0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat
// 0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat
// 014ab000-014cc000 rw-p 00000000 00:00 0 [heap]
// 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive
// 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0
// 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
// 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0
// 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0
// 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
// 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
// 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0
// 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack]
// 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso]
// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
var line []byte
// next removes and returns the next field in the line.
// It also removes from line any spaces following the field.
next := func() []byte {
var f []byte
f, line, _ = bytesCut(line, space)
line = bytes.TrimLeft(line, " ")
return f
}
for len(data) > 0 {
line, data, _ = bytesCut(data, newline)
addr := next()
loStr, hiStr, ok := stringsCut(string(addr), "-")
if !ok {
continue
}
lo, err := strconv.ParseUint(loStr, 16, 64)
if err != nil {
continue
}
hi, err := strconv.ParseUint(hiStr, 16, 64)
if err != nil {
continue
}
perm := next()
if len(perm) < 4 || perm[2] != 'x' {
// Only interested in executable mappings.
continue
}
offset, err := strconv.ParseUint(string(next()), 16, 64)
if err != nil {
continue
}
next() // dev
inode := next() // inode
if line == nil {
continue
}
file := string(line)
// Trim deleted file marker.
deletedStr := " (deleted)"
deletedLen := len(deletedStr)
if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr {
file = file[:len(file)-deletedLen]
}
if len(inode) == 1 && inode[0] == '0' && file == "" {
// Huge-page text mappings list the initial fragment of
// mapped but unpopulated memory as being inode 0.
// Don't report that part.
// But [vdso] and [vsyscall] are inode 0, so let non-empty file names through.
continue
}
// TODO: pprof's remapMappingIDs makes one adjustment:
// 1. If there is an /anon_hugepage mapping first and it is
// consecutive to a next mapping, drop the /anon_hugepage.
// There's no indication why this is needed.
// Let's try not doing this and see what breaks.
// If we do need it, it would go here, before we
// enter the mappings into b.mem in the first place.
buildID, _ := elfBuildID(file)
addMapping(lo, hi, offset, file, buildID)
}
}
// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, nil, false.
//
// Cut returns slices of the original slice s, not copies.
func bytesCut(s, sep []byte) (before, after []byte, found bool) {
if i := bytes.Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, nil, false
}
// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, "", false.
func stringsCut(s, sep string) (before, after string, found bool) {
if i := strings.Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, "", false
}
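For orientation, a minimal standalone sketch — not part of the vendored file — of how one /proc/self/maps line breaks into the fields parseProcSelfMaps reads above (address range, permissions, offset, device, inode, path). It uses the stdlib strings.Cut, which the local bytesCut/stringsCut helpers mirror for toolchains that predate it.

// Hypothetical standalone example; field handling is simplified versus the parser above.
package main

import (
    "fmt"
    "strconv"
    "strings"
)

func main() {
    line := "7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so"
    fields := strings.Fields(line)
    loStr, hiStr, _ := strings.Cut(fields[0], "-")
    lo, _ := strconv.ParseUint(loStr, 16, 64)
    hi, _ := strconv.ParseUint(hiStr, 16, 64)
    executable := len(fields[1]) >= 4 && fields[1][2] == 'x' // only executable mappings are kept
    offset, _ := strconv.ParseUint(fields[2], 16, 64)
    fmt.Printf("lo=%#x hi=%#x exec=%v offset=%#x file=%s\n", lo, hi, executable, offset, fields[5])
}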

@ -0,0 +1,141 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pprof
// A protobuf is a simple protocol buffer encoder.
type protobuf struct {
data []byte
tmp [16]byte
nest int
}
func (b *protobuf) varint(x uint64) {
for x >= 128 {
b.data = append(b.data, byte(x)|0x80)
x >>= 7
}
b.data = append(b.data, byte(x))
}
func (b *protobuf) length(tag int, len int) {
b.varint(uint64(tag)<<3 | 2)
b.varint(uint64(len))
}
func (b *protobuf) uint64(tag int, x uint64) {
// append varint to b.data
b.varint(uint64(tag)<<3 | 0)
b.varint(x)
}
func (b *protobuf) uint64s(tag int, x []uint64) {
if len(x) > 2 {
// Use packed encoding
n1 := len(b.data)
for _, u := range x {
b.varint(u)
}
n2 := len(b.data)
b.length(tag, n2-n1)
n3 := len(b.data)
copy(b.tmp[:], b.data[n2:n3])
copy(b.data[n1+(n3-n2):], b.data[n1:n2])
copy(b.data[n1:], b.tmp[:n3-n2])
return
}
for _, u := range x {
b.uint64(tag, u)
}
}
func (b *protobuf) uint64Opt(tag int, x uint64) {
if x == 0 {
return
}
b.uint64(tag, x)
}
func (b *protobuf) int64(tag int, x int64) {
u := uint64(x)
b.uint64(tag, u)
}
func (b *protobuf) int64Opt(tag int, x int64) {
if x == 0 {
return
}
b.int64(tag, x)
}
func (b *protobuf) int64s(tag int, x []int64) {
if len(x) > 2 {
// Use packed encoding
n1 := len(b.data)
for _, u := range x {
b.varint(uint64(u))
}
n2 := len(b.data)
b.length(tag, n2-n1)
n3 := len(b.data)
copy(b.tmp[:], b.data[n2:n3])
copy(b.data[n1+(n3-n2):], b.data[n1:n2])
copy(b.data[n1:], b.tmp[:n3-n2])
return
}
for _, u := range x {
b.int64(tag, u)
}
}
func (b *protobuf) string(tag int, x string) {
b.length(tag, len(x))
b.data = append(b.data, x...)
}
func (b *protobuf) strings(tag int, x []string) {
for _, s := range x {
b.string(tag, s)
}
}
func (b *protobuf) stringOpt(tag int, x string) {
if x == "" {
return
}
b.string(tag, x)
}
func (b *protobuf) bool(tag int, x bool) {
if x {
b.uint64(tag, 1)
} else {
b.uint64(tag, 0)
}
}
func (b *protobuf) boolOpt(tag int, x bool) {
if !x {
return
}
b.bool(tag, x)
}
type msgOffset int
func (b *protobuf) startMessage() msgOffset {
b.nest++
return msgOffset(len(b.data))
}
func (b *protobuf) endMessage(tag int, start msgOffset) {
n1 := int(start)
n2 := len(b.data)
b.length(tag, n2-n1)
n3 := len(b.data)
copy(b.tmp[:], b.data[n2:n3])
copy(b.data[n1+(n3-n2):], b.data[n1:n2])
copy(b.data[n1:], b.tmp[:n3-n2])
b.nest--
}
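A minimal standalone illustration — not part of the vendored encoder — of the varint scheme used above: seven payload bits per byte, a 0x80 continuation bit on every byte except the last, so 300 encodes as 0xac 0x02.

// Standalone sketch reimplementing the same loop as protobuf.varint for illustration.
package main

import "fmt"

func varint(x uint64) []byte {
    var out []byte
    for x >= 128 {
        out = append(out, byte(x)|0x80)
        x >>= 7
    }
    return append(out, byte(x))
}

func main() {
    fmt.Printf("% x\n", varint(300)) // prints: ac 02
}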

@ -0,0 +1,17 @@
//go:build go1.16 && !go1.23
// +build go1.16,!go1.23
package pprof
// unsafe is required for go:linkname
import _ "unsafe"
//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
//go:linkname runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_cyclesPerSecond() int64
func Runtime_cyclesPerSecond() int64 {
return runtime_cyclesPerSecond()
}

@ -0,0 +1,16 @@
//go:build go1.16 && !go1.21
// +build go1.16,!go1.21
package pprof
import "runtime"
// runtime_FrameStartLine is only available in go1.21+ (runtime/symtab.go); this pre-go1.21 stub returns 0.
func runtime_FrameStartLine(f *runtime.Frame) int {
return 0
}
// runtime_FrameSymbolName is only available in go1.21+ (runtime/symtab.go); this pre-go1.21 stub falls back to Frame.Function.
func runtime_FrameSymbolName(f *runtime.Frame) string {
return f.Function
}

@ -0,0 +1,21 @@
//go:build go1.21
// +build go1.21
package pprof
import (
"runtime"
_ "unsafe"
)
// runtime_FrameStartLine is defined in runtime/symtab.go.
//
//go:noescape
//go:linkname runtime_FrameStartLine runtime/pprof.runtime_FrameStartLine
func runtime_FrameStartLine(f *runtime.Frame) int
// runtime_FrameSymbolName is defined in runtime/symtab.go.
//
//go:noescape
//go:linkname runtime_FrameSymbolName runtime/pprof.runtime_FrameSymbolName
func runtime_FrameSymbolName(f *runtime.Frame) string

@ -0,0 +1,9 @@
package godeltaprof
type ProfileOptions struct {
// for go1.21+ if true - use runtime_FrameSymbolName - produces frames with generic types, for example [go.shape.int]
// for go1.21+ if false - use runtime.Frame->Function - produces frames with generic types omitted [...]
// pre 1.21 - always use runtime.Frame->Function - produces frames with generic types omitted [...]
GenericsFrames bool
LazyMappings bool
}

@ -3,7 +3,7 @@
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@v0.9.3
- go install mvdan.cc/garble@v0.10.1
builds:
-
@ -92,16 +92,7 @@ builds:
archives:
-
id: s2-binaries
name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
replacements:
aix: AIX
darwin: OSX
linux: Linux
windows: Windows
386: i386
amd64: x86_64
freebsd: FreeBSD
netbsd: NetBSD
name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
format_overrides:
- goos: windows
format: zip
@ -125,7 +116,7 @@ changelog:
nfpms:
-
file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
vendor: Klaus Post
homepage: https://github.com/klauspost/compress
maintainer: Klaus Post <klauspost@gmail.com>
@ -134,8 +125,3 @@ nfpms:
formats:
- deb
- rpm
replacements:
darwin: Darwin
linux: Linux
freebsd: FreeBSD
amd64: x86_64

@ -16,6 +16,26 @@ This package provides various compression algorithms.
# changelog
* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
* Add experimental dictionary builder https://github.com/klauspost/compress/pull/853
* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
* zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
* zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
@ -50,6 +70,9 @@ This package provides various compression algorithms.
* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
<details>
<summary>See changes to v1.15.x</summary>
* Jan 21st, 2023 (v1.15.15)
* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
@ -176,6 +199,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati
While the release has been extensively tested, it is recommended to test when upgrading.
</details>
<details>
<summary>See changes to v1.14.x</summary>
@ -636,6 +661,8 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.
# license

@ -7,6 +7,7 @@ package flate
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflateLazy
case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize:
d.w.logNewTablePenalty = 7
d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize}
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
return zw, err
}
// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
const MinCustomWindowSize = 32
// MaxCustomWindowSize is the maximum custom window size that can be sent to NewWriterWindow.
const MaxCustomWindowSize = windowSize
// NewWriterWindow returns a new Writer compressing data with a custom window size.
// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
if windowSize < MinCustomWindowSize {
return nil, errors.New("flate: requested window size less than MinWindowSize")
}
if windowSize > MaxCustomWindowSize {
return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize")
}
var dw Writer
if err := dw.d.init(w, -windowSize); err != nil {
return nil, err
}
return &dw, nil
}
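A hedged usage sketch of the constructor added above; it relies only on NewWriterWindow as defined here plus the Writer's standard Write/Close methods, and assumes the vendored import path github.com/klauspost/compress/flate.

// Illustrative only, not part of the vendored file.
package main

import (
    "bytes"
    "log"

    "github.com/klauspost/compress/flate"
)

func main() {
    var buf bytes.Buffer
    // 4 KiB window; the value must lie in [MinCustomWindowSize, MaxCustomWindowSize].
    w, err := flate.NewWriterWindow(&buf, 4<<10)
    if err != nil {
        log.Fatal(err)
    }
    if _, err := w.Write([]byte("hello hello hello")); err != nil {
        log.Fatal(err)
    }
    if err := w.Close(); err != nil {
        log.Fatal(err)
    }
    log.Printf("compressed to %d bytes", buf.Len())
}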
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {

@ -8,7 +8,6 @@ package flate
import (
"encoding/binary"
"fmt"
"math/bits"
)
type fastEnc interface {
@ -192,25 +191,3 @@ func (e *fastGen) Reset() {
}
e.hist = e.hist[:0]
}
// matchLen returns the maximum length.
// 'a' must be the shortest of the two.
func matchLen(a, b []byte) int {
var checked int
for len(a) >= 8 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
a = a[8:]
b = b[8:]
}
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
return i + checked
}
}
return len(a) + checked
}

@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
const sanity = false
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
h.chunks = new([huffmanNumChunks]uint16)
}
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
}
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
links := h.links[off][:0]
h.links[off] = links[:numLinks]
h.links[off] = h.links[off][:numLinks]
}
}
} else {
@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
return true
}
// The actual read interface needed by NewReader.
// Reader is the actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
@ -285,6 +286,18 @@ type Reader interface {
io.ByteReader
}
type step uint8
const (
copyData step = iota + 1
nextBlock
huffmanBytesBuffer
huffmanBytesReader
huffmanBufioReader
huffmanStringsReader
huffmanGenericReader
)
// Decompress state.
type decompressor struct {
// Input source.
@ -303,7 +316,7 @@ type decompressor struct {
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
step step
stepState int
err error
toRead []byte
@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() {
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()()
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("predefinied huffman block")
}
@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() {
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()()
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("dynamic huffman block")
}
@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) {
if f.err != nil {
return 0, f.err
}
f.step(f)
f.doStep()
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
// Support the io.WriteTo interface for io.Copy and friends.
// WriteTo implements the io.WriterTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
return total, f.err
}
if f.err == nil {
f.step(f)
f.doStep()
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
@ -631,7 +646,7 @@ func (f *decompressor) copyData() {
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
f.step = copyData
return
}
f.finishBlock()
@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() {
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
f.step = nextBlock
}
func (f *decompressor) doStep() {
switch f.step {
case copyData:
f.copyData()
case nextBlock:
f.nextBlock()
case huffmanBytesBuffer:
f.huffmanBytesBuffer()
case huffmanBytesReader:
f.huffmanBytesReader()
case huffmanBufioReader:
f.huffmanBufioReader()
case huffmanStringsReader:
f.huffmanStringsReader()
case huffmanGenericReader:
f.huffmanGenericReader()
default:
panic("BUG: unexpected step state")
}
}
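The hunks above replace the stored `step func(*decompressor)` field with a small enum dispatched through doStep. A minimal standalone sketch of the same pattern (all names here are illustrative, not from the vendored code):

package main

import "fmt"

type step uint8

const (
    stepRead step = iota + 1
    stepDecode
)

type machine struct {
    step step
}

// doStep dispatches on the enum instead of calling a stored function value.
func (m *machine) doStep() {
    switch m.step {
    case stepRead:
        fmt.Println("read")
        m.step = stepDecode
    case stepDecode:
        fmt.Println("decode")
    default:
        panic("BUG: unexpected step state")
    }
}

func main() {
    m := &machine{step: stepRead}
    m.doStep() // read
    m.doStep() // decode
}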
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: (*decompressor).nextBlock,
step: nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.step = nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}

@ -85,7 +85,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer
f.step = huffmanBytesBuffer
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -251,7 +251,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
f.step = huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -336,7 +336,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesReader
f.step = huffmanBytesReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -502,7 +502,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBytesReader // We need to continue this work
f.step = huffmanBytesReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -587,7 +587,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBufioReader
f.step = huffmanBufioReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -753,7 +753,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanBufioReader // We need to continue this work
f.step = huffmanBufioReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -838,7 +838,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanStringsReader
f.step = huffmanStringsReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -1004,7 +1004,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanStringsReader // We need to continue this work
f.step = huffmanStringsReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -1089,7 +1089,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanGenericReader
f.step = huffmanGenericReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@ -1255,7 +1255,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).huffmanGenericReader // We need to continue this work
f.step = huffmanGenericReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@ -1265,19 +1265,19 @@ copyHistory:
// Not reached
}
func (f *decompressor) huffmanBlockDecoder() func() {
func (f *decompressor) huffmanBlockDecoder() {
switch f.r.(type) {
case *bytes.Buffer:
return f.huffmanBytesBuffer
f.huffmanBytesBuffer()
case *bytes.Reader:
return f.huffmanBytesReader
f.huffmanBytesReader()
case *bufio.Reader:
return f.huffmanBufioReader
f.huffmanBufioReader()
case *strings.Reader:
return f.huffmanStringsReader
f.huffmanStringsReader()
case Reader:
return f.huffmanGenericReader
f.huffmanGenericReader()
default:
return f.huffmanGenericReader
f.huffmanGenericReader()
}
}

@ -308,3 +308,401 @@ emitRemainder:
emitLiteral(dst, src[nextEmit:])
}
}
// fastEncL5Window is a level 5 encoder,
// but with a custom window size.
type fastEncL5Window struct {
hist []byte
cur int32
maxOffset int32
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
maxMatchOffset := e.maxOffset
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// The sweet spot is 2-3 bytes depending on the input;
// 3 is only a little better when it helps, but sometimes a lot worse.
// The skipped bytes are tested in the "extend backwards" step below,
// and are still picked up as part of the match if they do match.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
t2 := eLong - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Add a long-table entry at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
// Reset the encoding table.
func (e *fastEncL5Window) Reset() {
// We keep the same allocs, since we are compressing the same block sizes.
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= int32(bufferReset) {
e.cur += e.maxOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
func (e *fastEncL5Window) addBlock(src []byte) int32 {
// check if we have space already
maxMatchOffset := e.maxOffset
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < int(maxMatchOffset*2) {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// matchlen will return the match length between offsets s and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >= 0 and s < len(src).
func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets s and t in src.
// It is assumed that s > t, that t >= 0 and s < len(src).
func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}

@ -0,0 +1,16 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

@ -0,0 +1,68 @@
// Copied from S2 implementation.
//go:build !appengine && !noasm && gc
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET

@ -0,0 +1,33 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
import (
"encoding/binary"
"math/bits"
)
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}
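A standalone check of the scalar fallback above — the function body is duplicated here purely for illustration: the 8-byte word comparison locates the first differing byte via a trailing-zero count, and the byte loop finishes the remainder.

package main

import (
    "encoding/binary"
    "fmt"
    "math/bits"
)

// matchLen is the same prefix-length fallback as above, copied for a runnable demo.
func matchLen(a, b []byte) (n int) {
    for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
        diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
        if diff != 0 {
            return n + bits.TrailingZeros64(diff)>>3
        }
        n += 8
    }
    for i := range a {
        if a[i] != b[i] {
            break
        }
        n++
    }
    return n
}

func main() {
    fmt.Println(matchLen([]byte("abcdefghXYZ"), []byte("abcdefghQRS"))) // 8
}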

@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.

@ -199,7 +199,8 @@ func (s *Scratch) compress(src []byte) error {
c2.flush(s.actualTableLog)
c1.flush(s.actualTableLog)
return s.bw.close()
s.bw.close()
return nil
}
// writeCount will write the normalized histogram count to header.
@ -211,7 +212,7 @@ func (s *Scratch) writeCount() error {
previous0 bool
charnum uint16
maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3
// Write Table Size
bitStream = uint32(tableLog - minTablelog)

@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
br: z.br,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr

@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
return z, nil
}
// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
const MinCustomWindowSize = flate.MinCustomWindowSize
// MaxCustomWindowSize is the maximum custom window size that can be sent to NewWriterWindow.
const MaxCustomWindowSize = flate.MaxCustomWindowSize
// NewWriterWindow returns a new Writer compressing data with a custom window size.
// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
if windowSize < MinCustomWindowSize {
return nil, errors.New("gzip: requested window size less than MinWindowSize")
}
if windowSize > MaxCustomWindowSize {
return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize")
}
z := new(Writer)
z.init(w, -windowSize)
return z, nil
}
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if level != StatelessCompression {

@ -94,10 +94,9 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}

@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err
}
func (s *Scratch) compress1X(src []byte) ([]byte, error) {
return s.compress1xDo(s.Out, src)
return s.compress1xDo(s.Out, src), nil
}
func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
func (s *Scratch) compress1xDo(dst, src []byte) []byte {
var bw = bitWriter{out: dst}
// N is length divisible by 4.
@ -260,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
}
err := bw.close()
return bw.out, err
bw.close()
return bw.out
}
var sixZeros [6]byte
@ -283,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) {
}
src = src[len(toDo):]
var err error
idx := len(s.Out)
s.Out, err = s.compress1xDo(s.Out, toDo)
if err != nil {
return nil, err
}
s.Out = s.compress1xDo(s.Out, toDo)
if len(s.Out)-idx > math.MaxUint16 {
// We cannot store the size in the jump table
return nil, ErrIncompressible
@ -315,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
segmentSize := (len(src) + 3) / 4
var wg sync.WaitGroup
var errs [4]error
wg.Add(4)
for i := 0; i < 4; i++ {
toDo := src
@ -326,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
// Separate goroutine for each block.
go func(i int) {
s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
wg.Done()
}(i)
}
wg.Wait()
for i := 0; i < 4; i++ {
if errs[i] != nil {
return nil, errs[i]
}
o := s.tmpOut[i]
if len(o) > math.MaxUint16 {
// We cannot store the size in the jump table

@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict {
return &d
}
// MakeDictManual will create a dictionary.
// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
// A manual first repeat index into data must be provided.
// It must be less than len(data)-8.
func MakeDictManual(data []byte, firstIdx uint16) *Dict {
if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
return nil
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
d.repeat = int(firstIdx)
return &d
}
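A hedged sketch of calling the constructor above; it relies only on the signature shown here and assumes the vendored import path github.com/klauspost/compress/s2. The sample length is an assumption chosen to sit between the MinDictSize/MaxDictSize bounds mentioned in the doc comment.

// Illustrative only, not part of the vendored file.
package main

import (
    "fmt"

    "github.com/klauspost/compress/s2"
)

func main() {
    data := []byte("example dictionary content: common prefixes, common suffixes, common words")
    // firstIdx 0 makes the start of data the initial repeat candidate; it must
    // stay below len(data)-8 per the doc comment above.
    dict := s2.MakeDictManual(data, 0)
    if dict == nil {
        fmt.Println("input rejected: size or first index out of bounds")
        return
    }
    fmt.Println("dictionary created")
}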
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.

@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte {
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
if len(src) < 6 || int64(len(src)) > 0xffffffff {
if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {

@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
if s >= MaxDictSrcOffset {
return match{offset: candidate, s: s}
}
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {

@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int {
return len(a) + checked
}
// input must be > inputMargin
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
@ -501,6 +502,7 @@ emitRemainder:
return d
}
// length must be > inputMargin.
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (

File diff suppressed because it is too large Load Diff

@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) {
// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
type offset struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}
x := struct {
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
} `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []offset `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
}{
TotalUncompressed: i.TotalUncompressed,
TotalCompressed: i.TotalCompressed,
EstBlockUncomp: i.estBlockUncomp,
}
for _, v := range i.info {
x.Offsets = append(x.Offsets, struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
}
b, _ := json.MarshalIndent(x, "", " ")
return b

@ -17,7 +17,6 @@ import (
// for aligning the input.
type bitReader struct {
in []byte
off uint // next byte to read is at in[off - 1]
value uint64 // Maybe use [16]byte, but shifting is awkward.
bitsRead uint8
}
@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
return errors.New("corrupt stream: too short")
}
b.in = in
b.off = uint(len(in))
// The highest bit of the last byte indicates where to start
v := in[len(in)-1]
if v == 0 {
@ -69,21 +67,19 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
// 2 bounds checks.
v := b.in[b.off-4:]
v = v[:4]
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
// Do single re-slice to avoid bounds checks.
b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
v := b.in[len(b.in)-8:]
b.in = b.in[:len(b.in)-8]
b.value = binary.LittleEndian.Uint64(v)
b.bitsRead = 0
b.off -= 8
}
// fill() will make sure at least 32 bits are available.
@ -91,25 +87,25 @@ func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off >= 4 {
v := b.in[b.off-4:]
v = v[:4]
if len(b.in) >= 4 {
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
return
}
for b.off > 0 {
b.value = (b.value << 8) | uint64(b.in[b.off-1])
b.bitsRead -= 8
b.off--
b.bitsRead -= uint8(8 * len(b.in))
for len(b.in) > 0 {
b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
b.in = b.in[:len(b.in)-1]
}
}
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
return b.off == 0 && b.bitsRead >= 64
return len(b.in) == 0 && b.bitsRead >= 64
}
// overread returns true if more bits have been requested than is on the stream.
@ -119,7 +115,7 @@ func (b *bitReader) overread() bool {
// remain returns the number of bits remaining.
func (b *bitReader) remain() uint {
return b.off*8 + 64 - uint(b.bitsRead)
return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
}
// close the bitstream and returns an error if out-of-buffer reads occurred.

@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.

@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
if len(lits) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
} else if len(lits) > 32 {
} else if len(lits) > 16 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
} else {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(lits) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSizes(len(out), len(lits), single)
if len(out)+lh.size() >= len(lits) {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
if debugEncoder {
@ -503,7 +510,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.literals) >= 1024 && !raw {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
} else if len(b.literals) > 32 && !raw {
} else if len(b.literals) > 16 && !raw {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@ -511,6 +518,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(b.literals) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSize(len(b.literals))
szRaw := lh.size()
lh.setSizes(len(out), len(b.literals), single)
szComp := lh.size()
if len(out)+szComp >= len(b.literals)+szRaw {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
@ -773,10 +791,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
ll.flush(llEnc.actualTableLog)
err = wr.close()
if err != nil {
return err
}
wr.close()
b.output = wr.out
// Maybe even add a bigger margin.

@ -1,10 +1,13 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"sort"
"github.com/klauspost/compress/huff0"
)
@ -14,9 +17,8 @@ type dict struct {
litEnc *huff0.Scratch
llDec, ofDec, mlDec sequenceDec
//llEnc, ofEnc, mlEnc []*fseEncoder
offsets [3]int
content []byte
offsets [3]int
content []byte
}
const dictMagic = "\x37\xa4\x30\xec"
@ -159,3 +161,374 @@ func InspectDictionary(b []byte) (interface {
d, err := loadDict(b)
return d, err
}
type BuildDictOptions struct {
// Dictionary ID.
ID uint32
// Content to use to create dictionary tables.
Contents [][]byte
// History to use for all blocks.
History []byte
// Offsets to use.
Offsets [3]int
// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
// See https://github.com/facebook/zstd/issues/3724
CompatV155 bool
// Use the specified encoder level.
// The dictionary will be built using the specified encoder level,
// which will reflect speed and make the dictionary tailored for that level.
// If not set SpeedBestCompression will be used.
Level EncoderLevel
// DebugOut will write stats and other details here if set.
DebugOut io.Writer
}
func BuildDict(o BuildDictOptions) ([]byte, error) {
initPredefined()
hist := o.History
contents := o.Contents
debug := o.DebugOut != nil
println := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintln(o.DebugOut, args...)
}
}
printf := func(s string, args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintf(o.DebugOut, s, args...)
}
}
print := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprint(o.DebugOut, args...)
}
}
if int64(len(hist)) > dictMaxLength {
return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
}
if len(hist) < 8 {
return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
}
if len(contents) == 0 {
return nil, errors.New("no content provided")
}
d := dict{
id: o.ID,
litEnc: nil,
llDec: sequenceDec{},
ofDec: sequenceDec{},
mlDec: sequenceDec{},
offsets: o.Offsets,
content: hist,
}
block := blockEnc{lowMem: false}
block.init()
enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
if o.Level != 0 {
eOpts := encoderOptions{
level: o.Level,
blockSize: maxMatchLen,
windowSize: maxMatchLen,
dict: &d,
lowMem: false,
}
enc = eOpts.encoder()
} else {
o.Level = SpeedBestCompression
}
var (
remain [256]int
ll [256]int
ml [256]int
of [256]int
)
addValues := func(dst *[256]int, src []byte) {
for _, v := range src {
dst[v]++
}
}
addHist := func(dst *[256]int, src *[256]uint32) {
for i, v := range src {
dst[i] += int(v)
}
}
seqs := 0
nUsed := 0
litTotal := 0
newOffsets := make(map[uint32]int, 1000)
for _, b := range contents {
block.reset(nil)
if len(b) < 8 {
continue
}
nUsed++
enc.Reset(&d, true)
enc.Encode(&block, b)
addValues(&remain, block.literals)
litTotal += len(block.literals)
seqs += len(block.sequences)
block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram())
addHist(&ml, block.coders.mlEnc.Histogram())
addHist(&of, block.coders.ofEnc.Histogram())
for i, seq := range block.sequences {
if i > 3 {
break
}
offset := seq.offset
if offset == 0 {
continue
}
if offset > 3 {
newOffsets[offset-3]++
} else {
newOffsets[uint32(o.Offsets[offset-1])]++
}
}
}
// Find most used offsets.
var sortedOffsets []uint32
for k := range newOffsets {
sortedOffsets = append(sortedOffsets, k)
}
sort.Slice(sortedOffsets, func(i, j int) bool {
a, b := sortedOffsets[i], sortedOffsets[j]
if a == b {
// Prefer the longer offset
return sortedOffsets[i] > sortedOffsets[j]
}
return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
})
if len(sortedOffsets) > 3 {
if debug {
print("Offsets:")
for i, v := range sortedOffsets {
if i > 20 {
break
}
printf("[%d: %d],", v, newOffsets[v])
}
println("")
}
sortedOffsets = sortedOffsets[:3]
}
for i, v := range sortedOffsets {
o.Offsets[i] = int(v)
}
if debug {
println("New repeat offsets", o.Offsets)
}
if nUsed == 0 || seqs == 0 {
return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
}
if debug {
println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
}
if seqs/nUsed < 512 {
// Use 512 as minimum.
nUsed = seqs / 512
}
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram()
var maxSym uint8
var maxCount int
var fakeLength int
for i, v := range src {
if v > 0 {
v = v / nUsed
if v == 0 {
v = 1
}
}
if v > maxCount {
maxCount = v
}
if v != 0 {
maxSym = uint8(i)
}
fakeLength += v
hist[i] = uint32(v)
}
dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false
dst.useRLE = false
err := dst.normalizeCount(fakeLength)
if err != nil {
return nil, err
}
if debug {
println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
}
return dst.writeCount(nil)
}
if debug {
print("Literal lengths: ")
}
llTable, err := copyHist(block.coders.llEnc, &ll)
if err != nil {
return nil, err
}
if debug {
print("Match lengths: ")
}
mlTable, err := copyHist(block.coders.mlEnc, &ml)
if err != nil {
return nil, err
}
if debug {
print("Offsets: ")
}
ofTable, err := copyHist(block.coders.ofEnc, &of)
if err != nil {
return nil, err
}
// Literal table
avgSize := litTotal
if avgSize > huff0.BlockSizeMax/2 {
avgSize = huff0.BlockSizeMax / 2
}
huffBuff := make([]byte, 0, avgSize)
// Target size
div := litTotal / avgSize
if div < 1 {
div = 1
}
if debug {
println("Huffman weights:")
}
for i, n := range remain[:] {
if n > 0 {
n = n / div
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
if debug {
printf("[%d: %d], ", i, n)
}
}
}
if o.CompatV155 && remain[255]/div == 0 {
huffBuff = append(huffBuff, 255)
}
scratch := &huff0.Scratch{TableLog: 11}
for tries := 0; tries < 255; tries++ {
scratch = &huff0.Scratch{TableLog: 11}
_, _, err = huff0.Compress1X(huffBuff, scratch)
if err == nil {
break
}
if debug {
printf("Try %d: Huffman error: %v\n", tries+1, err)
}
huffBuff = huffBuff[:0]
if tries == 250 {
if debug {
println("Huffman: Bailing out with predefined table")
}
// Bail out.... Just generate something
huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
for i := 0; i < 128; i++ {
huffBuff = append(huffBuff, byte(i))
}
continue
}
if errors.Is(err, huff0.ErrIncompressible) {
// Try truncating least common.
for i, n := range remain[:] {
if n > 0 {
n = n / (div * (i + 1))
if n > 0 {
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
huffBuff = append(huffBuff, 255)
}
if len(huffBuff) == 0 {
huffBuff = append(huffBuff, 0, 255)
}
}
if errors.Is(err, huff0.ErrUseRLE) {
for i, n := range remain[:] {
n = n / (div * (i + 1))
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
var out bytes.Buffer
out.Write([]byte(dictMagic))
out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
out.Write(scratch.OutTable)
if debug {
println("huff table:", len(scratch.OutTable), "bytes")
println("of table:", len(ofTable), "bytes")
println("ml table:", len(mlTable), "bytes")
println("ll table:", len(llTable), "bytes")
}
out.Write(ofTable)
out.Write(mlTable)
out.Write(llTable)
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
out.Write(hist)
if debug {
_, err := loadDict(out.Bytes())
if err != nil {
panic(err)
}
i, err := InspectDictionary(out.Bytes())
if err != nil {
panic(err)
}
println("ID:", i.ID())
println("Content size:", i.ContentSize())
println("Encoder:", i.LitEncoder() != nil)
println("Offsets:", i.Offsets())
var totalSize int
for _, b := range contents {
totalSize += len(b)
}
encWith := func(opts ...EOption) int {
enc, err := NewWriter(nil, opts...)
if err != nil {
panic(err)
}
defer enc.Close()
var dst []byte
var totalSize int
for _, b := range contents {
dst = enc.EncodeAll(b, dst[:0])
totalSize += len(dst)
}
return totalSize
}
plain := encWith(WithEncoderLevel(o.Level))
withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
println("Input size:", totalSize)
println("Plain Compressed:", plain)
println("Dict Compressed:", withDict)
println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
}
return out.Bytes(), nil
}
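A hedged sketch of driving the builder defined above. Only the BuildDictOptions fields and the BuildDict signature visible in this diff are used; the sample payloads are deliberately repetitive placeholders (so the encoder finds sequences), and the import path is assumed to be the vendored github.com/klauspost/compress/zstd.

// Illustrative only, not part of the vendored file.
package main

import (
    "bytes"
    "log"

    "github.com/klauspost/compress/zstd"
)

func main() {
    // Highly repetitive samples so the dictionary builder sees plenty of matches.
    samples := [][]byte{
        bytes.Repeat([]byte("key=value;"), 32),
        bytes.Repeat([]byte("name=loki;"), 32),
        bytes.Repeat([]byte("job=ingest;"), 32),
    }
    history := bytes.Repeat([]byte("key=value;name=loki;job=ingest;"), 8)

    dict, err := zstd.BuildDict(zstd.BuildDictOptions{
        ID:       1,
        Contents: samples,
        History:  history,
        Offsets:  [3]int{1, 4, 8}, // zstd's conventional starting repeat offsets
    })
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("dictionary: %d bytes", len(dict))
}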

@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@ -197,12 +197,13 @@ encodeLoop:
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
delta := s - offset
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
return
}
if debugAsserts {
if offset <= 0 {
panic(offset)
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
@ -226,7 +227,7 @@ encodeLoop:
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
if true {
// Extend candidate match backwards as far as possible.
tMin := s - e.maxMatchOff
if tMin < 0 {
@ -281,6 +282,7 @@ encodeLoop:
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
@ -343,8 +345,8 @@ encodeLoop:
if best.rep > 0 {
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
if debugAsserts && s <= nextEmit {
panic("s <= nextEmit")
if debugAsserts && s < nextEmit {
panic("s < nextEmit")
}
addLiterals(&seq, best.s)
@ -356,19 +358,16 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)
// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur
for index0 < s {
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@ -377,6 +376,7 @@ encodeLoop:
off++
index0++
}
switch best.rep {
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
@ -385,12 +385,17 @@ encodeLoop:
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@ -418,19 +423,25 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}
// Index old s + 1 -> s - 1
for index0 < s {
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
off++
}
if s >= sLimit {
break encodeLoop
}
}

@ -145,7 +145,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@ -162,6 +162,7 @@ encodeLoop:
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -258,7 +259,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@ -498,15 +498,15 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
off += 2
}
cv = load6432(src, s)
@ -672,7 +672,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@ -691,6 +691,7 @@ encodeLoop:
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -726,7 +727,6 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff
nextEmit = s
@ -790,7 +790,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@ -1024,18 +1023,18 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
off += 2
}
cv = load6432(src, s)

@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
DictID: e.o.dict.ID(),
}
dst, err := fh.appendTo(tmp[:0])
if err != nil {
return err
}
dst := fh.appendTo(tmp[:0])
s.headerWritten = true
s.wWg.Wait()
var n2 int
@ -483,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
Checksum: false,
DictID: 0,
}
dst, _ = fh.appendTo(dst)
dst = fh.appendTo(dst)
// Write raw block as last one only.
var blk blockHeader
@ -518,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)
if err != nil {
panic(err)
}
dst = fh.appendTo(dst)
// If we can do everything in one block, prefer that.
if len(src) <= e.o.blockSize {
@ -581,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// Add padding with content from crypto/rand.Reader
if e.o.pad > 0 {
add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
var err error
dst, err = skippableFrame(dst, add, rand.Reader)
if err != nil {
panic(err)

@ -22,7 +22,7 @@ type frameHeader struct {
const maxHeaderSize = 14
func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
func (f frameHeader) appendTo(dst []byte) []byte {
dst = append(dst, frameMagic...)
var fhd uint8
if f.Checksum {
@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
default:
panic("invalid fcs")
}
return dst, nil
return dst
}
const skippableFrameHeader = 4 + 4

@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
return io.ErrUnexpectedEOF
}
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@ -452,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol)
// extra bits are stored in reverse order.
br.fill()
if s.maxBits <= 32 {
mo += br.getBits(moB)
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fill()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
}
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
mo = s.adjustOffset(mo, ll, moB)
return
}

@ -5,11 +5,11 @@
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@ -301,9 +301,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@ -336,11 +336,11 @@ error_overread:
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@ -603,9 +603,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@ -638,11 +638,11 @@ error_overread:
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@ -892,9 +892,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@ -927,11 +927,11 @@ error_overread:
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@ -1152,9 +1152,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@ -1797,11 +1797,11 @@ empty_seqs:
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@ -2295,9 +2295,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@ -2362,11 +2362,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@ -2818,9 +2818,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX
@ -2885,11 +2885,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@ -3485,9 +3485,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@ -3552,11 +3552,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@ -4110,9 +4110,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX

@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)

@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
var written int64
var readHeader bool
{
var header []byte
var n int
header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
var n int
n, r.err = w.Write(header)
if r.err != nil {
return written, r.err

12 vendor/modules.txt (vendored)

@ -853,10 +853,11 @@ github.com/gorilla/websocket
# github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2
## explicit; go 1.17
github.com/grafana/cloudflare-go
# github.com/grafana/dskit v0.0.0-20231120170505-765e343eda4f
# github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb
## explicit; go 1.20
github.com/grafana/dskit/aws
github.com/grafana/dskit/backoff
github.com/grafana/dskit/cancellation
github.com/grafana/dskit/concurrency
github.com/grafana/dskit/crypto/tls
github.com/grafana/dskit/dns
@ -906,6 +907,11 @@ github.com/grafana/gomemcache/memcache
# github.com/grafana/loki/pkg/push v0.0.0-20231124142027-e52380921608 => ./pkg/push
## explicit; go 1.19
github.com/grafana/loki/pkg/push
# github.com/grafana/pyroscope-go/godeltaprof v0.1.6
## explicit; go 1.16
github.com/grafana/pyroscope-go/godeltaprof
github.com/grafana/pyroscope-go/godeltaprof/http/pprof
github.com/grafana/pyroscope-go/godeltaprof/internal/pprof
# github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd => github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd
## explicit; go 1.17
github.com/grafana/regexp
@ -1057,8 +1063,8 @@ github.com/json-iterator/go
# github.com/julienschmidt/httprouter v1.3.0
## explicit; go 1.7
github.com/julienschmidt/httprouter
# github.com/klauspost/compress v1.16.7
## explicit; go 1.18
# github.com/klauspost/compress v1.17.3
## explicit; go 1.19
github.com/klauspost/compress
github.com/klauspost/compress/flate
github.com/klauspost/compress/fse
