mirror of https://github.com/grafana/loki
Remove call to set default resolver (#11580)
**What this PR does / why we need it**:

**Which issue(s) this PR fixes**: Fixes #<issue number>

**Special notes for your reviewer**:

**Checklist**
- [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**)
- [ ] Documentation added
- [ ] Tests updated
- [ ] `CHANGELOG.md` updated
- [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label
- [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md`
- [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](pull/11600/headd10549e3ec)
- [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](0d4416a4b0)
parent
599eed7c52
commit
6c4699d8f7
@@ -0,0 +1,37 @@
package cancellation

import (
	"context"
	"fmt"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

type cancellationError struct {
	inner error
}

func NewError(err error) error {
	return cancellationError{err}
}

func NewErrorf(format string, args ...any) error {
	return NewError(fmt.Errorf(format, args...))
}

func (e cancellationError) Error() string {
	return "context canceled: " + e.inner.Error()
}

func (e cancellationError) Is(err error) bool {
	return err == context.Canceled
}

func (e cancellationError) Unwrap() error {
	return e.inner
}

func (e cancellationError) GRPCStatus() *status.Status {
	return status.New(codes.Canceled, e.Error())
}
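A quick usage sketch (the import path below is hypothetical, chosen only for illustration): the `Is` method makes the wrapper satisfy `errors.Is(err, context.Canceled)`, so existing cancellation checks keep working, while `GRPCStatus` maps it to `codes.Canceled` for gRPC responses.

```go
package main

import (
	"context"
	"errors"
	"fmt"

	"github.com/grafana/loki/pkg/util/cancellation" // hypothetical import path
)

func main() {
	err := cancellation.NewErrorf("stream %d evicted", 42)

	// The wrapped error still satisfies the standard cancellation check.
	fmt.Println(errors.Is(err, context.Canceled)) // true
	fmt.Println(err)                              // context canceled: stream 42 evicted
}
```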
@@ -0,0 +1,153 @@
package ring

import (
	"math"

	"github.com/pkg/errors"
	"golang.org/x/exp/slices" // using exp/slices until moving to go 1.21.
)

// TokenRanges describes token ranges owned by an instance.
// It consists of [start, end] pairs, where both start and end are inclusive.
// For example, a TokenRanges with values [5, 10, 20, 30] covers tokens [5..10] and [20..30].
type TokenRanges []uint32

func (tr TokenRanges) IncludesKey(key uint32) bool {
	switch {
	case len(tr) == 0:
		return false
	case key < tr[0]:
		// key comes before the first range
		return false
	case key > tr[len(tr)-1]:
		// key comes after the last range
		return false
	}

	index, found := slices.BinarySearch(tr, key)
	switch {
	case found:
		// ranges are closed
		return true
	case index%2 == 1:
		// key would be inserted after the start of a range (even index)
		return true
	default:
		return false
	}
}

func (tr TokenRanges) Equal(other TokenRanges) bool {
	if len(tr) != len(other) {
		return false
	}

	for i := 0; i < len(tr); i++ {
		if tr[i] != other[i] {
			return false
		}
	}

	return true
}

// GetTokenRangesForInstance returns the token ranges owned by an instance in the ring.
//
// The current implementation only works with a multi-zone setup, where the number of zones is equal to the replication factor.
func (r *Ring) GetTokenRangesForInstance(instanceID string) (TokenRanges, error) {
	r.mtx.RLock()
	defer r.mtx.RUnlock()

	instance, ok := r.ringDesc.Ingesters[instanceID]
	if !ok {
		return nil, ErrInstanceNotFound
	}
	if instance.Zone == "" {
		return nil, errors.New("zone not set")
	}

	rf := r.cfg.ReplicationFactor
	numZones := len(r.ringTokensByZone)

	// To simplify computation of token ranges, we currently only support the case where zone-awareness is enabled
	// and the replication factor is equal to the number of zones.
	if !r.cfg.ZoneAwarenessEnabled || rf != numZones {
		// If zone-awareness is disabled, we would need to treat the whole ring as one big zone and walk the
		// ring backwards looking for RF-1 tokens from other instances to determine the range.
		return nil, errors.New("can't use ring configuration for computing token ranges")
	}

	// At this point zone-aware replication is enabled and rf == numZones.
	// This means that we will write to one replica in each zone, so we can just consider the zonal ring for our instance.
	subringTokens, ok := r.ringTokensByZone[instance.Zone]
	if !ok || len(subringTokens) == 0 {
		return nil, errors.New("no tokens for zone")
	}

	// 1 range (2 values) per token + one additional if we need to split the rollover range.
	ranges := make(TokenRanges, 0, 2*(len(instance.Tokens)+1))
	// A non-zero value means we're looking for the start of a range. A zero value means we're looking
	// for the next end of a range (i.e. a token owned by this instance).
	rangeEnd := uint32(0)

	// If this instance claimed the first token, it owns the wrap-around range, which we'll break into two separate ranges.
	firstToken := subringTokens[0]
	firstTokenInfo, ok := r.ringInstanceByToken[firstToken]
	if !ok {
		// This should never happen unless there's a bug in the ring code.
		return nil, ErrInconsistentTokensInfo
	}

	if firstTokenInfo.InstanceID == instanceID {
		// We'll start by looking for the beginning of the range that ends with math.MaxUint32.
		rangeEnd = math.MaxUint32
	}

	// Walk the ring backwards, alternately looking for ends and starts of ranges.
	for i := len(subringTokens) - 1; i > 0; i-- {
		token := subringTokens[i]
		info, ok := r.ringInstanceByToken[token]
		if !ok {
			// This should never happen unless there's a bug in the ring code.
			return nil, ErrInconsistentTokensInfo
		}

		if rangeEnd == 0 {
			// We're looking for the end of the next range.
			if info.InstanceID == instanceID {
				rangeEnd = token - 1
			}
		} else {
			// We have a range end, and are looking for the start of the range.
			if info.InstanceID != instanceID {
				ranges = append(ranges, rangeEnd, token)
				rangeEnd = 0
			}
		}
	}

	// Finally, look at the first token again:
	// - if we have a range end, check if we claimed token 0
	//   - if we didn't, we have our start
	//   - if we did, the start is 0
	// - if we don't have a range end, check if we claimed token 0
	//   - if we didn't, do nothing
	//   - if we did, add the range [0, token-1]
	//     - BUT if the token itself is 0, do nothing, because we don't own the tokens themselves
	//       (we should be covered by the already-added range that ends with MaxUint32)

	if rangeEnd == 0 {
		if firstTokenInfo.InstanceID == instanceID && firstToken != 0 {
			ranges = append(ranges, firstToken-1, 0)
		}
	} else {
		if firstTokenInfo.InstanceID == instanceID {
			ranges = append(ranges, rangeEnd, 0)
		} else {
			ranges = append(ranges, rangeEnd, firstToken)
		}
	}

	// Ensure returned ranges are sorted.
	slices.Sort(ranges)

	return ranges, nil
}
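To make the even/odd index trick in IncludesKey concrete, here is a standalone sketch (the helper reimplements the method's logic only so the snippet compiles on its own):

```go
package main

import (
	"fmt"

	"golang.org/x/exp/slices"
)

// includes mirrors TokenRanges.IncludesKey: tr holds inclusive [start, end] pairs.
func includes(tr []uint32, key uint32) bool {
	if len(tr) == 0 || key < tr[0] || key > tr[len(tr)-1] {
		return false
	}
	i, found := slices.BinarySearch(tr, key)
	// found: key equals a (closed) range bound; odd insertion index: key
	// falls after a range start and before its end.
	return found || i%2 == 1
}

func main() {
	tr := []uint32{5, 10, 20, 30} // covers [5..10] and [20..30]
	fmt.Println(includes(tr, 7))  // true: inside [5..10]
	fmt.Println(includes(tr, 15)) // false: between the two ranges
	fmt.Println(includes(tr, 20)) // true: bounds are inclusive
}
```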
@@ -0,0 +1 @@
.idea/
@@ -0,0 +1,203 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2020 Pyroscope

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@@ -0,0 +1,98 @@
# godeltaprof

godeltaprof is an efficient delta profiler for memory, mutex, and block.

# Why

In Golang, allocation, mutex and block profiles are cumulative. They only grow over time and show allocations that happened since the beginning of the running program.
Not only do the values grow, but the size of the profile itself grows as well. It could grow up to megabytes in size for long-running processes. These megabyte-sized profiles are called huge profiles in this document.

In many cases, it's more useful to see the differences between two points in time.
The original runtime/pprof package can produce these differences, called a delta profile.
Using the delta profile requires passing a seconds argument to the pprof endpoint query.

```
go tool pprof http://localhost:6060/debug/pprof/heap?seconds=30
```

What this does:
1. Dump profile `p0`
2. Sleep
3. Dump profile `p1`
4. Decompress and parse protobuf `p0`
5. Decompress and parse protobuf `p1`
6. Subtract `p0` from `p1`
7. Serialize protobuf and compress the result

The resulting profile is *usually* much smaller (`p0` may be megabytes, while the result is usually tens of kilobytes).

There are a number of issues with this approach:

1. The heap profile contains both allocation values and in-use values. In-use values are not cumulative, so they are corrupted by the subtraction.
   **Note:** This can be fixed if the runtime/pprof package uses `p0.ScaleN([]float64{-1,-1,0,0})` instead of `p0.Scale(-1)` - that would subtract allocation values and zero out in-use values in `p0` (see the sketch after this list).
2. It requires dumping two profiles.
3. It produces a lot of allocations, putting pressure on GC.
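For reference, the corrected subtraction described in the note above can be reproduced offline with the `github.com/google/pprof/profile` package. This is a minimal sketch, not part of godeltaprof, and the file names are placeholders:

```go
package main

import (
	"log"
	"os"

	"github.com/google/pprof/profile"
)

func main() {
	p0 := mustParse("heap_t0.pb.gz") // placeholder file names
	p1 := mustParse("heap_t1.pb.gz")

	// Negate alloc_objects/alloc_space and zero out inuse_objects/inuse_space,
	// so merging yields a clean delta of the cumulative values only.
	if err := p0.ScaleN([]float64{-1, -1, 0, 0}); err != nil {
		log.Fatal(err)
	}
	delta, err := profile.Merge([]*profile.Profile{p1, p0})
	if err != nil {
		log.Fatal(err)
	}
	if err := delta.Write(os.Stdout); err != nil {
		log.Fatal(err)
	}
}

func mustParse(path string) *profile.Profile {
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	p, err := profile.Parse(f)
	if err != nil {
		log.Fatal(err)
	}
	return p
}
```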
## DataDog's fastdelta

DataDog's [fastdelta profiler](https://github.com/DataDog/dd-trace-go/blob/30e1406c2cb62af749df03d559853e1d1de0e3bf/profiler/internal/fastdelta/fd.go#L75) uses another approach.

It improves on the runtime/pprof approach by keeping a copy of the previous profile and subtracting the current profile from it.
The fastdelta profiler uses a custom protobuf pprof parser that doesn't allocate as much memory.
This approach is more efficient and faster, produces less garbage, and doesn't require dumping two profiles.
However, the fastdelta profiler still parses huge profiles, up to megabytes, just to discard most of the data.

## godeltaprof

godeltaprof does a similar job, but slightly differently.

Delta computation happens before serializing any pprof files, using `runtime.MemProfileRecord` and `BlockProfileRecord`.
This way, huge profiles don't need to be parsed. The delta is computed on raw records, all zeros are rejected, and the results are serialized and compressed (a usage sketch follows the list below).

The source code for godeltaprof is based on (forked from) the original [runtime/pprof package](https://github.com/golang/go/tree/master/src/runtime/pprof).
godeltaprof is modified to include delta computation before serialization and to expose the new endpoints.
There are other small improvements and benefits:
- Using `github.com/klauspost/compress/gzip` instead of `compress/gzip`
- Optional lazy mappings reading (they don't change over time for most applications)
- Separate package from runtime, so it can be updated independently
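A minimal usage sketch of the API as vendored later in this diff; the first Profile call establishes the baseline, and each subsequent call writes only the changes since the previous one:

```go
package main

import (
	"os"

	"github.com/grafana/pyroscope-go/godeltaprof"
)

func main() {
	hp := godeltaprof.NewHeapProfiler()

	// First call: baseline. Later calls: only allocations made since the
	// previous call, already serialized and gzip-compressed.
	if err := hp.Profile(os.Stdout); err != nil {
		panic(err)
	}
}
```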
# benchmarks

These benchmarks used memory profiles from the [pyroscope](https://github.com/grafana/pyroscope) server.

BenchmarkOG - dumps the memory profile with the runtime/pprof package
BenchmarkFastDelta - dumps the memory profile with the runtime/pprof package and computes the delta using fastdelta
BenchmarkGodeltaprof - does not dump the profile with runtime/pprof; computes the delta and outputs the result

Each benchmark also outputs the produced profile sizes.
```
BenchmarkOG
63 181862189 ns/op
profile sizes: [209117 209107 209077 209089 209095 209076 209088 209082 209090 209092]

BenchmarkFastDelta
43 273936764 ns/op
profile sizes: [169300 10815 8969 9511 9752 9376 9545 8959 10357 9536]

BenchmarkGodeltaprof
366 31148264 ns/op
profile sizes: [208898 11485 9347 9967 10291 9848 10085 9285 11033 9986]
```

Notice how the BenchmarkOG profile sizes are ~200k while BenchmarkGodeltaprof and BenchmarkFastDelta are ~10k - that is because a lot of samples
with zero values are discarded after the delta computation.

The source code of the benchmarks can be found [here](https://github.com/grafana/pyroscope/compare/godeltaprofbench?expand=1).

CPU profiles: [BenchmarkOG](https://flamegraph.com/share/a8f68312-98c7-11ee-a502-466f68d203a5), [BenchmarkFastDelta](https://flamegraph.com/share/c23821f3-98c7-11ee-a502-466f68d203a5), [BenchmarkGodeltaprof](https://flamegraph.com/share/ea66df36-98c7-11ee-9a0d-f2c25703e557)

# upstreaming

TODO(korniltsev): create a golang issue and ask if godeltaprof is something that could be considered for merging into the upstream golang repo
in some way (maybe not as is, maybe with different APIs).
@@ -0,0 +1,119 @@
package godeltaprof

import (
	"io"
	"runtime"
	"sort"
	"sync"

	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
)

// BlockProfiler is a stateful profiler for goroutine blocking events and mutex contention in Go programs.
// Depending on the function used to create the BlockProfiler, it uses either runtime.BlockProfile or runtime.MutexProfile.
// The BlockProfiler provides similar functionality to pprof.Lookup("block").WriteTo and pprof.Lookup("mutex").WriteTo,
// but with some key differences.
//
// The BlockProfiler tracks the delta of blocking events or mutex contention since the last
// profile was written, effectively providing a snapshot of the changes
// between two points in time. This is in contrast to the
// pprof.Lookup functions, which accumulate profiling data
// and result in profiles that represent the entire lifetime of the program.
//
// The BlockProfiler is safe for concurrent use, as it serializes access to
// its internal state using a sync.Mutex. This ensures that multiple goroutines
// can call the Profile method without causing any data race issues.
type BlockProfiler struct {
	impl           pprof.DeltaMutexProfiler
	mutex          sync.Mutex
	runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)
	scaleProfile   pprof.MutexProfileScaler
}

// NewMutexProfiler creates a new BlockProfiler instance for profiling mutex contention.
// The resulting BlockProfiler uses runtime.MutexProfile as its data source.
//
// Usage:
//
//	mp := godeltaprof.NewMutexProfiler()
//	...
//	err := mp.Profile(someWriter)
func NewMutexProfiler() *BlockProfiler {
	return &BlockProfiler{
		runtimeProfile: runtime.MutexProfile,
		scaleProfile:   pprof.ScalerMutexProfile,
		impl: pprof.DeltaMutexProfiler{
			Options: pprof.ProfileBuilderOptions{
				GenericsFrames: true,
				LazyMapping:    true,
			},
		},
	}
}

func NewMutexProfilerWithOptions(options ProfileOptions) *BlockProfiler {
	return &BlockProfiler{
		runtimeProfile: runtime.MutexProfile,
		scaleProfile:   pprof.ScalerMutexProfile,
		impl: pprof.DeltaMutexProfiler{
			Options: pprof.ProfileBuilderOptions{
				GenericsFrames: options.GenericsFrames,
				LazyMapping:    options.LazyMappings,
			},
		},
	}
}

// NewBlockProfiler creates a new BlockProfiler instance for profiling goroutine blocking events.
// The resulting BlockProfiler uses runtime.BlockProfile as its data source.
//
// Usage:
//
//	bp := godeltaprof.NewBlockProfiler()
//	...
//	err := bp.Profile(someWriter)
func NewBlockProfiler() *BlockProfiler {
	return &BlockProfiler{
		runtimeProfile: runtime.BlockProfile,
		scaleProfile:   pprof.ScalerBlockProfile,
		impl: pprof.DeltaMutexProfiler{
			Options: pprof.ProfileBuilderOptions{
				GenericsFrames: true,
				LazyMapping:    true,
			},
		},
	}
}

func NewBlockProfilerWithOptions(options ProfileOptions) *BlockProfiler {
	return &BlockProfiler{
		runtimeProfile: runtime.BlockProfile,
		scaleProfile:   pprof.ScalerBlockProfile,
		impl: pprof.DeltaMutexProfiler{
			Options: pprof.ProfileBuilderOptions{
				GenericsFrames: options.GenericsFrames,
				LazyMapping:    options.LazyMappings,
			},
		},
	}
}

func (d *BlockProfiler) Profile(w io.Writer) error {
	d.mutex.Lock()
	defer d.mutex.Unlock()

	var p []runtime.BlockProfileRecord
	n, ok := d.runtimeProfile(nil)
	for {
		p = make([]runtime.BlockProfileRecord, n+50)
		n, ok = d.runtimeProfile(p)
		if ok {
			p = p[:n]
			break
		}
	}

	sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })

	return d.impl.PrintCountCycleProfile(w, "contentions", "delay", d.scaleProfile, p)
}
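One caveat worth noting, which is standard runtime behavior rather than anything stated in the file above: the mutex and block profiles stay empty unless sampling is enabled first. A minimal sketch:

```go
package main

import (
	"os"
	"runtime"

	"github.com/grafana/pyroscope-go/godeltaprof"
)

func main() {
	// Enable sampling; without these calls the runtime records no events.
	runtime.SetMutexProfileFraction(5)     // record ~1 in 5 contention events
	runtime.SetBlockProfileRate(1_000_000) // ~1 sample per 1e6 ns spent blocked

	mp := godeltaprof.NewMutexProfiler()
	_ = mp.Profile(os.Stdout) // first call is the baseline; later calls emit deltas
}
```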
@@ -0,0 +1,81 @@
package godeltaprof

import (
	"io"
	"runtime"
	"sync"

	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
)

// HeapProfiler is a stateful profiler for heap allocations in Go programs.
// It is based on runtime.MemProfile and provides similar functionality to
// pprof.WriteHeapProfile, but with some key differences.
//
// The HeapProfiler tracks the delta of heap allocations since the last
// profile was written, effectively providing a snapshot of the changes
// in heap usage between two points in time. This is in contrast to the
// pprof.WriteHeapProfile function, which accumulates profiling data
// and results in profiles that represent the entire lifetime of the program.
//
// The HeapProfiler is safe for concurrent use, as it serializes access to
// its internal state using a sync.Mutex. This ensures that multiple goroutines
// can call the Profile method without causing any data race issues.
//
// Usage:
//
//	hp := godeltaprof.NewHeapProfiler()
//	...
//	err := hp.Profile(someWriter)
type HeapProfiler struct {
	impl  pprof.DeltaHeapProfiler
	mutex sync.Mutex
}

func NewHeapProfiler() *HeapProfiler {
	return &HeapProfiler{
		impl: pprof.DeltaHeapProfiler{
			Options: pprof.ProfileBuilderOptions{
				GenericsFrames: true,
				LazyMapping:    true,
			},
		}}
}

func NewHeapProfilerWithOptions(options ProfileOptions) *HeapProfiler {
	return &HeapProfiler{
		impl: pprof.DeltaHeapProfiler{
			Options: pprof.ProfileBuilderOptions{
				GenericsFrames: options.GenericsFrames,
				LazyMapping:    options.LazyMappings,
			},
		}}
}

func (d *HeapProfiler) Profile(w io.Writer) error {
	d.mutex.Lock()
	defer d.mutex.Unlock()

	// Find out how many records there are (MemProfile(nil, true)),
	// allocate that many records, and get the data.
	// There's a race—more records might be added between
	// the two calls—so allocate a few extra records for safety
	// and also try again if we're very unlucky.
	// The loop should only execute one iteration in the common case.
	var p []runtime.MemProfileRecord
	n, ok := runtime.MemProfile(nil, true)
	for {
		// Allocate room for a slightly bigger profile,
		// in case a few more entries have been added
		// since the call to MemProfile.
		p = make([]runtime.MemProfileRecord, n+50)
		n, ok = runtime.MemProfile(p, true)
		if ok {
			p = p[0:n]
			break
		}
		// Profile grew; try again.
	}

	return d.impl.WriteHeapProto(w, p, int64(runtime.MemProfileRate), "")
}
@@ -0,0 +1,50 @@
package pprof

import (
	"fmt"
	"io"
	"net/http"
	"runtime"
	"strconv"

	"github.com/grafana/pyroscope-go/godeltaprof"
)

var (
	deltaHeapProfiler  = godeltaprof.NewHeapProfiler()
	deltaBlockProfiler = godeltaprof.NewBlockProfiler()
	deltaMutexProfiler = godeltaprof.NewMutexProfiler()
)

type deltaProfiler interface {
	Profile(w io.Writer) error
}

func init() {
	http.HandleFunc("/debug/pprof/delta_heap", Heap)
	http.HandleFunc("/debug/pprof/delta_block", Block)
	http.HandleFunc("/debug/pprof/delta_mutex", Mutex)
}

func Heap(w http.ResponseWriter, r *http.Request) {
	gc, _ := strconv.Atoi(r.FormValue("gc"))
	if gc > 0 {
		runtime.GC()
	}
	writeDeltaProfile(deltaHeapProfiler, "heap", w)
}

func Block(w http.ResponseWriter, r *http.Request) {
	writeDeltaProfile(deltaBlockProfiler, "block", w)
}

func Mutex(w http.ResponseWriter, r *http.Request) {
	writeDeltaProfile(deltaMutexProfiler, "mutex", w)
}

func writeDeltaProfile(p deltaProfiler, name string, w http.ResponseWriter) {
	w.Header().Set("X-Content-Type-Options", "nosniff")
	w.Header().Set("Content-Type", "application/octet-stream")
	w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.pprof.gz"`, name))
	_ = p.Profile(w)
}
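Because init() registers the handlers on http.DefaultServeMux, a blank import is enough to expose the endpoints. A sketch, with the import path inferred from the package layout rather than confirmed by this diff:

```go
package main

import (
	"net/http"

	_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof" // path inferred from the vendor layout
)

func main() {
	// Then scrape, e.g.:
	//   go tool pprof http://localhost:6060/debug/pprof/delta_heap
	_ = http.ListenAndServe(":6060", nil)
}
```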
vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/delta_heap.go (generated, vendored, 118 lines)
@@ -0,0 +1,118 @@
package pprof

import (
	"io"
	"math"
	"runtime"
	"strings"
)

type DeltaHeapProfiler struct {
	m       profMap
	mem     []memMap
	Options ProfileBuilderOptions
}

// WriteHeapProto writes the current heap profile in protobuf format to w.
func (d *DeltaHeapProfiler) WriteHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error {
	if d.mem == nil || !d.Options.LazyMapping {
		d.mem = readMapping()
	}
	b := newProfileBuilder(w, d.Options, d.mem)
	b.pbValueType(tagProfile_PeriodType, "space", "bytes")
	b.pb.int64Opt(tagProfile_Period, rate)
	b.pbValueType(tagProfile_SampleType, "alloc_objects", "count")
	b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes")
	b.pbValueType(tagProfile_SampleType, "inuse_objects", "count")
	b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes")
	if defaultSampleType != "" {
		b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType))
	}

	values := []int64{0, 0, 0, 0}
	var locs []uint64
	for _, r := range p {
		// do the delta
		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
			// it is a fresh bucket and it will be published after the next 1-2 GC cycles
			continue
		}
		var blockSize int64
		if r.AllocObjects > 0 {
			blockSize = r.AllocBytes / r.AllocObjects
		}
		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))

		if (r.AllocObjects - entry.count.v1) < 0 {
			continue
		}
		AllocObjects := r.AllocObjects - entry.count.v1
		AllocBytes := r.AllocBytes - entry.count.v2
		entry.count.v1 = r.AllocObjects
		entry.count.v2 = r.AllocBytes

		values[0], values[1] = scaleHeapSample(AllocObjects, AllocBytes, rate)
		values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate)

		if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 {
			continue
		}

		hideRuntime := true
		for tries := 0; tries < 2; tries++ {
			stk := r.Stack()
			// For heap profiles, all stack
			// addresses are return PCs, which is
			// what appendLocsForStack expects.
			if hideRuntime {
				for i, addr := range stk {
					if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
						continue
					}
					// Found non-runtime. Show any runtime uses above it.
					stk = stk[i:]
					break
				}
			}
			locs = b.appendLocsForStack(locs[:0], stk)
			if len(locs) > 0 {
				break
			}
			hideRuntime = false // try again, and show all frames next time.
		}

		b.pbSample(values, locs, func() {
			if blockSize != 0 {
				b.pbLabel(tagSample_Label, "bytes", "", blockSize)
			}
		})
	}
	b.build()
	return nil
}

// scaleHeapSample adjusts the data from a heap Sample to
// account for its probability of appearing in the collected
// data. Heap profiles are a sampling of the memory allocation
// requests in a program. We estimate the unsampled value by dividing
// each collected sample by its probability of appearing in the
// profile. Heap profiles rely on a Poisson process to determine
// which samples to collect, based on the desired average collection
// rate R. The probability of a sample of size S appearing in that
// profile is 1-exp(-S/R).
func scaleHeapSample(count, size, rate int64) (int64, int64) {
	if count == 0 || size == 0 {
		return 0, 0
	}

	if rate <= 1 {
		// if rate==1 all samples were collected so no adjustment is needed.
		// if rate<1 treat as unknown and skip scaling.
		return count, size
	}

	avgSize := float64(size) / float64(count)
	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))

	return int64(float64(count) * scale), int64(float64(size) * scale)
}
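To make the 1-exp(-S/R) correction concrete, a small standalone computation; the numbers are illustrative, with 524288 being the default runtime.MemProfileRate:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	rate := 524288.0  // default runtime.MemProfileRate (512 KiB)
	size := 1048576.0 // one sampled 1 MiB allocation

	// Probability that a sample of this average size was captured at all.
	p := 1 - math.Exp(-size/rate) // ≈ 0.865
	scale := 1 / p                // ≈ 1.157

	// The single observed sample stands in for ~1.16 MiB of real allocation.
	fmt.Printf("estimated bytes: %.0f\n", size*scale)
}
```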
vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/delta_mutex.go (generated, vendored, 59 lines)
@@ -0,0 +1,59 @@
package pprof

import (
	"io"
	"runtime"
)

type DeltaMutexProfiler struct {
	m       profMap
	mem     []memMap
	Options ProfileBuilderOptions
}

// PrintCountCycleProfile outputs block profile records (for block or mutex profiles)
// in the pprof-proto format. The translation from cycle count to time duration
// is done because the proto expects count and time (nanoseconds) instead of count
// and the number of cycles for block and contention profiles.
// Possible 'scaler' functions are scaleBlockProfile and scaleMutexProfile.
func (d *DeltaMutexProfiler) PrintCountCycleProfile(w io.Writer, countName, cycleName string, scaler MutexProfileScaler, records []runtime.BlockProfileRecord) error {
	if d.mem == nil || !d.Options.LazyMapping {
		d.mem = readMapping()
	}
	// Output profile in protobuf form.
	b := newProfileBuilder(w, d.Options, d.mem)
	b.pbValueType(tagProfile_PeriodType, countName, "count")
	b.pb.int64Opt(tagProfile_Period, 1)
	b.pbValueType(tagProfile_SampleType, countName, "count")
	b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")

	cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9

	values := []int64{0, 0}
	var locs []uint64
	for _, r := range records {
		count, nanosec := ScaleMutexProfile(scaler, r.Count, float64(r.Cycles)/cpuGHz)
		inanosec := int64(nanosec)

		// do the delta
		entry := d.m.Lookup(r.Stack(), 0)
		values[0] = count - entry.count.v1
		values[1] = inanosec - entry.count.v2
		entry.count.v1 = count
		entry.count.v2 = inanosec

		if values[0] < 0 || values[1] < 0 {
			continue
		}
		if values[0] == 0 && values[1] == 0 {
			continue
		}

		// For count profiles, all stack addresses are
		// return PCs, which is what appendLocsForStack expects.
		locs = b.appendLocsForStack(locs[:0], r.Stack())
		b.pbSample(values, locs, nil)
	}
	b.build()
	return nil
}
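The cycles-to-nanoseconds translation above is plain arithmetic. An illustrative computation (the 3 GHz clock rate is an assumption; runtime_cyclesPerSecond is internal to the package):

```go
package main

import "fmt"

func main() {
	cyclesPerSecond := 3e9          // assumed 3 GHz clock
	cpuGHz := cyclesPerSecond / 1e9 // 3.0 cycles per nanosecond
	cycles := 6e9                   // contention measured in CPU cycles

	// 6e9 cycles / 3 cycles-per-ns = 2e9 ns, i.e. 2s of delay.
	fmt.Printf("%.0f ns\n", cycles/cpuGHz)
}
```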
@@ -0,0 +1,109 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pprof

import (
	"encoding/binary"
	"errors"
	"fmt"
	"os"
)

var (
	errBadELF    = errors.New("malformed ELF binary")
	errNoBuildID = errors.New("no NT_GNU_BUILD_ID found in ELF binary")
)

// elfBuildID returns the GNU build ID of the named ELF binary,
// without introducing a dependency on debug/elf and its dependencies.
func elfBuildID(file string) (string, error) {
	buf := make([]byte, 256)
	f, err := os.Open(file)
	if err != nil {
		return "", err
	}
	defer f.Close()

	if _, err := f.ReadAt(buf[:64], 0); err != nil {
		return "", err
	}

	// ELF file begins with \x7F E L F.
	if buf[0] != 0x7F || buf[1] != 'E' || buf[2] != 'L' || buf[3] != 'F' {
		return "", errBadELF
	}

	var byteOrder binary.ByteOrder
	switch buf[5] {
	default:
		return "", errBadELF
	case 1: // little-endian
		byteOrder = binary.LittleEndian
	case 2: // big-endian
		byteOrder = binary.BigEndian
	}

	var shnum int
	var shoff, shentsize int64
	switch buf[4] {
	default:
		return "", errBadELF
	case 1: // 32-bit file header
		shoff = int64(byteOrder.Uint32(buf[32:]))
		shentsize = int64(byteOrder.Uint16(buf[46:]))
		if shentsize != 40 {
			return "", errBadELF
		}
		shnum = int(byteOrder.Uint16(buf[48:]))
	case 2: // 64-bit file header
		shoff = int64(byteOrder.Uint64(buf[40:]))
		shentsize = int64(byteOrder.Uint16(buf[58:]))
		if shentsize != 64 {
			return "", errBadELF
		}
		shnum = int(byteOrder.Uint16(buf[60:]))
	}

	for i := 0; i < shnum; i++ {
		if _, err := f.ReadAt(buf[:shentsize], shoff+int64(i)*shentsize); err != nil {
			return "", err
		}
		if typ := byteOrder.Uint32(buf[4:]); typ != 7 { // SHT_NOTE
			continue
		}
		var off, size int64
		if shentsize == 40 {
			// 32-bit section header
			off = int64(byteOrder.Uint32(buf[16:]))
			size = int64(byteOrder.Uint32(buf[20:]))
		} else {
			// 64-bit section header
			off = int64(byteOrder.Uint64(buf[24:]))
			size = int64(byteOrder.Uint64(buf[32:]))
		}
		size += off
		for off < size {
			if _, err := f.ReadAt(buf[:16], off); err != nil { // room for header + name GNU\x00
				return "", err
			}
			nameSize := int(byteOrder.Uint32(buf[0:]))
			descSize := int(byteOrder.Uint32(buf[4:]))
			noteType := int(byteOrder.Uint32(buf[8:]))
			descOff := off + int64(12+(nameSize+3)&^3)
			off = descOff + int64((descSize+3)&^3)
			if nameSize != 4 || noteType != 3 || buf[12] != 'G' || buf[13] != 'N' || buf[14] != 'U' || buf[15] != '\x00' { // want name GNU\x00 type 3 (NT_GNU_BUILD_ID)
				continue
			}
			if descSize > len(buf) {
				return "", errBadELF
			}
			if _, err := f.ReadAt(buf[:descSize], descOff); err != nil {
				return "", err
			}
			return fmt.Sprintf("%x", buf[:descSize]), nil
		}
	}
	return "", errNoBuildID
}
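For comparison, the standard library's debug/elf (which this helper deliberately avoids importing) can read the same note. A sketch, assuming a little-endian binary; the note layout is namesz (4 bytes), descsz (4), type (4), name ("GNU\x00"), then the ID itself:

```go
package main

import (
	"debug/elf"
	"encoding/binary"
	"fmt"
	"log"
	"os"
)

func main() {
	f, err := elf.Open(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	s := f.Section(".note.gnu.build-id")
	if s == nil {
		log.Fatal("no build ID note section")
	}
	d, err := s.Data()
	if err != nil {
		log.Fatal(err)
	}

	// Parse the note header; name is padded to a 4-byte boundary.
	nameSize := binary.LittleEndian.Uint32(d[0:])
	descSize := binary.LittleEndian.Uint32(d[4:])
	descOff := 12 + (nameSize+3)&^3
	fmt.Printf("%x\n", d[descOff:descOff+descSize])
}
```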
vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/gzip_go16.go (generated, vendored, 18 lines)
@@ -0,0 +1,18 @@
//go:build go1.16 && !go1.17
// +build go1.16,!go1.17

package pprof

import (
	"compress/gzip"
	"io"
)

type gzipWriter struct {
	*gzip.Writer
}

func newGzipWriter(w io.Writer) gzipWriter {
	zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
	return gzipWriter{zw}
}
vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/gzip_go17.go (generated, vendored, 19 lines)
@@ -0,0 +1,19 @@
//go:build go1.17
// +build go1.17

package pprof

import (
	"io"

	"github.com/klauspost/compress/gzip"
)

type gzipWriter struct {
	*gzip.Writer
}

func newGzipWriter(w io.Writer) gzipWriter {
	zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
	return gzipWriter{zw}
}
@@ -0,0 +1,96 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pprof

import "unsafe"

// A profMap is a map from (stack, tag) to mapEntry.
// It grows without bound, but that's assumed to be OK.
type profMap struct {
	hash    map[uintptr]*profMapEntry
	all     *profMapEntry
	last    *profMapEntry
	free    []profMapEntry
	freeStk []uintptr
}

type count struct {
	// alloc_objects, alloc_bytes for heap
	// mutex_count, mutex_duration for mutex
	v1, v2 int64
}

// A profMapEntry is a single entry in the profMap.
type profMapEntry struct {
	nextHash *profMapEntry // next in hash list
	nextAll  *profMapEntry // next in list of all entries
	stk      []uintptr
	tag      uintptr
	count    count
}

func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry {
	// Compute hash of (stk, tag).
	h := uintptr(0)
	for _, x := range stk {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += uintptr(x) * 41
	}
	h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
	h += uintptr(tag) * 41

	// Find entry if present.
	var last *profMapEntry
Search:
	for e := m.hash[h]; e != nil; last, e = e, e.nextHash {
		if len(e.stk) != len(stk) || e.tag != tag {
			continue
		}
		for j := range stk {
			if e.stk[j] != uintptr(stk[j]) {
				continue Search
			}
		}
		// Move to front.
		if last != nil {
			last.nextHash = e.nextHash
			e.nextHash = m.hash[h]
			m.hash[h] = e
		}
		return e
	}

	// Add new entry.
	if len(m.free) < 1 {
		m.free = make([]profMapEntry, 128)
	}
	e := &m.free[0]
	m.free = m.free[1:]
	e.nextHash = m.hash[h]
	e.tag = tag

	if len(m.freeStk) < len(stk) {
		m.freeStk = make([]uintptr, 1024)
	}
	// Limit cap to prevent append from clobbering freeStk.
	e.stk = m.freeStk[:len(stk):len(stk)]
	m.freeStk = m.freeStk[len(stk):]

	for j := range stk {
		e.stk[j] = uintptr(stk[j])
	}
	if m.hash == nil {
		m.hash = make(map[uintptr]*profMapEntry)
	}
	m.hash[h] = e
	if m.all == nil {
		m.all = e
		m.last = e
	} else {
		m.last.nextAll = e
		m.last = e
	}
	return e
}
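The delta profilers above rely on one contract from this map: looking up the same (stack, tag) pair returns the same entry, so the cumulative counters stored in it persist between Profile calls. A sketch of that contract, written as an in-package test since the types are unexported (the test file itself is hypothetical, not part of this diff):

```go
// Would live inside this package, e.g. in map_test.go.
package pprof

import "testing"

func TestLookupReturnsSameEntry(t *testing.T) {
	var m profMap
	stk := []uintptr{0x1000, 0x2000}

	e1 := m.Lookup(stk, 0)
	e1.count.v1 = 5 // remember a previous cumulative value

	e2 := m.Lookup(stk, 0) // same (stack, tag) → same entry
	if e2.count.v1 != 5 {
		t.Fatalf("expected persisted count, got %d", e2.count.v1)
	}
}
```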
vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/mutex_scale_go19.go (generated, vendored, 27 lines)
@@ -0,0 +1,27 @@
//go:build go1.16 && !go1.20
// +build go1.16,!go1.20

package pprof

import "runtime"

type MutexProfileScaler struct {
	f func(cnt int64, ns float64) (int64, float64)
}

func ScaleMutexProfile(scaler MutexProfileScaler, cnt int64, ns float64) (int64, float64) {
	return scaler.f(cnt, ns)
}

var ScalerMutexProfile = MutexProfileScaler{func(cnt int64, ns float64) (int64, float64) {
	period := runtime.SetMutexProfileFraction(-1)
	return cnt * int64(period), ns * float64(period)
}}

var ScalerBlockProfile = MutexProfileScaler{func(cnt int64, ns float64) (int64, float64) {
	// Do nothing.
	// The current way of block profile sampling makes it
	// hard to compute the unsampled number. The legacy block
	// profile parser doesn't attempt to scale or unsample.
	return cnt, ns
}}
vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/mutex_scale_go20.go (generated, vendored, 17 lines)
@@ -0,0 +1,17 @@
//go:build go1.20
// +build go1.20

package pprof

type MutexProfileScaler struct {
}

// ScaleMutexProfile is a no-op for go1.20+.
// https://github.com/golang/go/commit/30b1af00ff142a3f1a5e2a0f32cf04a649bd5e65
func ScaleMutexProfile(_ MutexProfileScaler, cnt int64, ns float64) (int64, float64) {
	return cnt, ns
}

var ScalerMutexProfile = MutexProfileScaler{}

var ScalerBlockProfile = MutexProfileScaler{}
@ -0,0 +1,715 @@ |
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pprof |
||||
|
||||
import ( |
||||
"bytes" |
||||
"io" |
||||
"os" |
||||
"runtime" |
||||
"strconv" |
||||
"strings" |
||||
"time" |
||||
) |
||||
|
||||
// lostProfileEvent is the function to which lost profiling
|
||||
// events are attributed.
|
||||
// (The name shows up in the pprof graphs.)
|
||||
func lostProfileEvent() { lostProfileEvent() } |
||||
|
||||
type ProfileBuilderOptions struct { |
||||
// for go1.21+ if true - use runtime_FrameSymbolName - produces frames with generic types, for example [go.shape.int]
|
||||
// for go1.21+ if false - use runtime.Frame->Function - produces frames with generic types ommited [...]
|
||||
// pre 1.21 - always use runtime.Frame->Function - produces frames with generic types ommited [...]
|
||||
GenericsFrames bool |
||||
LazyMapping bool |
||||
} |
||||
|
||||
// A profileBuilder writes a profile incrementally from a
|
||||
// stream of profile samples delivered by the runtime.
|
||||
type profileBuilder struct { |
||||
start time.Time |
||||
end time.Time |
||||
havePeriod bool |
||||
period int64 |
||||
|
||||
// encoding state
|
||||
w io.Writer |
||||
zw gzipWriter |
||||
pb protobuf |
||||
strings []string |
||||
stringMap map[string]int |
||||
locs map[uintptr]locInfo // list of locInfo starting with the given PC.
|
||||
funcs map[string]int // Package path-qualified function name to Function.ID
|
||||
mem []memMap |
||||
deck pcDeck |
||||
|
||||
opt ProfileBuilderOptions |
||||
} |
||||
|
||||
type memMap struct { |
||||
// initialized as reading mapping
|
||||
start uintptr // Address at which the binary (or DLL) is loaded into memory.
|
||||
end uintptr // The limit of the address range occupied by this mapping.
|
||||
offset uint64 // Offset in the binary that corresponds to the first mapped address.
|
||||
file string // The object this entry is loaded from.
|
||||
buildID string // A string that uniquely identifies a particular program version with high probability.
|
||||
|
||||
funcs symbolizeFlag |
||||
fake bool // map entry was faked; /proc/self/maps wasn't available
|
||||
} |
||||
|
||||
// symbolizeFlag keeps track of symbolization result.
|
||||
//
|
||||
// 0 : no symbol lookup was performed
|
||||
// 1<<0 (lookupTried) : symbol lookup was performed
|
||||
// 1<<1 (lookupFailed): symbol lookup was performed but failed
|
||||
type symbolizeFlag uint8 |
||||
|
||||
const ( |
||||
lookupTried symbolizeFlag = 1 << iota |
||||
lookupFailed symbolizeFlag = 1 << iota |
||||
) |
||||
|
||||
const ( |
||||
// message Profile
|
||||
tagProfile_SampleType = 1 // repeated ValueType
|
||||
tagProfile_Sample = 2 // repeated Sample
|
||||
tagProfile_Mapping = 3 // repeated Mapping
|
||||
tagProfile_Location = 4 // repeated Location
|
||||
tagProfile_Function = 5 // repeated Function
|
||||
tagProfile_StringTable = 6 // repeated string
|
||||
tagProfile_DropFrames = 7 // int64 (string table index)
|
||||
tagProfile_KeepFrames = 8 // int64 (string table index)
|
||||
tagProfile_TimeNanos = 9 // int64
|
||||
tagProfile_DurationNanos = 10 // int64
|
||||
tagProfile_PeriodType = 11 // ValueType (really optional string???)
|
||||
tagProfile_Period = 12 // int64
|
||||
tagProfile_Comment = 13 // repeated int64
|
||||
tagProfile_DefaultSampleType = 14 // int64
|
||||
|
||||
// message ValueType
|
||||
tagValueType_Type = 1 // int64 (string table index)
|
||||
tagValueType_Unit = 2 // int64 (string table index)
|
||||
|
||||
// message Sample
|
||||
tagSample_Location = 1 // repeated uint64
|
||||
tagSample_Value = 2 // repeated int64
|
||||
tagSample_Label = 3 // repeated Label
|
||||
|
||||
// message Label
|
||||
tagLabel_Key = 1 // int64 (string table index)
|
||||
tagLabel_Str = 2 // int64 (string table index)
|
||||
tagLabel_Num = 3 // int64
|
||||
|
||||
// message Mapping
|
||||
tagMapping_ID = 1 // uint64
|
||||
tagMapping_Start = 2 // uint64
|
||||
tagMapping_Limit = 3 // uint64
|
||||
tagMapping_Offset = 4 // uint64
|
||||
tagMapping_Filename = 5 // int64 (string table index)
|
||||
tagMapping_BuildID = 6 // int64 (string table index)
|
||||
tagMapping_HasFunctions = 7 // bool
|
||||
tagMapping_HasFilenames = 8 // bool
|
||||
tagMapping_HasLineNumbers = 9 // bool
|
||||
tagMapping_HasInlineFrames = 10 // bool
|
||||
|
||||
// message Location
|
||||
tagLocation_ID = 1 // uint64
|
||||
tagLocation_MappingID = 2 // uint64
|
||||
tagLocation_Address = 3 // uint64
|
||||
tagLocation_Line = 4 // repeated Line
|
||||
|
||||
// message Line
|
||||
tagLine_FunctionID = 1 // uint64
|
||||
tagLine_Line = 2 // int64
|
||||
|
||||
// message Function
|
||||
tagFunction_ID = 1 // uint64
|
||||
tagFunction_Name = 2 // int64 (string table index)
|
||||
tagFunction_SystemName = 3 // int64 (string table index)
|
||||
tagFunction_Filename = 4 // int64 (string table index)
|
||||
tagFunction_StartLine = 5 // int64
|
||||
) |
||||
|
||||
// stringIndex adds s to the string table if not already present
|
||||
// and returns the index of s in the string table.
|
||||
func (b *profileBuilder) stringIndex(s string) int64 { |
||||
id, ok := b.stringMap[s] |
||||
if !ok { |
||||
id = len(b.strings) |
||||
b.strings = append(b.strings, s) |
||||
b.stringMap[s] = id |
||||
} |
||||
return int64(id) |
||||
} |
||||
|
||||
func (b *profileBuilder) flush() { |
||||
const dataFlush = 4096 |
||||
if b.pb.nest == 0 && len(b.pb.data) > dataFlush { |
||||
b.zw.Write(b.pb.data) |
||||
b.pb.data = b.pb.data[:0] |
||||
} |
||||
} |
||||
|
||||
// pbValueType encodes a ValueType message to b.pb.
|
||||
func (b *profileBuilder) pbValueType(tag int, typ, unit string) { |
||||
start := b.pb.startMessage() |
||||
b.pb.int64(tagValueType_Type, b.stringIndex(typ)) |
||||
b.pb.int64(tagValueType_Unit, b.stringIndex(unit)) |
||||
b.pb.endMessage(tag, start) |
||||
} |
||||
|
||||
// pbSample encodes a Sample message to b.pb.
|
||||
func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) { |
||||
start := b.pb.startMessage() |
||||
b.pb.int64s(tagSample_Value, values) |
||||
b.pb.uint64s(tagSample_Location, locs) |
||||
if labels != nil { |
||||
labels() |
||||
} |
||||
b.pb.endMessage(tagProfile_Sample, start) |
||||
b.flush() |
||||
} |
||||
|
||||
// pbLabel encodes a Label message to b.pb.
|
||||
func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) { |
||||
start := b.pb.startMessage() |
||||
b.pb.int64Opt(tagLabel_Key, b.stringIndex(key)) |
||||
b.pb.int64Opt(tagLabel_Str, b.stringIndex(str)) |
||||
b.pb.int64Opt(tagLabel_Num, num) |
||||
b.pb.endMessage(tag, start) |
||||
} |
||||
|
||||
// pbLine encodes a Line message to b.pb.
|
||||
func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { |
||||
start := b.pb.startMessage() |
||||
b.pb.uint64Opt(tagLine_FunctionID, funcID) |
||||
b.pb.int64Opt(tagLine_Line, line) |
||||
b.pb.endMessage(tag, start) |
||||
} |

// pbMapping encodes a Mapping message to b.pb.
func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) {
	start := b.pb.startMessage()
	b.pb.uint64Opt(tagMapping_ID, id)
	b.pb.uint64Opt(tagMapping_Start, base)
	b.pb.uint64Opt(tagMapping_Limit, limit)
	b.pb.uint64Opt(tagMapping_Offset, offset)
	b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file))
	b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID))
	// TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs).
	// Decide what to do about HasInlineFrames and HasLineNumbers.
	// Also, another approach to handle the mapping entry with
	// incomplete symbolization results is to duplicate the mapping
	// entry (but with different Has* fields values) and use
	// different entries for symbolized locations and unsymbolized locations.
	if hasFuncs {
		b.pb.bool(tagMapping_HasFunctions, true)
	}
	b.pb.endMessage(tag, start)
}

func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
	// Expand this one address using CallersFrames so we can cache
	// each expansion. In general, CallersFrames takes a whole
	// stack, but in this case we know there will be no skips in
	// the stack and we have return PCs anyway.
	frames := runtime.CallersFrames([]uintptr{addr})
	frame, more := frames.Next()
	if frame.Function == "runtime.goexit" {
		// Short-circuit if we see runtime.goexit so the loop
		// below doesn't allocate a useless empty location.
		return nil, 0
	}

	symbolizeResult := lookupTried
	if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
		symbolizeResult |= lookupFailed
	}

	if frame.PC == 0 {
		// If we failed to resolve the frame, at least make up
		// a reasonable call PC. This mostly happens in tests.
		frame.PC = addr - 1
	}
	ret := []runtime.Frame{frame}
	for frame.Function != "runtime.goexit" && more {
		frame, more = frames.Next()
		ret = append(ret, frame)
	}
	return ret, symbolizeResult
}
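
To see why one PC can expand into several frames, here is a small self-contained demo (not part of the vendored file) that expands the leaf return PC of the current stack the same way allFrames does; when the call at that PC was inlined, CallersFrames yields one frame per inlining layer:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	pcs := make([]uintptr, 1)
	runtime.Callers(1, pcs) // capture just the leaf return PC
	frames := runtime.CallersFrames(pcs)
	for {
		frame, more := frames.Next()
		fmt.Println(frame.Function, frame.File, frame.Line)
		if !more {
			break
		}
	}
}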

type locInfo struct {
	// location id assigned by the profileBuilder
	id uint64

	// sequence of PCs, including the fake PCs returned by the traceback
	// to represent inlined functions
	// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
	pcs []uintptr

	// firstPCFrames and firstPCSymbolizeResult hold the results of the
	// allFrames call for the first (leaf-most) PC this locInfo represents
	firstPCFrames          []runtime.Frame
	firstPCSymbolizeResult symbolizeFlag
}

// newProfileBuilder returns a new profileBuilder.
// CPU profiling data obtained from the runtime can be added
// by calling b.addCPUData, and then the eventual profile
// can be obtained by calling b.finish.
func newProfileBuilder(w io.Writer, opt ProfileBuilderOptions, mapping []memMap) *profileBuilder {
	zw := newGzipWriter(w)
	b := &profileBuilder{
		w:         w,
		zw:        zw,
		start:     time.Now(),
		strings:   []string{""},
		stringMap: map[string]int{"": 0},
		locs:      map[uintptr]locInfo{},
		funcs:     map[string]int{},
		opt:       opt,
	}
	b.mem = mapping
	return b
}

// build completes and returns the constructed profile.
func (b *profileBuilder) build() {
	b.end = time.Now()

	b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano())
	if b.havePeriod { // must be CPU profile
		b.pbValueType(tagProfile_SampleType, "samples", "count")
		b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds")
		b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds())
		b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds")
		b.pb.int64Opt(tagProfile_Period, b.period)
	}

	for i, m := range b.mem {
		hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed
		b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions)
	}

	// TODO: Anything for tagProfile_DropFrames?
	// TODO: Anything for tagProfile_KeepFrames?

	b.pb.strings(tagProfile_StringTable, b.strings)
	b.zw.Write(b.pb.data)
	b.zw.Close()
}
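
Not part of the vendored file: one way to sanity-check the header fields that build writes for a CPU profile is to parse the finished bytes back with github.com/google/pprof/profile (profile.Parse handles the gzip layer itself). The file name below is hypothetical:

package main

import (
	"fmt"
	"os"

	"github.com/google/pprof/profile"
)

func main() {
	f, err := os.Open("cpu.pprof") // hypothetical output of this builder
	if err != nil {
		panic(err)
	}
	defer f.Close()
	p, err := profile.Parse(f)
	if err != nil {
		panic(err)
	}
	for _, st := range p.SampleType {
		fmt.Println(st.Type, st.Unit) // samples count; cpu nanoseconds
	}
	fmt.Println(p.PeriodType.Type, p.PeriodType.Unit, p.Period)
}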

// appendLocsForStack appends the location IDs for the given stack trace to the given
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
// It may return an empty slice even if locs is non-empty, for example if locs consists
// solely of runtime.goexit. We still count these empty stacks in profiles in order to
// get the right cumulative sample count.
//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
	b.deck.reset()

	// The last frame might be truncated. Recover lost inline frames.
	stk = runtime_expandFinalInlineFrame(stk)

	for len(stk) > 0 {
		addr := stk[0]
		if l, ok := b.locs[addr]; ok {
			// When generating code for an inlined function, the compiler adds
			// NOP instructions to the outermost function as a placeholder for
			// each layer of inlining. When the runtime generates tracebacks for
			// stacks that include inlined functions, it uses the addresses of
			// those NOPs as "fake" PCs on the stack as if they were regular
			// function call sites. But if a profiling signal arrives while the
			// CPU is executing one of those NOPs, its PC will show up as a leaf
			// in the profile with its own Location entry. So, always check
			// whether addr is a "fake" PC in the context of the current call
			// stack by trying to add it to the inlining deck before assuming
			// that the deck is complete.
			if len(b.deck.pcs) > 0 {
				if added := b.deck.tryAdd(addr, l.firstPCFrames, l.firstPCSymbolizeResult); added {
					stk = stk[1:]
					continue
				}
			}

			// first record the location if there is any pending accumulated info.
			if id := b.emitLocation(); id > 0 {
				locs = append(locs, id)
			}

			// then, record the cached location.
			locs = append(locs, l.id)

			// Skip the matching pcs.
			//
			// Even if stk was truncated due to the stack depth
			// limit, expandFinalInlineFrame above has already
			// fixed the truncation, ensuring it is long enough.
			stk = stk[len(l.pcs):]
			continue
		}

		frames, symbolizeResult := allFrames(addr)
		if len(frames) == 0 { // runtime.goexit.
			if id := b.emitLocation(); id > 0 {
				locs = append(locs, id)
			}
			stk = stk[1:]
			continue
		}

		if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
			stk = stk[1:]
			continue
		}
		// add failed because this addr is not inlined with the
		// existing PCs in the deck. Flush the deck and retry handling
		// this pc.
		if id := b.emitLocation(); id > 0 {
			locs = append(locs, id)
		}

		// check cache again - previous emitLocation added a new entry
		if l, ok := b.locs[addr]; ok {
			locs = append(locs, l.id)
			stk = stk[len(l.pcs):] // skip the matching pcs.
		} else {
			b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
			stk = stk[1:]
		}
	}
	if id := b.emitLocation(); id > 0 { // emit remaining location.
		locs = append(locs, id)
	}
	return locs
}

// Here's an example of how Go 1.17 writes out inlined functions, compiled for
// linux/amd64. The disassembly of main.main shows two levels of inlining: main
// calls b, b calls a, a does some work.
//
//	inline.go:9  0x4553ec  90              NOPL                // func main() { b(v) }
//	inline.go:6  0x4553ed  90              NOPL                // func b(v *int) { a(v) }
//	inline.go:5  0x4553ee  48c7002a000000  MOVQ $0x2a, 0(AX)   // func a(v *int) { *v = 42 }
//
// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line
// 5), the runtime will report the stack as the MOVQ frame being called by the
// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line
// 9).
//
// The role of pcDeck is to collapse those three frames back into a single
// location at 0x4553ee, with file/line/function symbolization info representing
// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd
// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be
// for the caller of main.main. Because main.main was not inlined in its caller,
// the deck will reject the addition, and the fourth PC on the stack will get
// its own location.

// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
// The stack traces returned by runtime's traceback functions are fully
// expanded (at least for Go functions) and include the fake pcs representing
// inlined functions. The profile proto expects the inlined functions to be
// encoded in one Location message.
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
//
// Runtime does not directly expose whether a frame is for an inlined function
// and looking up debug info is not ideal, so we use a heuristic to filter
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
//
//	Frame's Func is nil (note: also true for non-Go functions), and
//	Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
//	Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive).
//
// While reading and processing the pcs in a stack trace one by one (from leaf to root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
// until we observe the entry function frame.
type pcDeck struct {
	pcs             []uintptr
	frames          []runtime.Frame
	symbolizeResult symbolizeFlag

	// firstPCFrames indicates the number of frames associated with the first
	// (leaf-most) PC in the deck
	firstPCFrames int
	// firstPCSymbolizeResult holds the results of the allFrames call for the
	// first (leaf-most) PC in the deck
	firstPCSymbolizeResult symbolizeFlag
}

func (d *pcDeck) reset() {
	d.pcs = d.pcs[:0]
	d.frames = d.frames[:0]
	d.symbolizeResult = 0
	d.firstPCFrames = 0
	d.firstPCSymbolizeResult = 0
}

// tryAdd tries to add the pc and Frames expanded from it (most likely one,
// since the stack trace is already fully expanded) and the symbolizeResult
// to the deck. If it fails the caller needs to flush the deck and retry.
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
	if existing := len(d.frames); existing > 0 {
		// 'd.frames' are all expanded from one 'pc' and represent all
		// inlined functions so we check only the last one.
		newFrame := frames[0]
		last := d.frames[existing-1]
		if last.Func != nil { // the last frame can't be inlined. Flush.
			return false
		}
		if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
			return false
		}

		if last.Entry != newFrame.Entry { // newFrame is for a different function.
			return false
		}
		if last.Function == newFrame.Function { // maybe recursion.
			return false
		}
	}
	d.pcs = append(d.pcs, pc)
	d.frames = append(d.frames, frames...)
	d.symbolizeResult |= symbolizeResult
	if len(d.pcs) == 1 {
		d.firstPCFrames = len(d.frames)
		d.firstPCSymbolizeResult = symbolizeResult
	}
	return true
}

// emitLocation emits the new location and function information recorded in the deck
// and returns the location ID encoded in the profile protobuf.
// It emits to b.pb, so there must be no message encoding in progress.
// It resets the deck.
func (b *profileBuilder) emitLocation() uint64 {
	if len(b.deck.pcs) == 0 {
		return 0
	}
	defer b.deck.reset()

	addr := b.deck.pcs[0]
	firstFrame := b.deck.frames[0]

	// We can't write out functions while in the middle of the
	// Location message, so record new functions we encounter and
	// write them out after the Location.
	type newFunc struct {
		id         uint64
		name, file string
		startLine  int64
	}
	newFuncs := make([]newFunc, 0, 8)

	id := uint64(len(b.locs)) + 1
	b.locs[addr] = locInfo{
		id:                     id,
		pcs:                    append([]uintptr{}, b.deck.pcs...),
		firstPCSymbolizeResult: b.deck.firstPCSymbolizeResult,
		firstPCFrames:          append([]runtime.Frame{}, b.deck.frames[:b.deck.firstPCFrames]...),
	}

	start := b.pb.startMessage()
	b.pb.uint64Opt(tagLocation_ID, id)
	b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
	for _, frame := range b.deck.frames {
		// Write out each line in frame expansion.
		funcID := uint64(b.funcs[frame.Function])
		if funcID == 0 {
			funcID = uint64(len(b.funcs)) + 1
			b.funcs[frame.Function] = int(funcID)
			var name string
			if b.opt.GenericsFrames {
				name = runtime_FrameSymbolName(&frame)
			} else {
				name = frame.Function
			}
			newFuncs = append(newFuncs, newFunc{
				id:        funcID,
				name:      name,
				file:      frame.File,
				startLine: int64(runtime_FrameStartLine(&frame)),
			})
		}
		b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
	}
	for i := range b.mem {
		if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
			b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))

			m := b.mem[i]
			m.funcs |= b.deck.symbolizeResult
			b.mem[i] = m
			break
		}
	}
	b.pb.endMessage(tagProfile_Location, start)

	// Write out functions we found during frame expansion.
	for _, fn := range newFuncs {
		start := b.pb.startMessage()
		b.pb.uint64Opt(tagFunction_ID, fn.id)
		b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
		b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
		b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
		b.pb.int64Opt(tagFunction_StartLine, fn.startLine)
		b.pb.endMessage(tagProfile_Function, start)
	}

	b.flush()
	return id
}

func readMapping() []memMap {
	data, _ := os.ReadFile("/proc/self/maps")
	var mem []memMap
	parseProcSelfMaps(data, func(lo, hi, offset uint64, file, buildID string) {
		mem = append(mem, memMap{
			start:   uintptr(lo),
			end:     uintptr(hi),
			offset:  offset,
			file:    file,
			buildID: buildID,
			fake:    false,
		})
	})
	if len(mem) == 0 { // pprof expects a map entry, so fake one.
		mem = []memMap{{
			start:   uintptr(0),
			end:     uintptr(0),
			offset:  0,
			file:    "",
			buildID: "",
			fake:    true,
		}}
	}
	return mem
}

var space = []byte(" ")
var newline = []byte("\n")

func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
	// $ cat /proc/self/maps
	// 00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat
	// 0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat
	// 0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat
	// 014ab000-014cc000 rw-p 00000000 00:00 0 [heap]
	// 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive
	// 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
	// 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
	// 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
	// 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
	// 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0
	// 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
	// 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0
	// 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0
	// 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
	// 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
	// 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0
	// 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack]
	// 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso]
	// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]

	var line []byte
	// next removes and returns the next field in the line.
	// It also removes from line any spaces following the field.
	next := func() []byte {
		var f []byte
		f, line, _ = bytesCut(line, space)
		line = bytes.TrimLeft(line, " ")
		return f
	}

	for len(data) > 0 {
		line, data, _ = bytesCut(data, newline)
		addr := next()
		loStr, hiStr, ok := stringsCut(string(addr), "-")
		if !ok {
			continue
		}
		lo, err := strconv.ParseUint(loStr, 16, 64)
		if err != nil {
			continue
		}
		hi, err := strconv.ParseUint(hiStr, 16, 64)
		if err != nil {
			continue
		}
		perm := next()
		if len(perm) < 4 || perm[2] != 'x' {
			// Only interested in executable mappings.
			continue
		}
		offset, err := strconv.ParseUint(string(next()), 16, 64)
		if err != nil {
			continue
		}
		next()          // dev
		inode := next() // inode
		if line == nil {
			continue
		}
		file := string(line)

		// Trim deleted file marker.
		deletedStr := " (deleted)"
		deletedLen := len(deletedStr)
		if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr {
			file = file[:len(file)-deletedLen]
		}

		if len(inode) == 1 && inode[0] == '0' && file == "" {
			// Huge-page text mappings list the initial fragment of
			// mapped but unpopulated memory as being inode 0.
			// Don't report that part.
			// But [vdso] and [vsyscall] are inode 0, so let non-empty file names through.
			continue
		}

		// TODO: pprof's remapMappingIDs makes one adjustment:
		// 1. If there is an /anon_hugepage mapping first and it is
		// consecutive to a next mapping, drop the /anon_hugepage.
		// There's no indication why this is needed.
		// Let's try not doing this and see what breaks.
		// If we do need it, it would go here, before we
		// enter the mappings into b.mem in the first place.

		buildID, _ := elfBuildID(file)
		addMapping(lo, hi, offset, file, buildID)
	}
}
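
For illustration (not part of the vendored file), here is a standalone sketch of the same per-line steps on one sample maps entry. Unlike parseProcSelfMaps it uses strings.Fields, so it would mishandle file paths containing spaces:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	line := "7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so"
	fields := strings.Fields(line)
	addr, perm, offStr, file := fields[0], fields[1], fields[2], fields[5]
	loStr, hiStr, _ := strings.Cut(addr, "-")
	lo, _ := strconv.ParseUint(loStr, 16, 64)
	hi, _ := strconv.ParseUint(hiStr, 16, 64)
	off, _ := strconv.ParseUint(offStr, 16, 64)
	if len(perm) >= 4 && perm[2] == 'x' { // executable mappings only
		fmt.Printf("%#x-%#x +%#x %s\n", lo, hi, off, file)
	}
}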

// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, nil, false.
//
// Cut returns slices of the original slice s, not copies.
func bytesCut(s, sep []byte) (before, after []byte, found bool) {
	if i := bytes.Index(s, sep); i >= 0 {
		return s[:i], s[i+len(sep):], true
	}
	return s, nil, false
}

// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, "", false.
func stringsCut(s, sep string) (before, after string, found bool) {
	if i := strings.Index(s, sep); i >= 0 {
		return s[:i], s[i+len(sep):], true
	}
	return s, "", false
}
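
These two helpers reproduce bytes.Cut and strings.Cut, which entered the standard library in Go 1.18; keeping local copies presumably lets the vendored package build on older toolchains. For comparison, the equivalent standard-library call:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Same behavior as the local stringsCut helper above.
	before, after, found := strings.Cut("00400000-0040b000", "-")
	fmt.Println(before, after, found) // 00400000 0040b000 true
}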

vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/protobuf.go (generated, vendored)
@ -0,0 +1,141 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pprof

// A protobuf is a simple protocol buffer encoder.
type protobuf struct {
	data []byte
	tmp  [16]byte
	nest int
}

func (b *protobuf) varint(x uint64) {
	for x >= 128 {
		b.data = append(b.data, byte(x)|0x80)
		x >>= 7
	}
	b.data = append(b.data, byte(x))
}
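
The loop above is the standard protobuf base-128 varint: 7-bit groups, least significant first, with the continuation bit 0x80 set on every byte but the last. A standalone sketch (not part of the vendored file) that mirrors it on a plain byte slice:

package main

import "fmt"

// appendVarint mirrors protobuf.varint above.
func appendVarint(data []byte, x uint64) []byte {
	for x >= 128 {
		data = append(data, byte(x)|0x80)
		x >>= 7
	}
	return append(data, byte(x))
}

func main() {
	// 300 = 0b1_0010_1100 splits into groups 0101100 and 0000010,
	// emitted low group first with the continuation bit on the first byte.
	fmt.Printf("% x\n", appendVarint(nil, 300)) // ac 02
	fmt.Printf("% x\n", appendVarint(nil, 1))   // 01
}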

func (b *protobuf) length(tag int, len int) {
	b.varint(uint64(tag)<<3 | 2)
	b.varint(uint64(len))
}

func (b *protobuf) uint64(tag int, x uint64) {
	// append varint to b.data
	b.varint(uint64(tag)<<3 | 0)
	b.varint(x)
}

func (b *protobuf) uint64s(tag int, x []uint64) {
	if len(x) > 2 {
		// Use packed encoding
		n1 := len(b.data)
		for _, u := range x {
			b.varint(u)
		}
		n2 := len(b.data)
		b.length(tag, n2-n1)
		n3 := len(b.data)
		copy(b.tmp[:], b.data[n2:n3])
		copy(b.data[n1+(n3-n2):], b.data[n1:n2])
		copy(b.data[n1:], b.tmp[:n3-n2])
		return
	}
	for _, u := range x {
		b.uint64(tag, u)
	}
}

func (b *protobuf) uint64Opt(tag int, x uint64) {
	if x == 0 {
		return
	}
	b.uint64(tag, x)
}

func (b *protobuf) int64(tag int, x int64) {
	u := uint64(x)
	b.uint64(tag, u)
}

func (b *protobuf) int64Opt(tag int, x int64) {
	if x == 0 {
		return
	}
	b.int64(tag, x)
}

func (b *protobuf) int64s(tag int, x []int64) {
	if len(x) > 2 {
		// Use packed encoding
		n1 := len(b.data)
		for _, u := range x {
			b.varint(uint64(u))
		}
		n2 := len(b.data)
		b.length(tag, n2-n1)
		n3 := len(b.data)
		copy(b.tmp[:], b.data[n2:n3])
		copy(b.data[n1+(n3-n2):], b.data[n1:n2])
		copy(b.data[n1:], b.tmp[:n3-n2])
		return
	}
	for _, u := range x {
		b.int64(tag, u)
	}
}

func (b *protobuf) string(tag int, x string) {
	b.length(tag, len(x))
	b.data = append(b.data, x...)
}

func (b *protobuf) strings(tag int, x []string) {
	for _, s := range x {
		b.string(tag, s)
	}
}

func (b *protobuf) stringOpt(tag int, x string) {
	if x == "" {
		return
	}
	b.string(tag, x)
}

func (b *protobuf) bool(tag int, x bool) {
	if x {
		b.uint64(tag, 1)
	} else {
		b.uint64(tag, 0)
	}
}

func (b *protobuf) boolOpt(tag int, x bool) {
	if !x {
		return
	}
	b.bool(tag, x)
}

type msgOffset int

func (b *protobuf) startMessage() msgOffset {
	b.nest++
	return msgOffset(len(b.data))
}

func (b *protobuf) endMessage(tag int, start msgOffset) {
	n1 := int(start)
	n2 := len(b.data)
	b.length(tag, n2-n1)
	n3 := len(b.data)
	copy(b.tmp[:], b.data[n2:n3])
	copy(b.data[n1+(n3-n2):], b.data[n1:n2])
	copy(b.data[n1:], b.tmp[:n3-n2])
	b.nest--
}
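
endMessage and the packed encoders (uint64s, int64s) share one trick: a length-delimited record's byte length is only known after the payload is written, so the payload goes into data first, the tag+length header is appended after it, and three copies rotate the header to the front. The 16-byte tmp buffer is plenty, since the header is just two small varints. A standalone illustration (not part of the vendored file), with the header pre-encoded for clarity:

package main

import "fmt"

func main() {
	// Payload "body" already encoded, then header 0x0a 0x04
	// (tag 1, wire type 2, length 4) appended after it.
	data := []byte{'b', 'o', 'd', 'y', 0x0a, 0x04}
	n1, n2, n3 := 0, 4, 6
	var tmp [16]byte
	copy(tmp[:], data[n2:n3])            // stash the header
	copy(data[n1+(n3-n2):], data[n1:n2]) // shift the payload right
	copy(data[n1:], tmp[:n3-n2])         // header now precedes the payload
	fmt.Printf("% x\n", data) // 0a 04 62 6f 64 79
}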

@ -0,0 +1,17 @@
//go:build go1.16 && !go1.23
// +build go1.16,!go1.23

package pprof

// unsafe is required for go:linkname
import _ "unsafe"

//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr

//go:linkname runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_cyclesPerSecond() int64

func Runtime_cyclesPerSecond() int64 {
	return runtime_cyclesPerSecond()
}

vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/stub_go20.go (generated, vendored)
@ -0,0 +1,16 @@
//go:build go1.16 && !go1.21
// +build go1.16,!go1.21

package pprof

import "runtime"

// runtime_FrameStartLine is defined in runtime/symtab.go.
func runtime_FrameStartLine(f *runtime.Frame) int {
	return 0
}

// runtime_FrameSymbolName is defined in runtime/symtab.go.
func runtime_FrameSymbolName(f *runtime.Frame) string {
	return f.Function
}

vendor/github.com/grafana/pyroscope-go/godeltaprof/internal/pprof/stub_go21.go (generated, vendored)
@ -0,0 +1,21 @@
//go:build go1.21
// +build go1.21

package pprof

import (
	"runtime"
	_ "unsafe"
)

// runtime_FrameStartLine is defined in runtime/symtab.go.
//
//go:noescape
//go:linkname runtime_FrameStartLine runtime/pprof.runtime_FrameStartLine
func runtime_FrameStartLine(f *runtime.Frame) int

// runtime_FrameSymbolName is defined in runtime/symtab.go.
//
//go:noescape
//go:linkname runtime_FrameSymbolName runtime/pprof.runtime_FrameSymbolName
func runtime_FrameSymbolName(f *runtime.Frame) string

@ -0,0 +1,9 @@
package godeltaprof

type ProfileOptions struct {
	// For go1.21+, if true, use runtime_FrameSymbolName, which produces frames
	// with generic type parameters spelled out, for example [go.shape.int].
	// For go1.21+, if false, use runtime.Frame.Function, which produces frames
	// with generic types omitted: [...].
	// Pre-1.21, runtime.Frame.Function is always used, so generic types are omitted: [...].
	GenericsFrames bool
	LazyMappings   bool
}
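
For context, a sketch of how these options are consumed by the package's public profilers. The constructor and method names here are an assumption based on godeltaprof's exported API, not confirmed by this diff; consult the package docs for the authoritative signatures:

package main

import (
	"os"

	"github.com/grafana/pyroscope-go/godeltaprof"
)

func main() {
	// Assumed constructor name (NewHeapProfilerWithOptions).
	hp := godeltaprof.NewHeapProfilerWithOptions(godeltaprof.ProfileOptions{
		GenericsFrames: true, // keep [go.shape.int]-style frames on go1.21+
		LazyMappings:   true,
	})
	_ = hp.Profile(os.Stdout) // assumed to write one delta heap profile
}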

@ -0,0 +1,16 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.

package flate

// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
//	len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

@ -0,0 +1,68 @@
// Copied from S2 implementation.

//go:build !appengine && !noasm && gc && !noasm

#include "textflag.h"

// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX

	// matchLen
	XORL SI, SI
	CMPL DX, $0x08
	JB   matchlen_match4_standalone

matchlen_loopback_standalone:
	MOVQ  (AX)(SI*1), BX
	XORQ  (CX)(SI*1), BX
	TESTQ BX, BX
	JZ    matchlen_loop_standalone

#ifdef GOAMD64_v3
	TZCNTQ BX, BX
#else
	BSFQ BX, BX
#endif
	SARQ $0x03, BX
	LEAL (SI)(BX*1), SI
	JMP  gen_match_len_end

matchlen_loop_standalone:
	LEAL -8(DX), DX
	LEAL 8(SI), SI
	CMPL DX, $0x08
	JAE  matchlen_loopback_standalone

matchlen_match4_standalone:
	CMPL DX, $0x04
	JB   matchlen_match2_standalone
	MOVL (AX)(SI*1), BX
	CMPL (CX)(SI*1), BX
	JNE  matchlen_match2_standalone
	LEAL -4(DX), DX
	LEAL 4(SI), SI

matchlen_match2_standalone:
	CMPL DX, $0x02
	JB   matchlen_match1_standalone
	MOVW (AX)(SI*1), BX
	CMPW (CX)(SI*1), BX
	JNE  matchlen_match1_standalone
	LEAL -2(DX), DX
	LEAL 2(SI), SI

matchlen_match1_standalone:
	CMPL DX, $0x01
	JB   gen_match_len_end
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE  gen_match_len_end
	INCL SI

gen_match_len_end:
	MOVQ SI, ret+48(FP)
	RET

@ -0,0 +1,33 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.

package flate

import (
	"encoding/binary"
	"math/bits"
)

// matchLen returns the maximum common prefix length of a and b.
// a must be the shorter of the two.
func matchLen(a, b []byte) (n int) {
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}

	for i := range a {
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}
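
The 8-byte fast path (the portable twin of the XORQ/TZCNTQ sequence in the assembly above) is easiest to see on a concrete input. A standalone demo, not part of the vendored file: the XOR of two little-endian 8-byte loads is zero exactly in the bytes that match, so counting trailing zero bits and shifting right by 3 (dividing by 8) gives the matched byte count:

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

func main() {
	a := []byte("abcdXYZ0")
	b := []byte("abcdWXYZ")
	// Little-endian loads put the first byte in the lowest bits, so the
	// matching 4-byte prefix "abcd" contributes 32 trailing zero bits.
	diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
	fmt.Println(bits.TrailingZeros64(diff) >> 3) // 4
}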
File diff suppressed because it is too large