labels: Reduce allocated memory and latency of Hash method; Added tests. (#8025)
* labels: Reduce allocated memory by Hash method in edge cases; Added tests. Old: /tmp/___BenchmarkLabels_Hash_in_github_com_prometheus_prometheus_pkg_labels -test.v -test.bench ^\QBenchmarkLabels_Hash\E$ -test.run ^$ goos: linux goarch: amd64 pkg: github.com/prometheus/prometheus/pkg/labels BenchmarkLabels_Hash BenchmarkLabels_Hash/typical_labels_under_1KB BenchmarkLabels_Hash/typical_labels_under_1KB-12 5366161 259 ns/op 0 B/op 0 allocs/op BenchmarkLabels_Hash/bigger_labels_over_1KB BenchmarkLabels_Hash/bigger_labels_over_1KB-12 1700371 767 ns/op 2048 B/op 1 allocs/op BenchmarkLabels_Hash/extremely_large_label_value_10MB BenchmarkLabels_Hash/extremely_large_label_value_10MB-12 356 3743115 ns/op 10523442 B/op 1 allocs/op PASS New: /tmp/___BenchmarkLabels_Hash_in_github_com_prometheus_prometheus_pkg_labels -test.v -test.bench ^\QBenchmarkLabels_Hash\E$ -test.run ^$ goos: linux goarch: amd64 pkg: github.com/prometheus/prometheus/pkg/labels BenchmarkLabels_Hash BenchmarkLabels_Hash/typical_labels_under_1KB BenchmarkLabels_Hash/typical_labels_under_1KB-12 4758883 259 ns/op 0 B/op 0 allocs/op BenchmarkLabels_Hash/bigger_labels_over_1KB BenchmarkLabels_Hash/bigger_labels_over_1KB-12 3324492 357 ns/op 80 B/op 1 allocs/op BenchmarkLabels_Hash/extremely_large_label_value_10MB BenchmarkLabels_Hash/extremely_large_label_value_10MB-12 1087 1083949 ns/op 9734 B/op 1 allocs/op PASS Process finished with exit code 0 Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Addressed Kemal's comment. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Moved to v2 xxhash for improvements. 
New: /tmp/___BenchmarkLabels_Hash_in_github_com_prometheus_prometheus_pkg_labels -test.v -test.bench ^\QBenchmarkLabels_Hash\E$ -test.run ^$ -test.benchtime 10s goos: linux goarch: amd64 pkg: github.com/prometheus/prometheus/pkg/labels BenchmarkLabels_Hash BenchmarkLabels_Hash/typical_labels_under_1KB BenchmarkLabels_Hash/typical_labels_under_1KB-12 53447894 221 ns/op 0 B/op 0 allocs/op BenchmarkLabels_Hash/bigger_labels_over_1KB BenchmarkLabels_Hash/bigger_labels_over_1KB-12 42341754 326 ns/op 0 B/op 0 allocs/op BenchmarkLabels_Hash/extremely_large_label_value_10MB BenchmarkLabels_Hash/extremely_large_label_value_10MB-12 10000 1248546 ns/op 0 B/op 0 allocs/op PASS Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Removed old xxhash package. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Pinned minor version. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> pull/8068/head
parent
d49f267f76
commit
9981b3f3ee
@ -1,22 +0,0 @@ |
||||
Copyright (c) 2016 Caleb Spare |
||||
|
||||
MIT License |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining |
||||
a copy of this software and associated documentation files (the |
||||
"Software"), to deal in the Software without restriction, including |
||||
without limitation the rights to use, copy, modify, merge, publish, |
||||
distribute, sublicense, and/or sell copies of the Software, and to |
||||
permit persons to whom the Software is furnished to do so, subject to |
||||
the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be |
||||
included in all copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
@ -1,50 +0,0 @@ |
||||
# xxhash |
||||
|
||||
[GoDoc](https://godoc.org/github.com/cespare/xxhash) |
||||
|
||||
xxhash is a Go implementation of the 64-bit |
||||
[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a |
||||
high-quality hashing algorithm that is much faster than anything in the Go |
||||
standard library. |
||||
|
||||
The API is very small, taking its cue from the other hashing packages in the |
||||
standard library: |
||||
|
||||
$ go doc github.com/cespare/xxhash ! |
||||
package xxhash // import "github.com/cespare/xxhash" |
||||
|
||||
Package xxhash implements the 64-bit variant of xxHash (XXH64) as described |
||||
at http://cyan4973.github.io/xxHash/. |
||||
|
||||
func New() hash.Hash64 |
||||
func Sum64(b []byte) uint64 |
||||
func Sum64String(s string) uint64 |
||||
|
||||
This implementation provides a fast pure-Go implementation and an even faster |
||||
assembly implementation for amd64. |
||||
|
||||
## Benchmarks |
||||
|
||||
Here are some quick benchmarks comparing the pure-Go and assembly |
||||
implementations of Sum64 against another popular Go XXH64 implementation, |
||||
[github.com/OneOfOne/xxhash](https://github.com/OneOfOne/xxhash): |
||||
|
||||
| input size | OneOfOne | cespare (purego) | cespare | |
||||
| --- | --- | --- | --- | |
||||
| 5 B | 416 MB/s | 720 MB/s | 872 MB/s | |
||||
| 100 B | 3980 MB/s | 5013 MB/s | 5252 MB/s | |
||||
| 4 KB | 12727 MB/s | 12999 MB/s | 13026 MB/s | |
||||
| 10 MB | 9879 MB/s | 10775 MB/s | 10913 MB/s | |
||||
|
||||
These numbers were generated with: |
||||
|
||||
``` |
||||
$ go test -benchtime 10s -bench '/OneOfOne,' |
||||
$ go test -tags purego -benchtime 10s -bench '/xxhash,' |
||||
$ go test -benchtime 10s -bench '/xxhash,' |
||||
``` |
||||
|
||||
## Projects using this package |
||||
|
||||
- [InfluxDB](https://github.com/influxdata/influxdb) |
||||
- [Prometheus](https://github.com/prometheus/prometheus) |
||||
@ -1,6 +0,0 @@ |
||||
module github.com/cespare/xxhash |
||||
|
||||
require ( |
||||
github.com/OneOfOne/xxhash v1.2.2 |
||||
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 |
||||
) |
||||
@ -1,4 +0,0 @@ |
||||
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= |
||||
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= |
||||
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= |
||||
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= |
||||
@ -1,14 +0,0 @@ |
||||
// +build !go1.9
|
||||
|
||||
package xxhash |
||||
|
||||
// TODO(caleb): After Go 1.10 comes out, remove this fallback code.
|
||||
|
||||
// The rolN helpers rotate a 64-bit word left by N bits using plain shifts.
// Each result combines the bits shifted up by N with the N bits that fell
// off the top, shifted down by 64-N.

// rol1 rotates x left by 1 bit.
func rol1(x uint64) uint64 {
	return x<<1 | x>>63
}

// rol7 rotates x left by 7 bits.
func rol7(x uint64) uint64 {
	return x<<7 | x>>57
}

// rol11 rotates x left by 11 bits.
func rol11(x uint64) uint64 {
	return x<<11 | x>>53
}

// rol12 rotates x left by 12 bits.
func rol12(x uint64) uint64 {
	return x<<12 | x>>52
}

// rol18 rotates x left by 18 bits.
func rol18(x uint64) uint64 {
	return x<<18 | x>>46
}

// rol23 rotates x left by 23 bits.
func rol23(x uint64) uint64 {
	return x<<23 | x>>41
}

// rol27 rotates x left by 27 bits.
func rol27(x uint64) uint64 {
	return x<<27 | x>>37
}

// rol31 rotates x left by 31 bits.
func rol31(x uint64) uint64 {
	return x<<31 | x>>33
}
||||
@ -1,14 +0,0 @@ |
||||
// +build go1.9
|
||||
|
||||
package xxhash |
||||
|
||||
import "math/bits" |
||||
|
||||
// The rolN helpers rotate a 64-bit word left by N bits by delegating to
// bits.RotateLeft64.

// rol1 rotates x left by 1 bit.
func rol1(x uint64) uint64 {
	return bits.RotateLeft64(x, 1)
}

// rol7 rotates x left by 7 bits.
func rol7(x uint64) uint64 {
	return bits.RotateLeft64(x, 7)
}

// rol11 rotates x left by 11 bits.
func rol11(x uint64) uint64 {
	return bits.RotateLeft64(x, 11)
}

// rol12 rotates x left by 12 bits.
func rol12(x uint64) uint64 {
	return bits.RotateLeft64(x, 12)
}

// rol18 rotates x left by 18 bits.
func rol18(x uint64) uint64 {
	return bits.RotateLeft64(x, 18)
}

// rol23 rotates x left by 23 bits.
func rol23(x uint64) uint64 {
	return bits.RotateLeft64(x, 23)
}

// rol27 rotates x left by 27 bits.
func rol27(x uint64) uint64 {
	return bits.RotateLeft64(x, 27)
}

// rol31 rotates x left by 31 bits.
func rol31(x uint64) uint64 {
	return bits.RotateLeft64(x, 31)
}
||||
@ -1,168 +0,0 @@ |
||||
// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
|
||||
// at http://cyan4973.github.io/xxHash/.
|
||||
package xxhash |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
"hash" |
||||
) |
||||
|
||||
// The five 64-bit multiplier constants used throughout this package's
// hashing rounds. They are typed so that arithmetic on them stays in uint64.
const prime1 uint64 = 11400714785074694791
const prime2 uint64 = 14029467366897019727
const prime3 uint64 = 1609587929392839161
const prime4 uint64 = 9650029242287828579
const prime5 uint64 = 2870177450012600261
||||
|
||||
// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
|
||||
// possible in the Go code is worth a small (but measurable) performance boost
|
||||
// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
|
||||
// convenience in the Go code in a few places where we need to intentionally
|
||||
// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
|
||||
// result overflows a uint64).
|
||||
var ( |
||||
prime1v = prime1 |
||||
prime2v = prime2 |
||||
prime3v = prime3 |
||||
prime4v = prime4 |
||||
prime5v = prime5 |
||||
) |
||||
|
||||
// xxh is the streaming hash state behind New.
//
// The field order is kept exactly as declared here because the assembly
// version of writeBlocks reads and writes v1..v4 at fixed offsets 0, 8, 16
// and 24 from the start of the struct.
type xxh struct {
	// The four running accumulators, updated once per 32-byte block.
	v1, v2, v3, v4 uint64

	// total is the number of bytes passed to Write since the last Reset.
	total int

	// mem buffers input that has not yet formed a complete 32-byte block;
	// n is how much of mem is used.
	mem [32]byte
	n   int
}
||||
|
||||
// New creates a new hash.Hash64 that implements the 64-bit xxHash algorithm.
|
||||
func New() hash.Hash64 { |
||||
var x xxh |
||||
x.Reset() |
||||
return &x |
||||
} |
||||
|
||||
func (x *xxh) Reset() { |
||||
x.n = 0 |
||||
x.total = 0 |
||||
x.v1 = prime1v + prime2 |
||||
x.v2 = prime2 |
||||
x.v3 = 0 |
||||
x.v4 = -prime1v |
||||
} |
||||
|
||||
func (x *xxh) Size() int { return 8 } |
||||
func (x *xxh) BlockSize() int { return 32 } |
||||
|
||||
// Write adds more data to x. It always returns len(b), nil.
|
||||
func (x *xxh) Write(b []byte) (n int, err error) { |
||||
n = len(b) |
||||
x.total += len(b) |
||||
|
||||
if x.n+len(b) < 32 { |
||||
// This new data doesn't even fill the current block.
|
||||
copy(x.mem[x.n:], b) |
||||
x.n += len(b) |
||||
return |
||||
} |
||||
|
||||
if x.n > 0 { |
||||
// Finish off the partial block.
|
||||
copy(x.mem[x.n:], b) |
||||
x.v1 = round(x.v1, u64(x.mem[0:8])) |
||||
x.v2 = round(x.v2, u64(x.mem[8:16])) |
||||
x.v3 = round(x.v3, u64(x.mem[16:24])) |
||||
x.v4 = round(x.v4, u64(x.mem[24:32])) |
||||
b = b[32-x.n:] |
||||
x.n = 0 |
||||
} |
||||
|
||||
if len(b) >= 32 { |
||||
// One or more full blocks left.
|
||||
b = writeBlocks(x, b) |
||||
} |
||||
|
||||
// Store any remaining partial block.
|
||||
copy(x.mem[:], b) |
||||
x.n = len(b) |
||||
|
||||
return |
||||
} |
||||
|
||||
func (x *xxh) Sum(b []byte) []byte { |
||||
s := x.Sum64() |
||||
return append( |
||||
b, |
||||
byte(s>>56), |
||||
byte(s>>48), |
||||
byte(s>>40), |
||||
byte(s>>32), |
||||
byte(s>>24), |
||||
byte(s>>16), |
||||
byte(s>>8), |
||||
byte(s), |
||||
) |
||||
} |
||||
|
||||
func (x *xxh) Sum64() uint64 { |
||||
var h uint64 |
||||
|
||||
if x.total >= 32 { |
||||
v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 |
||||
h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) |
||||
h = mergeRound(h, v1) |
||||
h = mergeRound(h, v2) |
||||
h = mergeRound(h, v3) |
||||
h = mergeRound(h, v4) |
||||
} else { |
||||
h = x.v3 + prime5 |
||||
} |
||||
|
||||
h += uint64(x.total) |
||||
|
||||
i, end := 0, x.n |
||||
for ; i+8 <= end; i += 8 { |
||||
k1 := round(0, u64(x.mem[i:i+8])) |
||||
h ^= k1 |
||||
h = rol27(h)*prime1 + prime4 |
||||
} |
||||
if i+4 <= end { |
||||
h ^= uint64(u32(x.mem[i:i+4])) * prime1 |
||||
h = rol23(h)*prime2 + prime3 |
||||
i += 4 |
||||
} |
||||
for i < end { |
||||
h ^= uint64(x.mem[i]) * prime5 |
||||
h = rol11(h) * prime1 |
||||
i++ |
||||
} |
||||
|
||||
h ^= h >> 33 |
||||
h *= prime2 |
||||
h ^= h >> 29 |
||||
h *= prime3 |
||||
h ^= h >> 32 |
||||
|
||||
return h |
||||
} |
||||
|
||||
// u64 decodes the first 8 bytes of b as a little-endian uint64.
func u64(b []byte) uint64 {
	return binary.LittleEndian.Uint64(b)
}

// u32 decodes the first 4 bytes of b as a little-endian uint32.
func u32(b []byte) uint32 {
	return binary.LittleEndian.Uint32(b)
}
||||
|
||||
func round(acc, input uint64) uint64 { |
||||
acc += input * prime2 |
||||
acc = rol31(acc) |
||||
acc *= prime1 |
||||
return acc |
||||
} |
||||
|
||||
func mergeRound(acc, val uint64) uint64 { |
||||
val = round(0, val) |
||||
acc ^= val |
||||
acc = acc*prime1 + prime4 |
||||
return acc |
||||
} |
||||
@ -1,12 +0,0 @@ |
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !purego
|
||||
|
||||
package xxhash |
||||
|
||||
// Sum64 computes the 64-bit xxHash digest of b.
//
// This is a declaration only: the function has no Go body in this file and
// is provided by the assembly routine ·Sum64 built under the same
// constraints (!appengine, gc, !purego).
//
//go:noescape
func Sum64(b []byte) uint64
||||
|
||||
// writeBlocks is a declaration only; the body is the assembly routine
// ·writeBlocks. It mixes the full 32-byte blocks of b into x's accumulators
// and returns the unconsumed tail of b. Callers pass at least one full block.
func writeBlocks(x *xxh, b []byte) []byte
||||
@ -1,233 +0,0 @@ |
||||
// +build !appengine |
||||
// +build gc |
||||
// +build !purego |
||||
|
||||
#include "textflag.h" |
||||
|
||||
// Register allocation: |
||||
// AX h |
||||
// CX pointer to advance through b |
||||
// DX n |
||||
// BX loop end |
||||
// R8 v1, k1 |
||||
// R9 v2 |
||||
// R10 v3 |
||||
// R11 v4 |
||||
// R12 tmp |
||||
// R13 prime1v |
||||
// R14 prime2v |
||||
// R15 prime4v |
||||
|
||||
// round reads from and advances the buffer pointer in CX.
// It loads the 8 bytes at (CX) into R12, advances CX by 8, and then updates
// the accumulator register r as: r = rotl(r + R12*R14, 31) * R13.
// It assumes that R13 has prime1v and R14 has prime2v. R12 is clobbered.
#define round(r) \
	MOVQ (CX), R12 \
	ADDQ $8, CX \
	IMULQ R14, R12 \
	ADDQ R12, r \
	ROLQ $31, r \
	IMULQ R13, r
||||
|
||||
// mergeRound applies a merge round on the two registers acc and val.
// val is first transformed in place as val = rotl(val*R14, 31) * R13, and
// then acc is updated as acc = (acc ^ val)*R13 + R15. val is overwritten.
// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
#define mergeRound(acc, val) \
	IMULQ R14, val \
	ROLQ $31, val \
	IMULQ R13, val \
	XORQ val, acc \
	IMULQ R13, acc \
	ADDQ R15, acc
||||
|
||||
// func Sum64(b []byte) uint64
//
// One-shot digest of b. Outline:
//   1. If len(b) >= 32, run the four accumulators over every full 32-byte
//      block and merge them into h (AX); otherwise h starts as prime5.
//   2. Add len(b) to h.
//   3. Consume the remaining bytes as 8-byte words, then at most one 4-byte
//      word, then single bytes.
//   4. Apply the final shift/xor/multiply mixing and return h.
// BX is used throughout as the moving "last valid start address" limit for
// the current step; CX is the read cursor.
TEXT ·Sum64(SB), NOSPLIT, $0-32
	// Load fixed primes.
	MOVQ ·prime1v(SB), R13
	MOVQ ·prime2v(SB), R14
	MOVQ ·prime4v(SB), R15

	// Load slice: CX = base pointer, DX = length, BX = one past the end.
	MOVQ b_base+0(FP), CX
	MOVQ b_len+8(FP), DX
	LEAQ (CX)(DX*1), BX

	// The first loop limit will be len(b)-32, i.e. BX now holds the last
	// address at which a full 32-byte block can start.
	SUBQ $32, BX

	// Check whether we have at least one block.
	CMPQ DX, $32
	JLT noBlocks

	// Set up initial state (v1, v2, v3, v4):
	// v1 = prime1+prime2, v2 = prime2, v3 = 0, v4 = 0-prime1.
	MOVQ R13, R8
	ADDQ R14, R8
	MOVQ R14, R9
	XORQ R10, R10
	XORQ R11, R11
	SUBQ R13, R11

	// Loop until CX > BX. Each iteration consumes 32 bytes, since every
	// round advances CX by 8.
blockLoop:
	round(R8)
	round(R9)
	round(R10)
	round(R11)

	CMPQ CX, BX
	JLE blockLoop

	// h = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18),
	// using R12 as scratch so v1..v4 stay intact for the merge rounds.
	MOVQ R8, AX
	ROLQ $1, AX
	MOVQ R9, R12
	ROLQ $7, R12
	ADDQ R12, AX
	MOVQ R10, R12
	ROLQ $12, R12
	ADDQ R12, AX
	MOVQ R11, R12
	ROLQ $18, R12
	ADDQ R12, AX

	// Fold each accumulator into h.
	mergeRound(AX, R8)
	mergeRound(AX, R9)
	mergeRound(AX, R10)
	mergeRound(AX, R11)

	JMP afterBlocks

noBlocks:
	// Short input: h starts from prime5.
	MOVQ ·prime5v(SB), AX

afterBlocks:
	// h += len(b)
	ADDQ DX, AX

	// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
	ADDQ $24, BX

	// Skip the word loop when fewer than 8 bytes remain.
	CMPQ CX, BX
	JG fourByte

wordLoop:
	// Calculate k1 = rotl(word*prime2, 31) * prime1 (a round with a zero
	// accumulator).
	MOVQ (CX), R8
	ADDQ $8, CX
	IMULQ R14, R8
	ROLQ $31, R8
	IMULQ R13, R8

	// h = rotl(h ^ k1, 27)*prime1 + prime4
	XORQ R8, AX
	ROLQ $27, AX
	IMULQ R13, AX
	ADDQ R15, AX

	CMPQ CX, BX
	JLE wordLoop

fourByte:
	// Move the limit to len(b)-4; skip if fewer than 4 bytes remain.
	ADDQ $4, BX
	CMPQ CX, BX
	JG singles

	// h ^= uint64(next 4 bytes) * prime1
	MOVL (CX), R8
	ADDQ $4, CX
	IMULQ R13, R8
	XORQ R8, AX

	// h = rotl(h, 23)*prime2 + prime3
	ROLQ $23, AX
	IMULQ R14, AX
	ADDQ ·prime3v(SB), AX

singles:
	// Move the limit to the end of b; skip if nothing is left.
	ADDQ $4, BX
	CMPQ CX, BX
	JGE finalize

singlesLoop:
	// h ^= uint64(next byte) * prime5
	MOVBQZX (CX), R12
	ADDQ $1, CX
	IMULQ ·prime5v(SB), R12
	XORQ R12, AX

	// h = rotl(h, 11) * prime1
	ROLQ $11, AX
	IMULQ R13, AX

	CMPQ CX, BX
	JL singlesLoop

finalize:
	// Final mixing: h ^= h>>33; h *= prime2; h ^= h>>29; h *= prime3;
	// h ^= h>>32. R12 holds the shifted copy at each step.
	MOVQ AX, R12
	SHRQ $33, R12
	XORQ R12, AX
	IMULQ R14, AX
	MOVQ AX, R12
	SHRQ $29, R12
	XORQ R12, AX
	IMULQ ·prime3v(SB), AX
	MOVQ AX, R12
	SHRQ $32, R12
	XORQ R12, AX

	MOVQ AX, ret+24(FP)
	RET
||||
|
||||
// writeBlocks uses the same registers as above except that it uses AX to store
// the x pointer.
//
// It loads v1..v4 from the first four quadwords of *x, runs them over every
// full 32-byte block of b, stores them back, and returns the unconsumed tail
// of b as a slice whose cap equals its len.

// func writeBlocks(x *xxh, b []byte) []byte
TEXT ·writeBlocks(SB), NOSPLIT, $0-56
	// Load fixed primes needed for round.
	MOVQ ·prime1v(SB), R13
	MOVQ ·prime2v(SB), R14

	// Load slice: CX = base pointer, DX = length, BX = last address at
	// which a full 32-byte block can start.
	MOVQ b_base+8(FP), CX
	MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below
	MOVQ b_len+16(FP), DX
	LEAQ (CX)(DX*1), BX
	SUBQ $32, BX

	// Load vN from x.
	MOVQ x+0(FP), AX
	MOVQ 0(AX), R8 // v1
	MOVQ 8(AX), R9 // v2
	MOVQ 16(AX), R10 // v3
	MOVQ 24(AX), R11 // v4

	// We don't need to check the loop condition here; this function is
	// always called with at least one block of data to process.
blockLoop:
	round(R8)
	round(R9)
	round(R10)
	round(R11)

	CMPQ CX, BX
	JLE blockLoop

	// Copy vN back to x.
	MOVQ R8, 0(AX)
	MOVQ R9, 8(AX)
	MOVQ R10, 16(AX)
	MOVQ R11, 24(AX)

	// Construct return slice.
	// NOTE: It's important that we don't construct a slice that has a base
	// pointer off the end of the original slice, as in Go 1.7+ this will
	// cause runtime crashes. (See discussion in, for example,
	// https://github.com/golang/go/issues/16772.)
	// Therefore, we calculate the length/cap first, and if they're zero, we
	// keep the old base. This is what the compiler does as well if you
	// write code like
	// b = b[len(b):]

	// New length is 32 - (CX - BX) -> BX+32 - CX.
	ADDQ $32, BX
	SUBQ CX, BX
	JZ afterSetBase // nothing left: keep the original base stored earlier

	// Non-empty tail: it starts at the current read position.
	MOVQ CX, ret_base+32(FP)

afterSetBase:
	MOVQ BX, ret_len+40(FP)
	MOVQ BX, ret_cap+48(FP) // set cap == len

	RET
||||
@ -1,75 +0,0 @@ |
||||
// +build !amd64 appengine !gc purego
|
||||
|
||||
package xxhash |
||||
|
||||
// Sum64 computes the 64-bit xxHash digest of b.
|
||||
func Sum64(b []byte) uint64 { |
||||
// A simpler version would be
|
||||
// x := New()
|
||||
// x.Write(b)
|
||||
// return x.Sum64()
|
||||
// but this is faster, particularly for small inputs.
|
||||
|
||||
n := len(b) |
||||
var h uint64 |
||||
|
||||
if n >= 32 { |
||||
v1 := prime1v + prime2 |
||||
v2 := prime2 |
||||
v3 := uint64(0) |
||||
v4 := -prime1v |
||||
for len(b) >= 32 { |
||||
v1 = round(v1, u64(b[0:8:len(b)])) |
||||
v2 = round(v2, u64(b[8:16:len(b)])) |
||||
v3 = round(v3, u64(b[16:24:len(b)])) |
||||
v4 = round(v4, u64(b[24:32:len(b)])) |
||||
b = b[32:len(b):len(b)] |
||||
} |
||||
h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) |
||||
h = mergeRound(h, v1) |
||||
h = mergeRound(h, v2) |
||||
h = mergeRound(h, v3) |
||||
h = mergeRound(h, v4) |
||||
} else { |
||||
h = prime5 |
||||
} |
||||
|
||||
h += uint64(n) |
||||
|
||||
i, end := 0, len(b) |
||||
for ; i+8 <= end; i += 8 { |
||||
k1 := round(0, u64(b[i:i+8:len(b)])) |
||||
h ^= k1 |
||||
h = rol27(h)*prime1 + prime4 |
||||
} |
||||
if i+4 <= end { |
||||
h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 |
||||
h = rol23(h)*prime2 + prime3 |
||||
i += 4 |
||||
} |
||||
for ; i < end; i++ { |
||||
h ^= uint64(b[i]) * prime5 |
||||
h = rol11(h) * prime1 |
||||
} |
||||
|
||||
h ^= h >> 33 |
||||
h *= prime2 |
||||
h ^= h >> 29 |
||||
h *= prime3 |
||||
h ^= h >> 32 |
||||
|
||||
return h |
||||
} |
||||
|
||||
func writeBlocks(x *xxh, b []byte) []byte { |
||||
v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 |
||||
for len(b) >= 32 { |
||||
v1 = round(v1, u64(b[0:8:len(b)])) |
||||
v2 = round(v2, u64(b[8:16:len(b)])) |
||||
v3 = round(v3, u64(b[16:24:len(b)])) |
||||
v4 = round(v4, u64(b[24:32:len(b)])) |
||||
b = b[32:len(b):len(b)] |
||||
} |
||||
x.v1, x.v2, x.v3, x.v4 = v1, v2, v3, v4 |
||||
return b |
||||
} |
||||
@ -1,10 +0,0 @@ |
||||
// +build appengine
|
||||
|
||||
// This file contains the safe implementations of otherwise unsafe-using code.
|
||||
|
||||
package xxhash |
||||
|
||||
// Sum64String computes the 64-bit xxHash digest of s.
|
||||
func Sum64String(s string) uint64 { |
||||
return Sum64([]byte(s)) |
||||
} |
||||
@ -1,30 +0,0 @@ |
||||
// +build !appengine
|
||||
|
||||
// This file encapsulates usage of unsafe.
|
||||
// xxhash_safe.go contains the safe implementations.
|
||||
|
||||
package xxhash |
||||
|
||||
import ( |
||||
"reflect" |
||||
"unsafe" |
||||
) |
||||
|
||||
// Sum64String computes the 64-bit xxHash digest of s.
|
||||
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
|
||||
//
|
||||
// TODO(caleb): Consider removing this if an optimization is ever added to make
|
||||
// it unnecessary: https://golang.org/issue/2205.
|
||||
//
|
||||
// TODO(caleb): We still have a function call; we could instead write Go/asm
|
||||
// copies of Sum64 for strings to squeeze out a bit more speed.
|
||||
func Sum64String(s string) uint64 { |
||||
// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ
|
||||
// for some discussion about this unsafe conversion.
|
||||
var b []byte |
||||
bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) |
||||
bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data |
||||
bh.Len = len(s) |
||||
bh.Cap = len(s) |
||||
return Sum64(b) |
||||
} |
||||
Loading…
Reference in new issue