go.mod: github.com/containerd/ttrpc v1.2.0
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
		
							
								
								
									
										31
									
								
								vendor/github.com/cespare/xxhash/v2/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								vendor/github.com/cespare/xxhash/v2/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -3,8 +3,7 @@ | ||||
| [](https://pkg.go.dev/github.com/cespare/xxhash/v2) | ||||
| [](https://github.com/cespare/xxhash/actions/workflows/test.yml) | ||||
|  | ||||
| xxhash is a Go implementation of the 64-bit | ||||
| [xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a | ||||
| xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a | ||||
| high-quality hashing algorithm that is much faster than anything in the Go | ||||
| standard library. | ||||
|  | ||||
| @@ -25,8 +24,11 @@ func (*Digest) WriteString(string) (int, error) | ||||
| func (*Digest) Sum64() uint64 | ||||
| ``` | ||||
|  | ||||
| This implementation provides a fast pure-Go implementation and an even faster | ||||
| assembly implementation for amd64. | ||||
| The package is written with optimized pure Go and also contains even faster | ||||
| assembly implementations for amd64 and arm64. If desired, the `purego` build tag | ||||
| opts into using the Go code even on those architectures. | ||||
|  | ||||
| [xxHash]: http://cyan4973.github.io/xxHash/ | ||||
|  | ||||
| ## Compatibility | ||||
|  | ||||
| @@ -45,19 +47,20 @@ I recommend using the latest release of Go. | ||||
| Here are some quick benchmarks comparing the pure-Go and assembly | ||||
| implementations of Sum64. | ||||
|  | ||||
| | input size | purego | asm | | ||||
| | --- | --- | --- | | ||||
| | 5 B   |  979.66 MB/s |  1291.17 MB/s  | | ||||
| | 100 B | 7475.26 MB/s | 7973.40 MB/s  | | ||||
| | 4 KB  | 17573.46 MB/s | 17602.65 MB/s | | ||||
| | 10 MB | 17131.46 MB/s | 17142.16 MB/s | | ||||
| | input size | purego    | asm       | | ||||
| | ---------- | --------- | --------- | | ||||
| | 4 B        |  1.3 GB/s |  1.2 GB/s | | ||||
| | 16 B       |  2.9 GB/s |  3.5 GB/s | | ||||
| | 100 B      |  6.9 GB/s |  8.1 GB/s | | ||||
| | 4 KB       | 11.7 GB/s | 16.7 GB/s | | ||||
| | 10 MB      | 12.0 GB/s | 17.3 GB/s | | ||||
|  | ||||
| These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using | ||||
| the following commands under Go 1.11.2: | ||||
| These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C | ||||
| CPU using the following commands under Go 1.19.2: | ||||
|  | ||||
| ``` | ||||
| $ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes' | ||||
| $ go test -benchtime 10s -bench '/xxhash,direct,bytes' | ||||
| benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') | ||||
| benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') | ||||
| ``` | ||||
|  | ||||
| ## Projects using this package | ||||
|   | ||||
							
								
								
									
										10
									
								
								vendor/github.com/cespare/xxhash/v2/testall.sh
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/github.com/cespare/xxhash/v2/testall.sh
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| #!/bin/bash | ||||
| set -eu -o pipefail | ||||
|  | ||||
| # Small convenience script for running the tests with various combinations of | ||||
| # arch/tags. This assumes we're running on amd64 and have qemu available. | ||||
|  | ||||
| go test ./... | ||||
| go test -tags purego ./... | ||||
| GOARCH=arm64 go test | ||||
| GOARCH=arm64 go test -tags purego | ||||
							
								
								
									
										47
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										47
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -16,19 +16,11 @@ const ( | ||||
| 	prime5 uint64 = 2870177450012600261 | ||||
| ) | ||||
|  | ||||
| // NOTE(caleb): I'm using both consts and vars of the primes. Using consts where | ||||
| // possible in the Go code is worth a small (but measurable) performance boost | ||||
| // by avoiding some MOVQs. Vars are needed for the asm and also are useful for | ||||
| // convenience in the Go code in a few places where we need to intentionally | ||||
| // avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the | ||||
| // result overflows a uint64). | ||||
| var ( | ||||
| 	prime1v = prime1 | ||||
| 	prime2v = prime2 | ||||
| 	prime3v = prime3 | ||||
| 	prime4v = prime4 | ||||
| 	prime5v = prime5 | ||||
| ) | ||||
| // Store the primes in an array as well. | ||||
| // | ||||
| // The consts are used when possible in Go code to avoid MOVs but we need a | ||||
| // contiguous array for the assembly code. | ||||
| var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} | ||||
|  | ||||
| // Digest implements hash.Hash64. | ||||
| type Digest struct { | ||||
| @@ -50,10 +42,10 @@ func New() *Digest { | ||||
|  | ||||
| // Reset clears the Digest's state so that it can be reused. | ||||
| func (d *Digest) Reset() { | ||||
| 	d.v1 = prime1v + prime2 | ||||
| 	d.v1 = primes[0] + prime2 | ||||
| 	d.v2 = prime2 | ||||
| 	d.v3 = 0 | ||||
| 	d.v4 = -prime1v | ||||
| 	d.v4 = -primes[0] | ||||
| 	d.total = 0 | ||||
| 	d.n = 0 | ||||
| } | ||||
| @@ -69,21 +61,23 @@ func (d *Digest) Write(b []byte) (n int, err error) { | ||||
| 	n = len(b) | ||||
| 	d.total += uint64(n) | ||||
|  | ||||
| 	memleft := d.mem[d.n&(len(d.mem)-1):] | ||||
|  | ||||
| 	if d.n+n < 32 { | ||||
| 		// This new data doesn't even fill the current block. | ||||
| 		copy(d.mem[d.n:], b) | ||||
| 		copy(memleft, b) | ||||
| 		d.n += n | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if d.n > 0 { | ||||
| 		// Finish off the partial block. | ||||
| 		copy(d.mem[d.n:], b) | ||||
| 		c := copy(memleft, b) | ||||
| 		d.v1 = round(d.v1, u64(d.mem[0:8])) | ||||
| 		d.v2 = round(d.v2, u64(d.mem[8:16])) | ||||
| 		d.v3 = round(d.v3, u64(d.mem[16:24])) | ||||
| 		d.v4 = round(d.v4, u64(d.mem[24:32])) | ||||
| 		b = b[32-d.n:] | ||||
| 		b = b[c:] | ||||
| 		d.n = 0 | ||||
| 	} | ||||
|  | ||||
| @@ -133,21 +127,20 @@ func (d *Digest) Sum64() uint64 { | ||||
|  | ||||
| 	h += d.total | ||||
|  | ||||
| 	i, end := 0, d.n | ||||
| 	for ; i+8 <= end; i += 8 { | ||||
| 		k1 := round(0, u64(d.mem[i:i+8])) | ||||
| 	b := d.mem[:d.n&(len(d.mem)-1)] | ||||
| 	for ; len(b) >= 8; b = b[8:] { | ||||
| 		k1 := round(0, u64(b[:8])) | ||||
| 		h ^= k1 | ||||
| 		h = rol27(h)*prime1 + prime4 | ||||
| 	} | ||||
| 	if i+4 <= end { | ||||
| 		h ^= uint64(u32(d.mem[i:i+4])) * prime1 | ||||
| 	if len(b) >= 4 { | ||||
| 		h ^= uint64(u32(b[:4])) * prime1 | ||||
| 		h = rol23(h)*prime2 + prime3 | ||||
| 		i += 4 | ||||
| 		b = b[4:] | ||||
| 	} | ||||
| 	for i < end { | ||||
| 		h ^= uint64(d.mem[i]) * prime5 | ||||
| 	for ; len(b) > 0; b = b[1:] { | ||||
| 		h ^= uint64(b[0]) * prime5 | ||||
| 		h = rol11(h) * prime1 | ||||
| 		i++ | ||||
| 	} | ||||
|  | ||||
| 	h ^= h >> 33 | ||||
|   | ||||
							
								
								
									
										308
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										308
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,215 +1,209 @@ | ||||
| //go:build !appengine && gc && !purego | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !purego | ||||
|  | ||||
| #include "textflag.h" | ||||
|  | ||||
| // Register allocation: | ||||
| // AX	h | ||||
| // SI	pointer to advance through b | ||||
| // DX	n | ||||
| // BX	loop end | ||||
| // R8	v1, k1 | ||||
| // R9	v2 | ||||
| // R10	v3 | ||||
| // R11	v4 | ||||
| // R12	tmp | ||||
| // R13	prime1v | ||||
| // R14	prime2v | ||||
| // DI	prime4v | ||||
| // Registers: | ||||
| #define h      AX | ||||
| #define d      AX | ||||
| #define p      SI // pointer to advance through b | ||||
| #define n      DX | ||||
| #define end    BX // loop end | ||||
| #define v1     R8 | ||||
| #define v2     R9 | ||||
| #define v3     R10 | ||||
| #define v4     R11 | ||||
| #define x      R12 | ||||
| #define prime1 R13 | ||||
| #define prime2 R14 | ||||
| #define prime4 DI | ||||
|  | ||||
| // round reads from and advances the buffer pointer in SI. | ||||
| // It assumes that R13 has prime1v and R14 has prime2v. | ||||
| #define round(r) \ | ||||
| 	MOVQ  (SI), R12 \ | ||||
| 	ADDQ  $8, SI    \ | ||||
| 	IMULQ R14, R12  \ | ||||
| 	ADDQ  R12, r    \ | ||||
| 	ROLQ  $31, r    \ | ||||
| 	IMULQ R13, r | ||||
| #define round(acc, x) \ | ||||
| 	IMULQ prime2, x   \ | ||||
| 	ADDQ  x, acc      \ | ||||
| 	ROLQ  $31, acc    \ | ||||
| 	IMULQ prime1, acc | ||||
|  | ||||
| // mergeRound applies a merge round on the two registers acc and val. | ||||
| // It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. | ||||
| #define mergeRound(acc, val) \ | ||||
| 	IMULQ R14, val \ | ||||
| 	ROLQ  $31, val \ | ||||
| 	IMULQ R13, val \ | ||||
| 	XORQ  val, acc \ | ||||
| 	IMULQ R13, acc \ | ||||
| 	ADDQ  DI, acc | ||||
| // round0 performs the operation x = round(0, x). | ||||
| #define round0(x) \ | ||||
| 	IMULQ prime2, x \ | ||||
| 	ROLQ  $31, x    \ | ||||
| 	IMULQ prime1, x | ||||
|  | ||||
| // mergeRound applies a merge round on the two registers acc and x. | ||||
| // It assumes that prime1, prime2, and prime4 have been loaded. | ||||
| #define mergeRound(acc, x) \ | ||||
| 	round0(x)         \ | ||||
| 	XORQ  x, acc      \ | ||||
| 	IMULQ prime1, acc \ | ||||
| 	ADDQ  prime4, acc | ||||
|  | ||||
| // blockLoop processes as many 32-byte blocks as possible, | ||||
| // updating v1, v2, v3, and v4. It assumes that there is at least one block | ||||
| // to process. | ||||
| #define blockLoop() \ | ||||
| loop:  \ | ||||
| 	MOVQ +0(p), x  \ | ||||
| 	round(v1, x)   \ | ||||
| 	MOVQ +8(p), x  \ | ||||
| 	round(v2, x)   \ | ||||
| 	MOVQ +16(p), x \ | ||||
| 	round(v3, x)   \ | ||||
| 	MOVQ +24(p), x \ | ||||
| 	round(v4, x)   \ | ||||
| 	ADDQ $32, p    \ | ||||
| 	CMPQ p, end    \ | ||||
| 	JLE  loop | ||||
|  | ||||
| // func Sum64(b []byte) uint64 | ||||
| TEXT ·Sum64(SB), NOSPLIT, $0-32 | ||||
| TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 | ||||
| 	// Load fixed primes. | ||||
| 	MOVQ ·prime1v(SB), R13 | ||||
| 	MOVQ ·prime2v(SB), R14 | ||||
| 	MOVQ ·prime4v(SB), DI | ||||
| 	MOVQ ·primes+0(SB), prime1 | ||||
| 	MOVQ ·primes+8(SB), prime2 | ||||
| 	MOVQ ·primes+24(SB), prime4 | ||||
|  | ||||
| 	// Load slice. | ||||
| 	MOVQ b_base+0(FP), SI | ||||
| 	MOVQ b_len+8(FP), DX | ||||
| 	LEAQ (SI)(DX*1), BX | ||||
| 	MOVQ b_base+0(FP), p | ||||
| 	MOVQ b_len+8(FP), n | ||||
| 	LEAQ (p)(n*1), end | ||||
|  | ||||
| 	// The first loop limit will be len(b)-32. | ||||
| 	SUBQ $32, BX | ||||
| 	SUBQ $32, end | ||||
|  | ||||
| 	// Check whether we have at least one block. | ||||
| 	CMPQ DX, $32 | ||||
| 	CMPQ n, $32 | ||||
| 	JLT  noBlocks | ||||
|  | ||||
| 	// Set up initial state (v1, v2, v3, v4). | ||||
| 	MOVQ R13, R8 | ||||
| 	ADDQ R14, R8 | ||||
| 	MOVQ R14, R9 | ||||
| 	XORQ R10, R10 | ||||
| 	XORQ R11, R11 | ||||
| 	SUBQ R13, R11 | ||||
| 	MOVQ prime1, v1 | ||||
| 	ADDQ prime2, v1 | ||||
| 	MOVQ prime2, v2 | ||||
| 	XORQ v3, v3 | ||||
| 	XORQ v4, v4 | ||||
| 	SUBQ prime1, v4 | ||||
|  | ||||
| 	// Loop until SI > BX. | ||||
| blockLoop: | ||||
| 	round(R8) | ||||
| 	round(R9) | ||||
| 	round(R10) | ||||
| 	round(R11) | ||||
| 	blockLoop() | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JLE  blockLoop | ||||
| 	MOVQ v1, h | ||||
| 	ROLQ $1, h | ||||
| 	MOVQ v2, x | ||||
| 	ROLQ $7, x | ||||
| 	ADDQ x, h | ||||
| 	MOVQ v3, x | ||||
| 	ROLQ $12, x | ||||
| 	ADDQ x, h | ||||
| 	MOVQ v4, x | ||||
| 	ROLQ $18, x | ||||
| 	ADDQ x, h | ||||
|  | ||||
| 	MOVQ R8, AX | ||||
| 	ROLQ $1, AX | ||||
| 	MOVQ R9, R12 | ||||
| 	ROLQ $7, R12 | ||||
| 	ADDQ R12, AX | ||||
| 	MOVQ R10, R12 | ||||
| 	ROLQ $12, R12 | ||||
| 	ADDQ R12, AX | ||||
| 	MOVQ R11, R12 | ||||
| 	ROLQ $18, R12 | ||||
| 	ADDQ R12, AX | ||||
|  | ||||
| 	mergeRound(AX, R8) | ||||
| 	mergeRound(AX, R9) | ||||
| 	mergeRound(AX, R10) | ||||
| 	mergeRound(AX, R11) | ||||
| 	mergeRound(h, v1) | ||||
| 	mergeRound(h, v2) | ||||
| 	mergeRound(h, v3) | ||||
| 	mergeRound(h, v4) | ||||
|  | ||||
| 	JMP afterBlocks | ||||
|  | ||||
| noBlocks: | ||||
| 	MOVQ ·prime5v(SB), AX | ||||
| 	MOVQ ·primes+32(SB), h | ||||
|  | ||||
| afterBlocks: | ||||
| 	ADDQ DX, AX | ||||
| 	ADDQ n, h | ||||
|  | ||||
| 	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. | ||||
| 	ADDQ $24, BX | ||||
| 	ADDQ $24, end | ||||
| 	CMPQ p, end | ||||
| 	JG   try4 | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JG   fourByte | ||||
| loop8: | ||||
| 	MOVQ  (p), x | ||||
| 	ADDQ  $8, p | ||||
| 	round0(x) | ||||
| 	XORQ  x, h | ||||
| 	ROLQ  $27, h | ||||
| 	IMULQ prime1, h | ||||
| 	ADDQ  prime4, h | ||||
|  | ||||
| wordLoop: | ||||
| 	// Calculate k1. | ||||
| 	MOVQ  (SI), R8 | ||||
| 	ADDQ  $8, SI | ||||
| 	IMULQ R14, R8 | ||||
| 	ROLQ  $31, R8 | ||||
| 	IMULQ R13, R8 | ||||
| 	CMPQ p, end | ||||
| 	JLE  loop8 | ||||
|  | ||||
| 	XORQ  R8, AX | ||||
| 	ROLQ  $27, AX | ||||
| 	IMULQ R13, AX | ||||
| 	ADDQ  DI, AX | ||||
| try4: | ||||
| 	ADDQ $4, end | ||||
| 	CMPQ p, end | ||||
| 	JG   try1 | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JLE  wordLoop | ||||
| 	MOVL  (p), x | ||||
| 	ADDQ  $4, p | ||||
| 	IMULQ prime1, x | ||||
| 	XORQ  x, h | ||||
|  | ||||
| fourByte: | ||||
| 	ADDQ $4, BX | ||||
| 	CMPQ SI, BX | ||||
| 	JG   singles | ||||
| 	ROLQ  $23, h | ||||
| 	IMULQ prime2, h | ||||
| 	ADDQ  ·primes+16(SB), h | ||||
|  | ||||
| 	MOVL  (SI), R8 | ||||
| 	ADDQ  $4, SI | ||||
| 	IMULQ R13, R8 | ||||
| 	XORQ  R8, AX | ||||
|  | ||||
| 	ROLQ  $23, AX | ||||
| 	IMULQ R14, AX | ||||
| 	ADDQ  ·prime3v(SB), AX | ||||
|  | ||||
| singles: | ||||
| 	ADDQ $4, BX | ||||
| 	CMPQ SI, BX | ||||
| try1: | ||||
| 	ADDQ $4, end | ||||
| 	CMPQ p, end | ||||
| 	JGE  finalize | ||||
|  | ||||
| singlesLoop: | ||||
| 	MOVBQZX (SI), R12 | ||||
| 	ADDQ    $1, SI | ||||
| 	IMULQ   ·prime5v(SB), R12 | ||||
| 	XORQ    R12, AX | ||||
| loop1: | ||||
| 	MOVBQZX (p), x | ||||
| 	ADDQ    $1, p | ||||
| 	IMULQ   ·primes+32(SB), x | ||||
| 	XORQ    x, h | ||||
| 	ROLQ    $11, h | ||||
| 	IMULQ   prime1, h | ||||
|  | ||||
| 	ROLQ  $11, AX | ||||
| 	IMULQ R13, AX | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JL   singlesLoop | ||||
| 	CMPQ p, end | ||||
| 	JL   loop1 | ||||
|  | ||||
| finalize: | ||||
| 	MOVQ  AX, R12 | ||||
| 	SHRQ  $33, R12 | ||||
| 	XORQ  R12, AX | ||||
| 	IMULQ R14, AX | ||||
| 	MOVQ  AX, R12 | ||||
| 	SHRQ  $29, R12 | ||||
| 	XORQ  R12, AX | ||||
| 	IMULQ ·prime3v(SB), AX | ||||
| 	MOVQ  AX, R12 | ||||
| 	SHRQ  $32, R12 | ||||
| 	XORQ  R12, AX | ||||
| 	MOVQ  h, x | ||||
| 	SHRQ  $33, x | ||||
| 	XORQ  x, h | ||||
| 	IMULQ prime2, h | ||||
| 	MOVQ  h, x | ||||
| 	SHRQ  $29, x | ||||
| 	XORQ  x, h | ||||
| 	IMULQ ·primes+16(SB), h | ||||
| 	MOVQ  h, x | ||||
| 	SHRQ  $32, x | ||||
| 	XORQ  x, h | ||||
|  | ||||
| 	MOVQ AX, ret+24(FP) | ||||
| 	MOVQ h, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // writeBlocks uses the same registers as above except that it uses AX to store | ||||
| // the d pointer. | ||||
|  | ||||
| // func writeBlocks(d *Digest, b []byte) int | ||||
| TEXT ·writeBlocks(SB), NOSPLIT, $0-40 | ||||
| TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 | ||||
| 	// Load fixed primes needed for round. | ||||
| 	MOVQ ·prime1v(SB), R13 | ||||
| 	MOVQ ·prime2v(SB), R14 | ||||
| 	MOVQ ·primes+0(SB), prime1 | ||||
| 	MOVQ ·primes+8(SB), prime2 | ||||
|  | ||||
| 	// Load slice. | ||||
| 	MOVQ b_base+8(FP), SI | ||||
| 	MOVQ b_len+16(FP), DX | ||||
| 	LEAQ (SI)(DX*1), BX | ||||
| 	SUBQ $32, BX | ||||
| 	MOVQ b_base+8(FP), p | ||||
| 	MOVQ b_len+16(FP), n | ||||
| 	LEAQ (p)(n*1), end | ||||
| 	SUBQ $32, end | ||||
|  | ||||
| 	// Load vN from d. | ||||
| 	MOVQ d+0(FP), AX | ||||
| 	MOVQ 0(AX), R8   // v1 | ||||
| 	MOVQ 8(AX), R9   // v2 | ||||
| 	MOVQ 16(AX), R10 // v3 | ||||
| 	MOVQ 24(AX), R11 // v4 | ||||
| 	MOVQ s+0(FP), d | ||||
| 	MOVQ 0(d), v1 | ||||
| 	MOVQ 8(d), v2 | ||||
| 	MOVQ 16(d), v3 | ||||
| 	MOVQ 24(d), v4 | ||||
|  | ||||
| 	// We don't need to check the loop condition here; this function is | ||||
| 	// always called with at least one block of data to process. | ||||
| blockLoop: | ||||
| 	round(R8) | ||||
| 	round(R9) | ||||
| 	round(R10) | ||||
| 	round(R11) | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JLE  blockLoop | ||||
| 	blockLoop() | ||||
|  | ||||
| 	// Copy vN back to d. | ||||
| 	MOVQ R8, 0(AX) | ||||
| 	MOVQ R9, 8(AX) | ||||
| 	MOVQ R10, 16(AX) | ||||
| 	MOVQ R11, 24(AX) | ||||
| 	MOVQ v1, 0(d) | ||||
| 	MOVQ v2, 8(d) | ||||
| 	MOVQ v3, 16(d) | ||||
| 	MOVQ v4, 24(d) | ||||
|  | ||||
| 	// The number of bytes written is SI minus the old base pointer. | ||||
| 	SUBQ b_base+8(FP), SI | ||||
| 	MOVQ SI, ret+32(FP) | ||||
| 	// The number of bytes written is p minus the old base pointer. | ||||
| 	SUBQ b_base+8(FP), p | ||||
| 	MOVQ p, ret+32(FP) | ||||
|  | ||||
| 	RET | ||||
|   | ||||
							
								
								
									
										183
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										183
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,183 @@ | ||||
| //go:build !appengine && gc && !purego | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !purego | ||||
|  | ||||
| #include "textflag.h" | ||||
|  | ||||
| // Registers: | ||||
| #define digest	R1 | ||||
| #define h	R2 // return value | ||||
| #define p	R3 // input pointer | ||||
| #define n	R4 // input length | ||||
| #define nblocks	R5 // n / 32 | ||||
| #define prime1	R7 | ||||
| #define prime2	R8 | ||||
| #define prime3	R9 | ||||
| #define prime4	R10 | ||||
| #define prime5	R11 | ||||
| #define v1	R12 | ||||
| #define v2	R13 | ||||
| #define v3	R14 | ||||
| #define v4	R15 | ||||
| #define x1	R20 | ||||
| #define x2	R21 | ||||
| #define x3	R22 | ||||
| #define x4	R23 | ||||
|  | ||||
| #define round(acc, x) \ | ||||
| 	MADD prime2, acc, x, acc \ | ||||
| 	ROR  $64-31, acc         \ | ||||
| 	MUL  prime1, acc | ||||
|  | ||||
| // round0 performs the operation x = round(0, x). | ||||
| #define round0(x) \ | ||||
| 	MUL prime2, x \ | ||||
| 	ROR $64-31, x \ | ||||
| 	MUL prime1, x | ||||
|  | ||||
| #define mergeRound(acc, x) \ | ||||
| 	round0(x)                     \ | ||||
| 	EOR  x, acc                   \ | ||||
| 	MADD acc, prime4, prime1, acc | ||||
|  | ||||
| // blockLoop processes as many 32-byte blocks as possible, | ||||
| // updating v1, v2, v3, and v4. It assumes that n >= 32. | ||||
| #define blockLoop() \ | ||||
| 	LSR     $5, n, nblocks  \ | ||||
| 	PCALIGN $16             \ | ||||
| 	loop:                   \ | ||||
| 	LDP.P   16(p), (x1, x2) \ | ||||
| 	LDP.P   16(p), (x3, x4) \ | ||||
| 	round(v1, x1)           \ | ||||
| 	round(v2, x2)           \ | ||||
| 	round(v3, x3)           \ | ||||
| 	round(v4, x4)           \ | ||||
| 	SUB     $1, nblocks     \ | ||||
| 	CBNZ    nblocks, loop | ||||
|  | ||||
| // func Sum64(b []byte) uint64 | ||||
| TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 | ||||
| 	LDP b_base+0(FP), (p, n) | ||||
|  | ||||
| 	LDP  ·primes+0(SB), (prime1, prime2) | ||||
| 	LDP  ·primes+16(SB), (prime3, prime4) | ||||
| 	MOVD ·primes+32(SB), prime5 | ||||
|  | ||||
| 	CMP  $32, n | ||||
| 	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } | ||||
| 	BLT  afterLoop | ||||
|  | ||||
| 	ADD  prime1, prime2, v1 | ||||
| 	MOVD prime2, v2 | ||||
| 	MOVD $0, v3 | ||||
| 	NEG  prime1, v4 | ||||
|  | ||||
| 	blockLoop() | ||||
|  | ||||
| 	ROR $64-1, v1, x1 | ||||
| 	ROR $64-7, v2, x2 | ||||
| 	ADD x1, x2 | ||||
| 	ROR $64-12, v3, x3 | ||||
| 	ROR $64-18, v4, x4 | ||||
| 	ADD x3, x4 | ||||
| 	ADD x2, x4, h | ||||
|  | ||||
| 	mergeRound(h, v1) | ||||
| 	mergeRound(h, v2) | ||||
| 	mergeRound(h, v3) | ||||
| 	mergeRound(h, v4) | ||||
|  | ||||
| afterLoop: | ||||
| 	ADD n, h | ||||
|  | ||||
| 	TBZ   $4, n, try8 | ||||
| 	LDP.P 16(p), (x1, x2) | ||||
|  | ||||
| 	round0(x1) | ||||
|  | ||||
| 	// NOTE: here and below, sequencing the EOR after the ROR (using a | ||||
| 	// rotated register) is worth a small but measurable speedup for small | ||||
| 	// inputs. | ||||
| 	ROR  $64-27, h | ||||
| 	EOR  x1 @> 64-27, h, h | ||||
| 	MADD h, prime4, prime1, h | ||||
|  | ||||
| 	round0(x2) | ||||
| 	ROR  $64-27, h | ||||
| 	EOR  x2 @> 64-27, h, h | ||||
| 	MADD h, prime4, prime1, h | ||||
|  | ||||
| try8: | ||||
| 	TBZ    $3, n, try4 | ||||
| 	MOVD.P 8(p), x1 | ||||
|  | ||||
| 	round0(x1) | ||||
| 	ROR  $64-27, h | ||||
| 	EOR  x1 @> 64-27, h, h | ||||
| 	MADD h, prime4, prime1, h | ||||
|  | ||||
| try4: | ||||
| 	TBZ     $2, n, try2 | ||||
| 	MOVWU.P 4(p), x2 | ||||
|  | ||||
| 	MUL  prime1, x2 | ||||
| 	ROR  $64-23, h | ||||
| 	EOR  x2 @> 64-23, h, h | ||||
| 	MADD h, prime3, prime2, h | ||||
|  | ||||
| try2: | ||||
| 	TBZ     $1, n, try1 | ||||
| 	MOVHU.P 2(p), x3 | ||||
| 	AND     $255, x3, x1 | ||||
| 	LSR     $8, x3, x2 | ||||
|  | ||||
| 	MUL prime5, x1 | ||||
| 	ROR $64-11, h | ||||
| 	EOR x1 @> 64-11, h, h | ||||
| 	MUL prime1, h | ||||
|  | ||||
| 	MUL prime5, x2 | ||||
| 	ROR $64-11, h | ||||
| 	EOR x2 @> 64-11, h, h | ||||
| 	MUL prime1, h | ||||
|  | ||||
| try1: | ||||
| 	TBZ   $0, n, finalize | ||||
| 	MOVBU (p), x4 | ||||
|  | ||||
| 	MUL prime5, x4 | ||||
| 	ROR $64-11, h | ||||
| 	EOR x4 @> 64-11, h, h | ||||
| 	MUL prime1, h | ||||
|  | ||||
| finalize: | ||||
| 	EOR h >> 33, h | ||||
| 	MUL prime2, h | ||||
| 	EOR h >> 29, h | ||||
| 	MUL prime3, h | ||||
| 	EOR h >> 32, h | ||||
|  | ||||
| 	MOVD h, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // func writeBlocks(d *Digest, b []byte) int | ||||
| TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 | ||||
| 	LDP ·primes+0(SB), (prime1, prime2) | ||||
|  | ||||
| 	// Load state. Assume v[1-4] are stored contiguously. | ||||
| 	MOVD d+0(FP), digest | ||||
| 	LDP  0(digest), (v1, v2) | ||||
| 	LDP  16(digest), (v3, v4) | ||||
|  | ||||
| 	LDP b_base+8(FP), (p, n) | ||||
|  | ||||
| 	blockLoop() | ||||
|  | ||||
| 	// Store updated state. | ||||
| 	STP (v1, v2), 0(digest) | ||||
| 	STP (v3, v4), 16(digest) | ||||
|  | ||||
| 	BIC  $31, n | ||||
| 	MOVD n, ret+32(FP) | ||||
| 	RET | ||||
| @@ -1,3 +1,5 @@ | ||||
| //go:build (amd64 || arm64) && !appengine && gc && !purego | ||||
| // +build amd64 arm64 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !purego | ||||
							
								
								
									
										22
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,4 +1,5 @@ | ||||
| // +build !amd64 appengine !gc purego | ||||
| //go:build (!amd64 && !arm64) || appengine || !gc || purego | ||||
| // +build !amd64,!arm64 appengine !gc purego | ||||
|  | ||||
| package xxhash | ||||
|  | ||||
| @@ -14,10 +15,10 @@ func Sum64(b []byte) uint64 { | ||||
| 	var h uint64 | ||||
|  | ||||
| 	if n >= 32 { | ||||
| 		v1 := prime1v + prime2 | ||||
| 		v1 := primes[0] + prime2 | ||||
| 		v2 := prime2 | ||||
| 		v3 := uint64(0) | ||||
| 		v4 := -prime1v | ||||
| 		v4 := -primes[0] | ||||
| 		for len(b) >= 32 { | ||||
| 			v1 = round(v1, u64(b[0:8:len(b)])) | ||||
| 			v2 = round(v2, u64(b[8:16:len(b)])) | ||||
| @@ -36,19 +37,18 @@ func Sum64(b []byte) uint64 { | ||||
|  | ||||
| 	h += uint64(n) | ||||
|  | ||||
| 	i, end := 0, len(b) | ||||
| 	for ; i+8 <= end; i += 8 { | ||||
| 		k1 := round(0, u64(b[i:i+8:len(b)])) | ||||
| 	for ; len(b) >= 8; b = b[8:] { | ||||
| 		k1 := round(0, u64(b[:8])) | ||||
| 		h ^= k1 | ||||
| 		h = rol27(h)*prime1 + prime4 | ||||
| 	} | ||||
| 	if i+4 <= end { | ||||
| 		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 | ||||
| 	if len(b) >= 4 { | ||||
| 		h ^= uint64(u32(b[:4])) * prime1 | ||||
| 		h = rol23(h)*prime2 + prime3 | ||||
| 		i += 4 | ||||
| 		b = b[4:] | ||||
| 	} | ||||
| 	for ; i < end; i++ { | ||||
| 		h ^= uint64(b[i]) * prime5 | ||||
| 	for ; len(b) > 0; b = b[1:] { | ||||
| 		h ^= uint64(b[0]) * prime5 | ||||
| 		h = rol11(h) * prime1 | ||||
| 	} | ||||
|  | ||||
|   | ||||
							
								
								
									
										1
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,3 +1,4 @@ | ||||
| //go:build appengine | ||||
| // +build appengine | ||||
|  | ||||
| // This file contains the safe implementations of otherwise unsafe-using code. | ||||
|   | ||||
							
								
								
									
										3
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,3 +1,4 @@ | ||||
| //go:build !appengine | ||||
| // +build !appengine | ||||
|  | ||||
| // This file encapsulates usage of unsafe. | ||||
| @@ -11,7 +12,7 @@ import ( | ||||
|  | ||||
| // In the future it's possible that compiler optimizations will make these | ||||
| // XxxString functions unnecessary by realizing that calls such as | ||||
| // Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205. | ||||
| // Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205. | ||||
| // If that happens, even if we keep these functions they can be replaced with | ||||
| // the trivial safe code. | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Akihiro Suda
					Akihiro Suda