go.mod: github.com/klauspost/compress v1.16.0
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
		
							
								
								
									
										2
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								go.mod
									
									
									
									
									
								
							| @@ -37,7 +37,7 @@ require ( | ||||
| 	github.com/hashicorp/go-multierror v1.1.1 | ||||
| 	github.com/imdario/mergo v0.3.13 | ||||
| 	github.com/intel/goresctrl v0.3.0 | ||||
| 	github.com/klauspost/compress v1.15.11 | ||||
| 	github.com/klauspost/compress v1.16.0 | ||||
| 	github.com/minio/sha256-simd v1.0.0 | ||||
| 	github.com/moby/locker v1.0.1 | ||||
| 	github.com/moby/sys/mountinfo v0.6.2 | ||||
|   | ||||
							
								
								
									
										4
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								go.sum
									
									
									
									
									
								
							| @@ -637,8 +637,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI | ||||
| github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= | ||||
| github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= | ||||
| github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= | ||||
| github.com/klauspost/compress v1.15.11 h1:Lcadnb3RKGin4FYM/orgq0qde+nc15E5Cbqg4B9Sx9c= | ||||
| github.com/klauspost/compress v1.15.11/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= | ||||
| github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= | ||||
| github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= | ||||
| github.com/klauspost/cpuid/v2 v2.0.4 h1:g0I61F2K2DjRHz1cnxlkNSBIaePVoJIjjnHui8QHbiw= | ||||
| github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= | ||||
| github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= | ||||
|   | ||||
| @@ -43,7 +43,7 @@ require ( | ||||
| 	github.com/google/go-cmp v0.5.9 // indirect | ||||
| 	github.com/google/uuid v1.3.0 // indirect | ||||
| 	github.com/imdario/mergo v0.3.13 // indirect | ||||
| 	github.com/klauspost/compress v1.15.11 // indirect | ||||
| 	github.com/klauspost/compress v1.16.0 // indirect | ||||
| 	github.com/moby/locker v1.0.1 // indirect | ||||
| 	github.com/moby/sys/mountinfo v0.6.2 // indirect | ||||
| 	github.com/moby/sys/sequential v0.5.0 // indirect | ||||
|   | ||||
| @@ -1037,8 +1037,9 @@ github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdY | ||||
| github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= | ||||
| github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= | ||||
| github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= | ||||
| github.com/klauspost/compress v1.15.11 h1:Lcadnb3RKGin4FYM/orgq0qde+nc15E5Cbqg4B9Sx9c= | ||||
| github.com/klauspost/compress v1.15.11/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= | ||||
| github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= | ||||
| github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= | ||||
| github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= | ||||
| github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= | ||||
| github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= | ||||
|   | ||||
							
								
								
									
										2
									
								
								vendor/github.com/klauspost/compress/.goreleaser.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/klauspost/compress/.goreleaser.yml
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -3,7 +3,7 @@ | ||||
| before: | ||||
|   hooks: | ||||
|     - ./gen.sh | ||||
|     - go install mvdan.cc/garble@latest | ||||
|     - go install mvdan.cc/garble@v0.9.3 | ||||
|  | ||||
| builds: | ||||
|   - | ||||
|   | ||||
							
								
								
									
										30
									
								
								vendor/github.com/klauspost/compress/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								vendor/github.com/klauspost/compress/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -9,7 +9,6 @@ This package provides various compression algorithms. | ||||
| * [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding. | ||||
| * [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently. | ||||
| * [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation. | ||||
| * [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here. | ||||
|  | ||||
| [Go Reference](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories) | ||||
| [Go](https://github.com/klauspost/compress/actions/workflows/go.yml) | ||||
| @@ -17,6 +16,35 @@ This package provides various compression algorithms. | ||||
|  | ||||
| # changelog | ||||
|  | ||||
| * Jan 21st, 2023 (v1.15.15) | ||||
| 	* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739 | ||||
| 	* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728 | ||||
| 	* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745 | ||||
| 	* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740 | ||||
|  | ||||
| * Jan 3rd, 2023 (v1.15.14) | ||||
|  | ||||
| 	* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718 | ||||
| 	* zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720 | ||||
| 	* export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722 | ||||
| 	* s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723 | ||||
|  | ||||
| * Dec 11, 2022 (v1.15.13) | ||||
| 	* zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder  https://github.com/klauspost/compress/pull/691 | ||||
| 	* zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708 | ||||
|  | ||||
| * Oct 26, 2022 (v1.15.12) | ||||
|  | ||||
| 	* zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680 | ||||
| 	* gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683 | ||||
|  | ||||
| * Sept 26, 2022 (v1.15.11) | ||||
|  | ||||
| 	* flate: Improve level 1-3 compression  https://github.com/klauspost/compress/pull/678 | ||||
| 	* zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677 | ||||
| 	* zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668 | ||||
| 	* zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667 | ||||
|  | ||||
| * Sept 16, 2022 (v1.15.10) | ||||
|  | ||||
| 	* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 | ||||
|   | ||||
							
								
								
									
										31
									
								
								vendor/github.com/klauspost/compress/fse/compress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								vendor/github.com/klauspost/compress/fse/compress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error { | ||||
| 		c1.encodeZero(tt[src[ip-2]]) | ||||
| 		ip -= 2 | ||||
| 	} | ||||
| 	src = src[:ip] | ||||
|  | ||||
| 	// Main compression loop. | ||||
| 	switch { | ||||
| 	case !s.zeroBits && s.actualTableLog <= 8: | ||||
| 		// We can encode 4 symbols without requiring a flush. | ||||
| 		// We do not need to check if any output is 0 bits. | ||||
| 		for ip >= 4 { | ||||
| 		for ; len(src) >= 4; src = src[:len(src)-4] { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] | ||||
| 			c2.encode(tt[v0]) | ||||
| 			c1.encode(tt[v1]) | ||||
| 			c2.encode(tt[v2]) | ||||
| 			c1.encode(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	case !s.zeroBits: | ||||
| 		// We do not need to check if any output is 0 bits. | ||||
| 		for ip >= 4 { | ||||
| 		for ; len(src) >= 4; src = src[:len(src)-4] { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] | ||||
| 			c2.encode(tt[v0]) | ||||
| 			c1.encode(tt[v1]) | ||||
| 			s.bw.flush32() | ||||
| 			c2.encode(tt[v2]) | ||||
| 			c1.encode(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	case s.actualTableLog <= 8: | ||||
| 		// We can encode 4 symbols without requiring a flush | ||||
| 		for ip >= 4 { | ||||
| 		for ; len(src) >= 4; src = src[:len(src)-4] { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] | ||||
| 			c2.encodeZero(tt[v0]) | ||||
| 			c1.encodeZero(tt[v1]) | ||||
| 			c2.encodeZero(tt[v2]) | ||||
| 			c1.encodeZero(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	default: | ||||
| 		for ip >= 4 { | ||||
| 		for ; len(src) >= 4; src = src[:len(src)-4] { | ||||
| 			s.bw.flush32() | ||||
| 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] | ||||
| 			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] | ||||
| 			c2.encodeZero(tt[v0]) | ||||
| 			c1.encodeZero(tt[v1]) | ||||
| 			s.bw.flush32() | ||||
| 			c2.encodeZero(tt[v2]) | ||||
| 			c1.encodeZero(tt[v3]) | ||||
| 			ip -= 4 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| @@ -459,15 +456,17 @@ func (s *Scratch) countSimple(in []byte) (max int) { | ||||
| 	for _, v := range in { | ||||
| 		s.count[v]++ | ||||
| 	} | ||||
| 	m := uint32(0) | ||||
| 	m, symlen := uint32(0), s.symbolLen | ||||
| 	for i, v := range s.count[:] { | ||||
| 		if v == 0 { | ||||
| 			continue | ||||
| 		} | ||||
| 		if v > m { | ||||
| 			m = v | ||||
| 		} | ||||
| 		if v > 0 { | ||||
| 			s.symbolLen = uint16(i) + 1 | ||||
| 		} | ||||
| 		symlen = uint16(i) + 1 | ||||
| 	} | ||||
| 	s.symbolLen = symlen | ||||
| 	return int(m) | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										8
									
								
								vendor/github.com/klauspost/compress/huff0/bitreader.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								vendor/github.com/klauspost/compress/huff0/bitreader.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -67,7 +67,6 @@ func (b *bitReaderBytes) fillFast() { | ||||
|  | ||||
| 	// 2 bounds checks. | ||||
| 	v := b.in[b.off-4 : b.off] | ||||
| 	v = v[:4] | ||||
| 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	b.value |= uint64(low) << (b.bitsRead - 32) | ||||
| 	b.bitsRead -= 32 | ||||
| @@ -88,8 +87,7 @@ func (b *bitReaderBytes) fill() { | ||||
| 		return | ||||
| 	} | ||||
| 	if b.off > 4 { | ||||
| 		v := b.in[b.off-4:] | ||||
| 		v = v[:4] | ||||
| 		v := b.in[b.off-4 : b.off] | ||||
| 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 		b.value |= uint64(low) << (b.bitsRead - 32) | ||||
| 		b.bitsRead -= 32 | ||||
| @@ -179,7 +177,6 @@ func (b *bitReaderShifted) fillFast() { | ||||
|  | ||||
| 	// 2 bounds checks. | ||||
| 	v := b.in[b.off-4 : b.off] | ||||
| 	v = v[:4] | ||||
| 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	b.value |= uint64(low) << ((b.bitsRead - 32) & 63) | ||||
| 	b.bitsRead -= 32 | ||||
| @@ -200,8 +197,7 @@ func (b *bitReaderShifted) fill() { | ||||
| 		return | ||||
| 	} | ||||
| 	if b.off > 4 { | ||||
| 		v := b.in[b.off-4:] | ||||
| 		v = v[:4] | ||||
| 		v := b.in[b.off-4 : b.off] | ||||
| 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 		b.value |= uint64(low) << ((b.bitsRead - 32) & 63) | ||||
| 		b.bitsRead -= 32 | ||||
|   | ||||
							
								
								
									
										104
									
								
								vendor/github.com/klauspost/compress/huff0/compress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										104
									
								
								vendor/github.com/klauspost/compress/huff0/compress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -365,30 +365,30 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { | ||||
| 	m := uint32(0) | ||||
| 	if len(s.prevTable) > 0 { | ||||
| 		for i, v := range s.count[:] { | ||||
| 			if v == 0 { | ||||
| 				continue | ||||
| 			} | ||||
| 			if v > m { | ||||
| 				m = v | ||||
| 			} | ||||
| 			if v > 0 { | ||||
| 			s.symbolLen = uint16(i) + 1 | ||||
| 			if i >= len(s.prevTable) { | ||||
| 				reuse = false | ||||
| 				} else { | ||||
| 					if s.prevTable[i].nBits == 0 { | ||||
| 			} else if s.prevTable[i].nBits == 0 { | ||||
| 				reuse = false | ||||
| 			} | ||||
| 		} | ||||
| 			} | ||||
| 		} | ||||
| 		return int(m), reuse | ||||
| 	} | ||||
| 	for i, v := range s.count[:] { | ||||
| 		if v == 0 { | ||||
| 			continue | ||||
| 		} | ||||
| 		if v > m { | ||||
| 			m = v | ||||
| 		} | ||||
| 		if v > 0 { | ||||
| 		s.symbolLen = uint16(i) + 1 | ||||
| 	} | ||||
| 	} | ||||
| 	return int(m), false | ||||
| } | ||||
|  | ||||
| @@ -484,34 +484,35 @@ func (s *Scratch) buildCTable() error { | ||||
| 	// Different from reference implementation. | ||||
| 	huffNode0 := s.nodes[0 : huffNodesLen+1] | ||||
|  | ||||
| 	for huffNode[nonNullRank].count == 0 { | ||||
| 	for huffNode[nonNullRank].count() == 0 { | ||||
| 		nonNullRank-- | ||||
| 	} | ||||
|  | ||||
| 	lowS := int16(nonNullRank) | ||||
| 	nodeRoot := nodeNb + lowS - 1 | ||||
| 	lowN := nodeNb | ||||
| 	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count | ||||
| 	huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb) | ||||
| 	huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count()) | ||||
| 	huffNode[lowS].setParent(nodeNb) | ||||
| 	huffNode[lowS-1].setParent(nodeNb) | ||||
| 	nodeNb++ | ||||
| 	lowS -= 2 | ||||
| 	for n := nodeNb; n <= nodeRoot; n++ { | ||||
| 		huffNode[n].count = 1 << 30 | ||||
| 		huffNode[n].setCount(1 << 30) | ||||
| 	} | ||||
| 	// fake entry, strong barrier | ||||
| 	huffNode0[0].count = 1 << 31 | ||||
| 	huffNode0[0].setCount(1 << 31) | ||||
|  | ||||
| 	// create parents | ||||
| 	for nodeNb <= nodeRoot { | ||||
| 		var n1, n2 int16 | ||||
| 		if huffNode0[lowS+1].count < huffNode0[lowN+1].count { | ||||
| 		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { | ||||
| 			n1 = lowS | ||||
| 			lowS-- | ||||
| 		} else { | ||||
| 			n1 = lowN | ||||
| 			lowN++ | ||||
| 		} | ||||
| 		if huffNode0[lowS+1].count < huffNode0[lowN+1].count { | ||||
| 		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { | ||||
| 			n2 = lowS | ||||
| 			lowS-- | ||||
| 		} else { | ||||
| @@ -519,18 +520,19 @@ func (s *Scratch) buildCTable() error { | ||||
| 			lowN++ | ||||
| 		} | ||||
|  | ||||
| 		huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count | ||||
| 		huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb) | ||||
| 		huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count()) | ||||
| 		huffNode0[n1+1].setParent(nodeNb) | ||||
| 		huffNode0[n2+1].setParent(nodeNb) | ||||
| 		nodeNb++ | ||||
| 	} | ||||
|  | ||||
| 	// distribute weights (unlimited tree height) | ||||
| 	huffNode[nodeRoot].nbBits = 0 | ||||
| 	huffNode[nodeRoot].setNbBits(0) | ||||
| 	for n := nodeRoot - 1; n >= startNode; n-- { | ||||
| 		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 | ||||
| 		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) | ||||
| 	} | ||||
| 	for n := uint16(0); n <= nonNullRank; n++ { | ||||
| 		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 | ||||
| 		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) | ||||
| 	} | ||||
| 	s.actualTableLog = s.setMaxHeight(int(nonNullRank)) | ||||
| 	maxNbBits := s.actualTableLog | ||||
| @@ -542,7 +544,7 @@ func (s *Scratch) buildCTable() error { | ||||
| 	var nbPerRank [tableLogMax + 1]uint16 | ||||
| 	var valPerRank [16]uint16 | ||||
| 	for _, v := range huffNode[:nonNullRank+1] { | ||||
| 		nbPerRank[v.nbBits]++ | ||||
| 		nbPerRank[v.nbBits()]++ | ||||
| 	} | ||||
| 	// determine starting value per rank | ||||
| 	{ | ||||
| @@ -557,7 +559,7 @@ func (s *Scratch) buildCTable() error { | ||||
|  | ||||
| 	// push nbBits per symbol, symbol order | ||||
| 	for _, v := range huffNode[:nonNullRank+1] { | ||||
| 		s.cTable[v.symbol].nBits = v.nbBits | ||||
| 		s.cTable[v.symbol()].nBits = v.nbBits() | ||||
| 	} | ||||
|  | ||||
| 	// assign value within rank, symbol order | ||||
| @@ -603,12 +605,12 @@ func (s *Scratch) huffSort() { | ||||
| 		pos := rank[r].current | ||||
| 		rank[r].current++ | ||||
| 		prev := nodes[(pos-1)&huffNodesMask] | ||||
| 		for pos > rank[r].base && c > prev.count { | ||||
| 		for pos > rank[r].base && c > prev.count() { | ||||
| 			nodes[pos&huffNodesMask] = prev | ||||
| 			pos-- | ||||
| 			prev = nodes[(pos-1)&huffNodesMask] | ||||
| 		} | ||||
| 		nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} | ||||
| 		nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -617,7 +619,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
| 	huffNode := s.nodes[1 : huffNodesLen+1] | ||||
| 	//huffNode = huffNode[: huffNodesLen] | ||||
|  | ||||
| 	largestBits := huffNode[lastNonNull].nbBits | ||||
| 	largestBits := huffNode[lastNonNull].nbBits() | ||||
|  | ||||
| 	// early exit : no elt > maxNbBits | ||||
| 	if largestBits <= maxNbBits { | ||||
| @@ -627,14 +629,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
| 	baseCost := int(1) << (largestBits - maxNbBits) | ||||
| 	n := uint32(lastNonNull) | ||||
|  | ||||
| 	for huffNode[n].nbBits > maxNbBits { | ||||
| 		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)) | ||||
| 		huffNode[n].nbBits = maxNbBits | ||||
| 	for huffNode[n].nbBits() > maxNbBits { | ||||
| 		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits())) | ||||
| 		huffNode[n].setNbBits(maxNbBits) | ||||
| 		n-- | ||||
| 	} | ||||
| 	// n stops at huffNode[n].nbBits <= maxNbBits | ||||
|  | ||||
| 	for huffNode[n].nbBits == maxNbBits { | ||||
| 	for huffNode[n].nbBits() == maxNbBits { | ||||
| 		n-- | ||||
| 	} | ||||
| 	// n ends at index of smallest symbol using < maxNbBits | ||||
| @@ -655,10 +657,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
| 		{ | ||||
| 			currentNbBits := maxNbBits | ||||
| 			for pos := int(n); pos >= 0; pos-- { | ||||
| 				if huffNode[pos].nbBits >= currentNbBits { | ||||
| 				if huffNode[pos].nbBits() >= currentNbBits { | ||||
| 					continue | ||||
| 				} | ||||
| 				currentNbBits = huffNode[pos].nbBits // < maxNbBits | ||||
| 				currentNbBits = huffNode[pos].nbBits() // < maxNbBits | ||||
| 				rankLast[maxNbBits-currentNbBits] = uint32(pos) | ||||
| 			} | ||||
| 		} | ||||
| @@ -675,8 +677,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
| 				if lowPos == noSymbol { | ||||
| 					break | ||||
| 				} | ||||
| 				highTotal := huffNode[highPos].count | ||||
| 				lowTotal := 2 * huffNode[lowPos].count | ||||
| 				highTotal := huffNode[highPos].count() | ||||
| 				lowTotal := 2 * huffNode[lowPos].count() | ||||
| 				if highTotal <= lowTotal { | ||||
| 					break | ||||
| 				} | ||||
| @@ -692,13 +694,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
| 				// this rank is no longer empty | ||||
| 				rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] | ||||
| 			} | ||||
| 			huffNode[rankLast[nBitsToDecrease]].nbBits++ | ||||
| 			huffNode[rankLast[nBitsToDecrease]].setNbBits(1 + | ||||
| 				huffNode[rankLast[nBitsToDecrease]].nbBits()) | ||||
| 			if rankLast[nBitsToDecrease] == 0 { | ||||
| 				/* special case, reached largest symbol */ | ||||
| 				rankLast[nBitsToDecrease] = noSymbol | ||||
| 			} else { | ||||
| 				rankLast[nBitsToDecrease]-- | ||||
| 				if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease { | ||||
| 				if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease { | ||||
| 					rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */ | ||||
| 				} | ||||
| 			} | ||||
| @@ -706,15 +709,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
|  | ||||
| 		for totalCost < 0 { /* Sometimes, cost correction overshoot */ | ||||
| 			if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ | ||||
| 				for huffNode[n].nbBits == maxNbBits { | ||||
| 				for huffNode[n].nbBits() == maxNbBits { | ||||
| 					n-- | ||||
| 				} | ||||
| 				huffNode[n+1].nbBits-- | ||||
| 				huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1) | ||||
| 				rankLast[1] = n + 1 | ||||
| 				totalCost++ | ||||
| 				continue | ||||
| 			} | ||||
| 			huffNode[rankLast[1]+1].nbBits-- | ||||
| 			huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1) | ||||
| 			rankLast[1]++ | ||||
| 			totalCost++ | ||||
| 		} | ||||
| @@ -722,9 +725,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { | ||||
| 	return maxNbBits | ||||
| } | ||||
|  | ||||
| type nodeElt struct { | ||||
| 	count  uint32 | ||||
| 	parent uint16 | ||||
| 	symbol byte | ||||
| 	nbBits uint8 | ||||
| // A nodeElt is the fields | ||||
| // | ||||
| //	count  uint32 | ||||
| //	parent uint16 | ||||
| //	symbol byte | ||||
| //	nbBits uint8 | ||||
| // | ||||
| // in some order, all squashed into an integer so that the compiler | ||||
| // always loads and stores entire nodeElts instead of separate fields. | ||||
| type nodeElt uint64 | ||||
|  | ||||
| func makeNodeElt(count uint32, symbol byte) nodeElt { | ||||
| 	return nodeElt(count) | nodeElt(symbol)<<48 | ||||
| } | ||||
|  | ||||
| func (e *nodeElt) count() uint32  { return uint32(*e) } | ||||
| func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) } | ||||
| func (e *nodeElt) symbol() byte   { return byte(*e >> 48) } | ||||
| func (e *nodeElt) nbBits() uint8  { return uint8(*e >> 56) } | ||||
|  | ||||
| func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) } | ||||
| func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 } | ||||
| func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 } | ||||
|   | ||||
							
								
								
									
										2
									
								
								vendor/github.com/klauspost/compress/huff0/decompress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/klauspost/compress/huff0/decompress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -61,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { | ||||
| 		b, err := fse.Decompress(in[:iSize], s.fse) | ||||
| 		s.fse.Out = nil | ||||
| 		if err != nil { | ||||
| 			return s, nil, err | ||||
| 			return s, nil, fmt.Errorf("fse decompress returned: %w", err) | ||||
| 		} | ||||
| 		if len(b) > 255 { | ||||
| 			return s, nil, errors.New("corrupt input: output table too large") | ||||
|   | ||||
							
								
								
									
										576
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										576
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -4,360 +4,349 @@ | ||||
|  | ||||
| // func decompress4x_main_loop_amd64(ctx *decompress4xContext) | ||||
| TEXT ·decompress4x_main_loop_amd64(SB), $0-8 | ||||
| 	XORQ DX, DX | ||||
|  | ||||
| 	// Preload values | ||||
| 	MOVQ    ctx+0(FP), AX | ||||
| 	MOVBQZX 8(AX), DI | ||||
| 	MOVQ    16(AX), SI | ||||
| 	MOVQ    48(AX), BX | ||||
| 	MOVQ    24(AX), R9 | ||||
| 	MOVQ    32(AX), R10 | ||||
| 	MOVQ    (AX), R11 | ||||
| 	MOVQ    16(AX), BX | ||||
| 	MOVQ    48(AX), SI | ||||
| 	MOVQ    24(AX), R8 | ||||
| 	MOVQ    32(AX), R9 | ||||
| 	MOVQ    (AX), R10 | ||||
|  | ||||
| 	// Main loop | ||||
| main_loop: | ||||
| 	MOVQ  SI, R8 | ||||
| 	CMPQ  R8, BX | ||||
| 	XORL  DX, DX | ||||
| 	CMPQ  BX, SI | ||||
| 	SETGE DL | ||||
|  | ||||
| 	// br0.fillFast32() | ||||
| 	MOVQ    32(R11), R12 | ||||
| 	MOVBQZX 40(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    32(R10), R11 | ||||
| 	MOVBQZX 40(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill0 | ||||
| 	MOVQ    24(R11), AX | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	MOVQ    24(R10), AX | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, AX | ||||
| 	MOVQ    (R11), R14 | ||||
| 	MOVQ    (R10), R13 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (AX)(R14*1), R14 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ AX, 24(R11) | ||||
| 	ORQ  R14, R12 | ||||
| 	MOVL (AX)(R13*1), R13 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R13 | ||||
| 	MOVQ AX, 24(R10) | ||||
| 	ORQ  R13, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br0.off < 4) | ||||
| 	// exhausted += (br0.off < 4) | ||||
| 	CMPQ AX, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill0: | ||||
| 	// val0 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br0.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ DI, CX | ||||
| 	MOVQ R12, R14 | ||||
| 	SHRQ CL, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br0.advance(uint8(v1.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8) | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	MOVW AX, (R8) | ||||
| 	MOVW AX, (BX) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ R12, 32(R11) | ||||
| 	MOVB R13, 40(R11) | ||||
| 	ADDQ R9, R8 | ||||
| 	MOVQ R11, 32(R10) | ||||
| 	MOVB R12, 40(R10) | ||||
|  | ||||
| 	// br1.fillFast32() | ||||
| 	MOVQ    80(R11), R12 | ||||
| 	MOVBQZX 88(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    80(R10), R11 | ||||
| 	MOVBQZX 88(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill1 | ||||
| 	MOVQ    72(R11), AX | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	MOVQ    72(R10), AX | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, AX | ||||
| 	MOVQ    48(R11), R14 | ||||
| 	MOVQ    48(R10), R13 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (AX)(R14*1), R14 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ AX, 72(R11) | ||||
| 	ORQ  R14, R12 | ||||
| 	MOVL (AX)(R13*1), R13 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R13 | ||||
| 	MOVQ AX, 72(R10) | ||||
| 	ORQ  R13, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br1.off < 4) | ||||
| 	// exhausted += (br1.off < 4) | ||||
| 	CMPQ AX, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill1: | ||||
| 	// val0 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br1.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ DI, CX | ||||
| 	MOVQ R12, R14 | ||||
| 	SHRQ CL, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br1.advance(uint8(v1.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8) | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	MOVW AX, (R8) | ||||
| 	MOVW AX, (BX)(R8*1) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ R12, 80(R11) | ||||
| 	MOVB R13, 88(R11) | ||||
| 	ADDQ R9, R8 | ||||
| 	MOVQ R11, 80(R10) | ||||
| 	MOVB R12, 88(R10) | ||||
|  | ||||
| 	// br2.fillFast32() | ||||
| 	MOVQ    128(R11), R12 | ||||
| 	MOVBQZX 136(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    128(R10), R11 | ||||
| 	MOVBQZX 136(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill2 | ||||
| 	MOVQ    120(R11), AX | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	MOVQ    120(R10), AX | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, AX | ||||
| 	MOVQ    96(R11), R14 | ||||
| 	MOVQ    96(R10), R13 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (AX)(R14*1), R14 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ AX, 120(R11) | ||||
| 	ORQ  R14, R12 | ||||
| 	MOVL (AX)(R13*1), R13 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R13 | ||||
| 	MOVQ AX, 120(R10) | ||||
| 	ORQ  R13, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br2.off < 4) | ||||
| 	// exhausted += (br2.off < 4) | ||||
| 	CMPQ AX, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill2: | ||||
| 	// val0 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br2.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ DI, CX | ||||
| 	MOVQ R12, R14 | ||||
| 	SHRQ CL, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br2.advance(uint8(v1.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8) | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	MOVW AX, (R8) | ||||
| 	MOVW AX, (BX)(R8*2) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ R12, 128(R11) | ||||
| 	MOVB R13, 136(R11) | ||||
| 	ADDQ R9, R8 | ||||
| 	MOVQ R11, 128(R10) | ||||
| 	MOVB R12, 136(R10) | ||||
|  | ||||
| 	// br3.fillFast32() | ||||
| 	MOVQ    176(R11), R12 | ||||
| 	MOVBQZX 184(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    176(R10), R11 | ||||
| 	MOVBQZX 184(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill3 | ||||
| 	MOVQ    168(R11), AX | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	MOVQ    168(R10), AX | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, AX | ||||
| 	MOVQ    144(R11), R14 | ||||
| 	MOVQ    144(R10), R13 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (AX)(R14*1), R14 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ AX, 168(R11) | ||||
| 	ORQ  R14, R12 | ||||
| 	MOVL (AX)(R13*1), R13 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R13 | ||||
| 	MOVQ AX, 168(R10) | ||||
| 	ORQ  R13, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br3.off < 4) | ||||
| 	// exhausted += (br3.off < 4) | ||||
| 	CMPQ AX, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill3: | ||||
| 	// val0 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br3.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ DI, CX | ||||
| 	MOVQ R12, R14 | ||||
| 	SHRQ CL, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br3.advance(uint8(v1.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// out[id * dstEvery + 0] = uint8(v0.entry >> 8) | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	MOVW AX, (R8) | ||||
| 	LEAQ (R8)(R8*2), CX | ||||
| 	MOVW AX, (BX)(CX*1) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ  R12, 176(R11) | ||||
| 	MOVB  R13, 184(R11) | ||||
| 	ADDQ  $0x02, SI | ||||
| 	MOVQ  R11, 176(R10) | ||||
| 	MOVB  R12, 184(R10) | ||||
| 	ADDQ  $0x02, BX | ||||
| 	TESTB DL, DL | ||||
| 	JZ    main_loop | ||||
| 	MOVQ  ctx+0(FP), AX | ||||
| 	SUBQ  16(AX), SI | ||||
| 	SHLQ  $0x02, SI | ||||
| 	MOVQ  SI, 40(AX) | ||||
| 	SUBQ  16(AX), BX | ||||
| 	SHLQ  $0x02, BX | ||||
| 	MOVQ  BX, 40(AX) | ||||
| 	RET | ||||
|  | ||||
| // func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) | ||||
| TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8 | ||||
| 	XORQ DX, DX | ||||
|  | ||||
| 	// Preload values | ||||
| 	MOVQ    ctx+0(FP), CX | ||||
| 	MOVBQZX 8(CX), DI | ||||
| 	MOVQ    16(CX), BX | ||||
| 	MOVQ    48(CX), SI | ||||
| 	MOVQ    24(CX), R9 | ||||
| 	MOVQ    32(CX), R10 | ||||
| 	MOVQ    (CX), R11 | ||||
| 	MOVQ    24(CX), R8 | ||||
| 	MOVQ    32(CX), R9 | ||||
| 	MOVQ    (CX), R10 | ||||
|  | ||||
| 	// Main loop | ||||
| main_loop: | ||||
| 	MOVQ  BX, R8 | ||||
| 	CMPQ  R8, SI | ||||
| 	XORL  DX, DX | ||||
| 	CMPQ  BX, SI | ||||
| 	SETGE DL | ||||
|  | ||||
| 	// br0.fillFast32() | ||||
| 	MOVQ    32(R11), R12 | ||||
| 	MOVBQZX 40(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    32(R10), R11 | ||||
| 	MOVBQZX 40(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill0 | ||||
| 	MOVQ    24(R11), R14 | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	SUBQ    $0x04, R14 | ||||
| 	MOVQ    (R11), R15 | ||||
| 	MOVQ    24(R10), R13 | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, R13 | ||||
| 	MOVQ    (R10), R14 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (R14)(R15*1), R15 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R15 | ||||
| 	MOVQ R14, 24(R11) | ||||
| 	ORQ  R15, R12 | ||||
| 	MOVL (R13)(R14*1), R14 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ R13, 24(R10) | ||||
| 	ORQ  R14, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br0.off < 4) | ||||
| 	CMPQ  R14, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	// exhausted += (br0.off < 4) | ||||
| 	CMPQ R13, $0x04 | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill0: | ||||
| 	// val0 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br0.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br0.advance(uint8(v1.entry)) | ||||
| 	MOVB   CH, AH | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// val2 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v2 := table[val2&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br0.advance(uint8(v2.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val3 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v3 := table[val3&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br0.advance(uint8(v3.entry)) | ||||
| 	MOVB   CH, AL | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// these four writes get coalesced | ||||
| @@ -365,88 +354,86 @@ skip_fill0: | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	// out[id * dstEvery + 2] = uint8(v2.entry >> 8) | ||||
| 	// out[id * dstEvery + 3] = uint8(v3.entry >> 8) | ||||
| 	MOVL AX, (R8) | ||||
| 	MOVL AX, (BX) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ R12, 32(R11) | ||||
| 	MOVB R13, 40(R11) | ||||
| 	ADDQ R9, R8 | ||||
| 	MOVQ R11, 32(R10) | ||||
| 	MOVB R12, 40(R10) | ||||
|  | ||||
| 	// br1.fillFast32() | ||||
| 	MOVQ    80(R11), R12 | ||||
| 	MOVBQZX 88(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    80(R10), R11 | ||||
| 	MOVBQZX 88(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill1 | ||||
| 	MOVQ    72(R11), R14 | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	SUBQ    $0x04, R14 | ||||
| 	MOVQ    48(R11), R15 | ||||
| 	MOVQ    72(R10), R13 | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, R13 | ||||
| 	MOVQ    48(R10), R14 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (R14)(R15*1), R15 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R15 | ||||
| 	MOVQ R14, 72(R11) | ||||
| 	ORQ  R15, R12 | ||||
| 	MOVL (R13)(R14*1), R14 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ R13, 72(R10) | ||||
| 	ORQ  R14, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br1.off < 4) | ||||
| 	CMPQ  R14, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	// exhausted += (br1.off < 4) | ||||
| 	CMPQ R13, $0x04 | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill1: | ||||
| 	// val0 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br1.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br1.advance(uint8(v1.entry)) | ||||
| 	MOVB   CH, AH | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// val2 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v2 := table[val2&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br1.advance(uint8(v2.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val3 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v3 := table[val3&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br1.advance(uint8(v3.entry)) | ||||
| 	MOVB   CH, AL | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// these four writes get coalesced | ||||
| @@ -454,88 +441,86 @@ skip_fill1: | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	// out[id * dstEvery + 2] = uint8(v2.entry >> 8) | ||||
| 	// out[id * dstEvery + 3] = uint8(v3.entry >> 8) | ||||
| 	MOVL AX, (R8) | ||||
| 	MOVL AX, (BX)(R8*1) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ R12, 80(R11) | ||||
| 	MOVB R13, 88(R11) | ||||
| 	ADDQ R9, R8 | ||||
| 	MOVQ R11, 80(R10) | ||||
| 	MOVB R12, 88(R10) | ||||
|  | ||||
| 	// br2.fillFast32() | ||||
| 	MOVQ    128(R11), R12 | ||||
| 	MOVBQZX 136(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    128(R10), R11 | ||||
| 	MOVBQZX 136(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill2 | ||||
| 	MOVQ    120(R11), R14 | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	SUBQ    $0x04, R14 | ||||
| 	MOVQ    96(R11), R15 | ||||
| 	MOVQ    120(R10), R13 | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, R13 | ||||
| 	MOVQ    96(R10), R14 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (R14)(R15*1), R15 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R15 | ||||
| 	MOVQ R14, 120(R11) | ||||
| 	ORQ  R15, R12 | ||||
| 	MOVL (R13)(R14*1), R14 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ R13, 120(R10) | ||||
| 	ORQ  R14, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br2.off < 4) | ||||
| 	CMPQ  R14, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	// exhausted += (br2.off < 4) | ||||
| 	CMPQ R13, $0x04 | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill2: | ||||
| 	// val0 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br2.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br2.advance(uint8(v1.entry)) | ||||
| 	MOVB   CH, AH | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// val2 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v2 := table[val2&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br2.advance(uint8(v2.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val3 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v3 := table[val3&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br2.advance(uint8(v3.entry)) | ||||
| 	MOVB   CH, AL | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// these four writes get coalesced | ||||
| @@ -543,88 +528,86 @@ skip_fill2: | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	// out[id * dstEvery + 2] = uint8(v2.entry >> 8) | ||||
| 	// out[id * dstEvery + 3] = uint8(v3.entry >> 8) | ||||
| 	MOVL AX, (R8) | ||||
| 	MOVL AX, (BX)(R8*2) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ R12, 128(R11) | ||||
| 	MOVB R13, 136(R11) | ||||
| 	ADDQ R9, R8 | ||||
| 	MOVQ R11, 128(R10) | ||||
| 	MOVB R12, 136(R10) | ||||
|  | ||||
| 	// br3.fillFast32() | ||||
| 	MOVQ    176(R11), R12 | ||||
| 	MOVBQZX 184(R11), R13 | ||||
| 	CMPQ    R13, $0x20 | ||||
| 	MOVQ    176(R10), R11 | ||||
| 	MOVBQZX 184(R10), R12 | ||||
| 	CMPQ    R12, $0x20 | ||||
| 	JBE     skip_fill3 | ||||
| 	MOVQ    168(R11), R14 | ||||
| 	SUBQ    $0x20, R13 | ||||
| 	SUBQ    $0x04, R14 | ||||
| 	MOVQ    144(R11), R15 | ||||
| 	MOVQ    168(R10), R13 | ||||
| 	SUBQ    $0x20, R12 | ||||
| 	SUBQ    $0x04, R13 | ||||
| 	MOVQ    144(R10), R14 | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVL (R14)(R15*1), R15 | ||||
| 	MOVQ R13, CX | ||||
| 	SHLQ CL, R15 | ||||
| 	MOVQ R14, 168(R11) | ||||
| 	ORQ  R15, R12 | ||||
| 	MOVL (R13)(R14*1), R14 | ||||
| 	MOVQ R12, CX | ||||
| 	SHLQ CL, R14 | ||||
| 	MOVQ R13, 168(R10) | ||||
| 	ORQ  R14, R11 | ||||
|  | ||||
| 	// exhausted = exhausted || (br3.off < 4) | ||||
| 	CMPQ  R14, $0x04 | ||||
| 	SETLT AL | ||||
| 	ORB   AL, DL | ||||
| 	// exhausted += (br3.off < 4) | ||||
| 	CMPQ R13, $0x04 | ||||
| 	ADCB $+0, DL | ||||
|  | ||||
| skip_fill3: | ||||
| 	// val0 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br3.advance(uint8(v0.entry)) | ||||
| 	MOVB CH, AL | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val1 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br3.advance(uint8(v1.entry)) | ||||
| 	MOVB   CH, AH | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// val2 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v2 := table[val2&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br3.advance(uint8(v2.entry)) | ||||
| 	MOVB CH, AH | ||||
| 	SHLQ CL, R12 | ||||
| 	ADDB CL, R13 | ||||
| 	SHLQ CL, R11 | ||||
| 	ADDB CL, R12 | ||||
|  | ||||
| 	// val3 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ R12, R14 | ||||
| 	MOVQ R11, R13 | ||||
| 	MOVQ DI, CX | ||||
| 	SHRQ CL, R14 | ||||
| 	SHRQ CL, R13 | ||||
|  | ||||
| 	// v3 := table[val3&mask] | ||||
| 	MOVW (R10)(R14*2), CX | ||||
| 	MOVW (R9)(R13*2), CX | ||||
|  | ||||
| 	// br3.advance(uint8(v3.entry)) | ||||
| 	MOVB   CH, AL | ||||
| 	SHLQ   CL, R12 | ||||
| 	ADDB   CL, R13 | ||||
| 	SHLQ   CL, R11 | ||||
| 	ADDB   CL, R12 | ||||
| 	BSWAPL AX | ||||
|  | ||||
| 	// these four writes get coalesced | ||||
| @@ -632,11 +615,12 @@ skip_fill3: | ||||
| 	// out[id * dstEvery + 1] = uint8(v1.entry >> 8) | ||||
| 	// out[id * dstEvery + 2] = uint8(v2.entry >> 8) | ||||
| 	// out[id * dstEvery + 3] = uint8(v3.entry >> 8) | ||||
| 	MOVL AX, (R8) | ||||
| 	LEAQ (R8)(R8*2), CX | ||||
| 	MOVL AX, (BX)(CX*1) | ||||
|  | ||||
| 	// update the bitreader structure | ||||
| 	MOVQ  R12, 176(R11) | ||||
| 	MOVB  R13, 184(R11) | ||||
| 	MOVQ  R11, 176(R10) | ||||
| 	MOVB  R12, 184(R10) | ||||
| 	ADDQ  $0x04, BX | ||||
| 	TESTB DL, DL | ||||
| 	JZ    main_loop | ||||
| @@ -652,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8 | ||||
| 	MOVQ    16(CX), DX | ||||
| 	MOVQ    24(CX), BX | ||||
| 	CMPQ    BX, $0x04 | ||||
| 	JB      error_max_decoded_size_exeeded | ||||
| 	JB      error_max_decoded_size_exceeded | ||||
| 	LEAQ    (DX)(BX*1), BX | ||||
| 	MOVQ    (CX), SI | ||||
| 	MOVQ    (SI), R8 | ||||
| @@ -667,7 +651,7 @@ main_loop: | ||||
| 	// Check if we have room for 4 bytes in the output buffer | ||||
| 	LEAQ 4(DX), CX | ||||
| 	CMPQ CX, BX | ||||
| 	JGE  error_max_decoded_size_exeeded | ||||
| 	JGE  error_max_decoded_size_exceeded | ||||
|  | ||||
| 	// Decode 4 values | ||||
| 	CMPQ R11, $0x20 | ||||
| @@ -744,7 +728,7 @@ loop_condition: | ||||
| 	RET | ||||
|  | ||||
| 	// Report error | ||||
| error_max_decoded_size_exeeded: | ||||
| error_max_decoded_size_exceeded: | ||||
| 	MOVQ ctx+0(FP), AX | ||||
| 	MOVQ $-1, CX | ||||
| 	MOVQ CX, 40(AX) | ||||
| @@ -757,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8 | ||||
| 	MOVQ    16(CX), DX | ||||
| 	MOVQ    24(CX), BX | ||||
| 	CMPQ    BX, $0x04 | ||||
| 	JB      error_max_decoded_size_exeeded | ||||
| 	JB      error_max_decoded_size_exceeded | ||||
| 	LEAQ    (DX)(BX*1), BX | ||||
| 	MOVQ    (CX), SI | ||||
| 	MOVQ    (SI), R8 | ||||
| @@ -772,7 +756,7 @@ main_loop: | ||||
| 	// Check if we have room for 4 bytes in the output buffer | ||||
| 	LEAQ 4(DX), CX | ||||
| 	CMPQ CX, BX | ||||
| 	JGE  error_max_decoded_size_exeeded | ||||
| 	JGE  error_max_decoded_size_exceeded | ||||
|  | ||||
| 	// Decode 4 values | ||||
| 	CMPQ  R11, $0x20 | ||||
| @@ -839,7 +823,7 @@ loop_condition: | ||||
| 	RET | ||||
|  | ||||
| 	// Report error | ||||
| error_max_decoded_size_exeeded: | ||||
| error_max_decoded_size_exceeded: | ||||
| 	MOVQ ctx+0(FP), AX | ||||
| 	MOVQ $-1, CX | ||||
| 	MOVQ CX, 40(AX) | ||||
|   | ||||
							
								
								
									
										22
									
								
								vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -103,6 +103,28 @@ func hash(u, shift uint32) uint32 { | ||||
| 	return (u * 0x1e35a7bd) >> shift | ||||
| } | ||||
|  | ||||
| // EncodeBlockInto exposes encodeBlock but checks dst size: it returns 0 without writing when len(dst) < MaxEncodedLen(len(src)). Otherwise it returns d, the sum of the values returned by emitLiteral/encodeBlock, which is also the running write offset into dst. An empty src returns 0. | ||||
| func EncodeBlockInto(dst, src []byte) (d int) { | ||||
| 	if MaxEncodedLen(len(src)) > len(dst) { | ||||
| 		return 0 | ||||
| 	} | ||||
|  | ||||
| 	// encodeBlock breaks on too big blocks, so split: consume src in pieces of at most maxBlockSize bytes; a piece shorter than minNonLiteralBlockSize goes to emitLiteral instead of encodeBlock. | ||||
| 	for len(src) > 0 { | ||||
| 		p := src | ||||
| 		src = nil | ||||
| 		if len(p) > maxBlockSize { | ||||
| 			p, src = p[:maxBlockSize], p[maxBlockSize:] | ||||
| 		} | ||||
| 		if len(p) < minNonLiteralBlockSize { | ||||
| 			d += emitLiteral(dst[d:], p) | ||||
| 		} else { | ||||
| 			d += encodeBlock(dst[d:], p) | ||||
| 		} | ||||
| 	} | ||||
| 	return d | ||||
| } | ||||
|  | ||||
| // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It | ||||
| // assumes that the varint-encoded length of the decompressed bytes has already | ||||
| // been written. | ||||
|   | ||||
							
								
								
									
										16
									
								
								vendor/github.com/klauspost/compress/zstd/blockdec.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								vendor/github.com/klauspost/compress/zstd/blockdec.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -82,8 +82,9 @@ type blockDec struct { | ||||
|  | ||||
| 	err error | ||||
|  | ||||
| 	// Check against this crc | ||||
| 	checkCRC []byte | ||||
| 	// Check against this crc, if hasCRC is true. | ||||
| 	checkCRC uint32 | ||||
| 	hasCRC   bool | ||||
|  | ||||
| 	// Frame to use for singlethreaded decoding. | ||||
| 	// Should not be used by the decoder itself since parent may be another frame. | ||||
| @@ -191,16 +192,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { | ||||
| 	} | ||||
|  | ||||
| 	// Read block data. | ||||
| 	if cap(b.dataStorage) < cSize { | ||||
| 	if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize { | ||||
| 		// byteBuf doesn't need a destination buffer. | ||||
| 		if b.lowMem || cSize > maxCompressedBlockSize { | ||||
| 			b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) | ||||
| 		} else { | ||||
| 			b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) | ||||
| 		} | ||||
| 	} | ||||
| 	if cap(b.dst) <= maxSize { | ||||
| 		b.dst = make([]byte, 0, maxSize+1) | ||||
| 	} | ||||
| 	b.data, err = br.readBig(cSize, b.dataStorage) | ||||
| 	if err != nil { | ||||
| 		if debugDecoder { | ||||
| @@ -209,6 +208,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { | ||||
| 		} | ||||
| 		return err | ||||
| 	} | ||||
| 	if cap(b.dst) <= maxSize { | ||||
| 		b.dst = make([]byte, 0, maxSize+1) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| @@ -232,7 +234,7 @@ func (b *blockDec) decodeBuf(hist *history) error { | ||||
| 			if b.lowMem { | ||||
| 				b.dst = make([]byte, b.RLESize) | ||||
| 			} else { | ||||
| 				b.dst = make([]byte, maxBlockSize) | ||||
| 				b.dst = make([]byte, maxCompressedBlockSize) | ||||
| 			} | ||||
| 		} | ||||
| 		b.dst = b.dst[:b.RLESize] | ||||
|   | ||||
							
								
								
									
										9
									
								
								vendor/github.com/klauspost/compress/zstd/decodeheader.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								vendor/github.com/klauspost/compress/zstd/decodeheader.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -4,7 +4,6 @@ | ||||
| package zstd | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| @@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error { | ||||
| 	} | ||||
| 	h.HeaderSize += 4 | ||||
| 	b, in := in[:4], in[4:] | ||||
| 	if !bytes.Equal(b, frameMagic) { | ||||
| 		if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { | ||||
| 	if string(b) != frameMagic { | ||||
| 		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { | ||||
| 			return ErrMagicMismatch | ||||
| 		} | ||||
| 		if len(in) < 4 { | ||||
| @@ -153,7 +152,7 @@ func (h *Header) Decode(in []byte) error { | ||||
| 		} | ||||
| 		b, in = in[:size], in[size:] | ||||
| 		h.HeaderSize += int(size) | ||||
| 		switch size { | ||||
| 		switch len(b) { | ||||
| 		case 1: | ||||
| 			h.DictionaryID = uint32(b[0]) | ||||
| 		case 2: | ||||
| @@ -183,7 +182,7 @@ func (h *Header) Decode(in []byte) error { | ||||
| 		} | ||||
| 		b, in = in[:fcsSize], in[fcsSize:] | ||||
| 		h.HeaderSize += int(fcsSize) | ||||
| 		switch fcsSize { | ||||
| 		switch len(b) { | ||||
| 		case 1: | ||||
| 			h.FrameContentSize = uint64(b[0]) | ||||
| 		case 2: | ||||
|   | ||||
							
								
								
									
										93
									
								
								vendor/github.com/klauspost/compress/zstd/decoder.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										93
									
								
								vendor/github.com/klauspost/compress/zstd/decoder.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -5,7 +5,6 @@ | ||||
| package zstd | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"encoding/binary" | ||||
| 	"io" | ||||
| @@ -41,8 +40,7 @@ type Decoder struct { | ||||
| 	frame *frameDec | ||||
|  | ||||
| 	// Custom dictionaries. | ||||
| 	// Always uses copies. | ||||
| 	dicts map[uint32]dict | ||||
| 	dicts map[uint32]*dict | ||||
|  | ||||
| 	// streamWg is the waitgroup for all streams | ||||
| 	streamWg sync.WaitGroup | ||||
| @@ -104,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { | ||||
| 	} | ||||
|  | ||||
| 	// Transfer option dicts. | ||||
| 	d.dicts = make(map[uint32]dict, len(d.o.dicts)) | ||||
| 	d.dicts = make(map[uint32]*dict, len(d.o.dicts)) | ||||
| 	for _, dc := range d.o.dicts { | ||||
| 		d.dicts[dc.id] = dc | ||||
| 	} | ||||
| @@ -342,15 +340,8 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { | ||||
| 			} | ||||
| 			return dst, err | ||||
| 		} | ||||
| 		if frame.DictionaryID != nil { | ||||
| 			dict, ok := d.dicts[*frame.DictionaryID] | ||||
| 			if !ok { | ||||
| 				return nil, ErrUnknownDictionary | ||||
| 			} | ||||
| 			if debugDecoder { | ||||
| 				println("setting dict", frame.DictionaryID) | ||||
| 			} | ||||
| 			frame.history.setDict(&dict) | ||||
| 		if err = d.setDict(frame); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if frame.WindowSize > d.o.maxWindowSize { | ||||
| 			if debugDecoder { | ||||
| @@ -459,7 +450,11 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { | ||||
| 		println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp) | ||||
| 	} | ||||
|  | ||||
| 	if !d.o.ignoreChecksum && len(next.b) > 0 { | ||||
| 	if d.o.ignoreChecksum { | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	if len(next.b) > 0 { | ||||
| 		n, err := d.current.crc.Write(next.b) | ||||
| 		if err == nil { | ||||
| 			if n != len(next.b) { | ||||
| @@ -467,18 +462,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 { | ||||
| 		got := d.current.crc.Sum64() | ||||
| 		var tmp [4]byte | ||||
| 		binary.LittleEndian.PutUint32(tmp[:], uint32(got)) | ||||
| 		if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) { | ||||
| 	if next.err == nil && next.d != nil && next.d.hasCRC { | ||||
| 		got := uint32(d.current.crc.Sum64()) | ||||
| 		if got != next.d.checkCRC { | ||||
| 			if debugDecoder { | ||||
| 				println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)") | ||||
| 				printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC) | ||||
| 			} | ||||
| 			d.current.err = ErrCRCMismatch | ||||
| 		} else { | ||||
| 			if debugDecoder { | ||||
| 				println("CRC ok", tmp[:]) | ||||
| 				printf("CRC ok %08x\n", got) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| @@ -494,18 +487,12 @@ func (d *Decoder) nextBlockSync() (ok bool) { | ||||
| 		if !d.syncStream.inFrame { | ||||
| 			d.frame.history.reset() | ||||
| 			d.current.err = d.frame.reset(&d.syncStream.br) | ||||
| 			if d.current.err == nil { | ||||
| 				d.current.err = d.setDict(d.frame) | ||||
| 			} | ||||
| 			if d.current.err != nil { | ||||
| 				return false | ||||
| 			} | ||||
| 			if d.frame.DictionaryID != nil { | ||||
| 				dict, ok := d.dicts[*d.frame.DictionaryID] | ||||
| 				if !ok { | ||||
| 					d.current.err = ErrUnknownDictionary | ||||
| 					return false | ||||
| 				} else { | ||||
| 					d.frame.history.setDict(&dict) | ||||
| 				} | ||||
| 			} | ||||
| 			if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize { | ||||
| 				d.current.err = ErrDecoderSizeExceeded | ||||
| 				return false | ||||
| @@ -770,7 +757,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch | ||||
| 					if block.lowMem { | ||||
| 						block.dst = make([]byte, block.RLESize) | ||||
| 					} else { | ||||
| 						block.dst = make([]byte, maxBlockSize) | ||||
| 						block.dst = make([]byte, maxCompressedBlockSize) | ||||
| 					} | ||||
| 				} | ||||
| 				block.dst = block.dst[:block.RLESize] | ||||
| @@ -864,13 +851,8 @@ decodeStream: | ||||
| 		if debugDecoder && err != nil { | ||||
| 			println("Frame decoder returned", err) | ||||
| 		} | ||||
| 		if err == nil && frame.DictionaryID != nil { | ||||
| 			dict, ok := d.dicts[*frame.DictionaryID] | ||||
| 			if !ok { | ||||
| 				err = ErrUnknownDictionary | ||||
| 			} else { | ||||
| 				frame.history.setDict(&dict) | ||||
| 			} | ||||
| 		if err == nil { | ||||
| 			err = d.setDict(frame) | ||||
| 		} | ||||
| 		if err == nil && d.frame.WindowSize > d.o.maxWindowSize { | ||||
| 			if debugDecoder { | ||||
| @@ -918,18 +900,22 @@ decodeStream: | ||||
| 				println("next block returned error:", err) | ||||
| 			} | ||||
| 			dec.err = err | ||||
| 			dec.checkCRC = nil | ||||
| 			dec.hasCRC = false | ||||
| 			if dec.Last && frame.HasCheckSum && err == nil { | ||||
| 				crc, err := frame.rawInput.readSmall(4) | ||||
| 				if err != nil { | ||||
| 				if len(crc) < 4 { | ||||
| 					if err == nil { | ||||
| 						err = io.ErrUnexpectedEOF | ||||
|  | ||||
| 					} | ||||
| 					println("CRC missing?", err) | ||||
| 					dec.err = err | ||||
| 				} | ||||
| 				var tmp [4]byte | ||||
| 				copy(tmp[:], crc) | ||||
| 				dec.checkCRC = tmp[:] | ||||
| 				} else { | ||||
| 					dec.checkCRC = binary.LittleEndian.Uint32(crc) | ||||
| 					dec.hasCRC = true | ||||
| 					if debugDecoder { | ||||
| 					println("found crc to check:", dec.checkCRC) | ||||
| 						printf("found crc to check: %08x\n", dec.checkCRC) | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 			err = dec.err | ||||
| @@ -948,3 +934,20 @@ decodeStream: | ||||
| 	hist.reset() | ||||
| 	d.frame.history.b = frameHistCache | ||||
| } | ||||
|  | ||||
| // setDict selects the dictionary for frame based on frame.DictionaryID. | ||||
| // | ||||
| // If a dictionary with that id is registered in d.dicts it is installed into | ||||
| // frame.history. If none is registered, ErrUnknownDictionary is returned only | ||||
| // when the id is non-zero; an id of zero without a registered dictionary is | ||||
| // treated as "no dictionary" and nil is returned. | ||||
| func (d *Decoder) setDict(frame *frameDec) (err error) { | ||||
| 	// A registered dictionary takes precedence, even when its id is zero. | ||||
| 	dict, ok := d.dicts[frame.DictionaryID] | ||||
| 	if ok { | ||||
| 		if debugDecoder { | ||||
| 			println("setting dict", frame.DictionaryID) | ||||
| 		} | ||||
| 		frame.history.setDict(dict) | ||||
| 	} else if frame.DictionaryID != 0 { | ||||
| 		// A zero or missing dictionary id is ambiguous: | ||||
| 		// either dictionary zero, or no dictionary. In particular, | ||||
| 		// zstd --patch-from uses this id for the source file, | ||||
| 		// so only return an error if the dictionary id is not zero. | ||||
| 		err = ErrUnknownDictionary | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|   | ||||
							
								
								
									
										26
									
								
								vendor/github.com/klauspost/compress/zstd/decoder_options.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										26
									
								
								vendor/github.com/klauspost/compress/zstd/decoder_options.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -6,6 +6,8 @@ package zstd | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"math/bits" | ||||
| 	"runtime" | ||||
| ) | ||||
|  | ||||
| @@ -18,7 +20,7 @@ type decoderOptions struct { | ||||
| 	concurrent      int | ||||
| 	maxDecodedSize  uint64 | ||||
| 	maxWindowSize   uint64 | ||||
| 	dicts           []dict | ||||
| 	dicts           []*dict | ||||
| 	ignoreChecksum  bool | ||||
| 	limitToCap      bool | ||||
| 	decodeBufsBelow int | ||||
| @@ -85,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption { | ||||
| } | ||||
|  | ||||
| // WithDecoderDicts allows to register one or more dictionaries for the decoder. | ||||
| // If several dictionaries with the same ID is provided the last one will be used. | ||||
| // | ||||
| // Each slice in dict must be in the [dictionary format] produced by | ||||
| // "zstd --train" from the Zstandard reference implementation. | ||||
| // | ||||
| // If several dictionaries with the same ID are provided, the last one will be used. | ||||
| // | ||||
| // [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format | ||||
| func WithDecoderDicts(dicts ...[]byte) DOption { | ||||
| 	return func(o *decoderOptions) error { | ||||
| 		for _, b := range dicts { | ||||
| @@ -93,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption { | ||||
| 			if err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 			o.dicts = append(o.dicts, *d) | ||||
| 			o.dicts = append(o.dicts, d) | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // WithDecoderDictRaw registers a dictionary that may be used by the decoder. | ||||
| // The slice content can be arbitrary data. | ||||
| // | ||||
| // The dictionary is appended to the decoder options under the given id with | ||||
| // initial offsets 1, 4 and 8. The content slice is stored as-is, not copied. | ||||
| // | ||||
| // The returned option fails if content is larger than dictMaxLength (2 GiB). | ||||
| func WithDecoderDictRaw(id uint32, content []byte) DOption { | ||||
| 	return func(o *decoderOptions) error { | ||||
| 		// The limit is only checked where uint is wider than 32 bits; | ||||
| 		// presumably a slice cannot exceed it on 32-bit platforms (TODO confirm). | ||||
| 		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { | ||||
| 			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) | ||||
| 		} | ||||
| 		o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}) | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // WithDecoderMaxWindow allows to set a maximum window size for decodes. | ||||
| // This allows rejecting packets that will cause big memory usage. | ||||
| // The Decoder will likely allocate more memory based on the WithDecoderLowmem setting. | ||||
|   | ||||
							
								
								
									
										51
									
								
								vendor/github.com/klauspost/compress/zstd/dict.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										51
									
								
								vendor/github.com/klauspost/compress/zstd/dict.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,7 +1,6 @@ | ||||
| package zstd | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| @@ -20,7 +19,10 @@ type dict struct { | ||||
| 	content []byte | ||||
| } | ||||
|  | ||||
| var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} | ||||
| const dictMagic = "\x37\xa4\x30\xec" | ||||
|  | ||||
| // Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB. | ||||
| const dictMaxLength = 1 << 31 | ||||
|  | ||||
| // ID returns the dictionary id or 0 if d is nil. | ||||
| func (d *dict) ID() uint32 { | ||||
| @@ -30,14 +32,38 @@ func (d *dict) ID() uint32 { | ||||
| 	return d.id | ||||
| } | ||||
|  | ||||
| // DictContentSize returns the dictionary content size or 0 if d is nil. | ||||
| func (d *dict) DictContentSize() int { | ||||
| // ContentSize returns the dictionary content size or 0 if d is nil. | ||||
| func (d *dict) ContentSize() int { | ||||
| 	if d == nil { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	return len(d.content) | ||||
| } | ||||
|  | ||||
| // Content returns the dictionary content or nil if d is nil. | ||||
| // The returned slice is the dictionary's own content slice, not a copy. | ||||
| func (d *dict) Content() []byte { | ||||
| 	if d == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return d.content | ||||
| } | ||||
|  | ||||
| // Offsets returns the initial offsets or an all-zero array if d is nil. | ||||
| // The array is returned by value, so the caller gets a copy. | ||||
| func (d *dict) Offsets() [3]int { | ||||
| 	if d == nil { | ||||
| 		return [3]int{} | ||||
| 	} | ||||
| 	return d.offsets | ||||
| } | ||||
|  | ||||
| // LitEncoder returns the literal encoder or nil if d is nil. | ||||
| // The returned pointer is the dictionary's own litEnc field, not a copy. | ||||
| func (d *dict) LitEncoder() *huff0.Scratch { | ||||
| 	if d == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return d.litEnc | ||||
| } | ||||
|  | ||||
| // Load a dictionary as described in | ||||
| // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format | ||||
| func loadDict(b []byte) (*dict, error) { | ||||
| @@ -50,7 +76,7 @@ func loadDict(b []byte) (*dict, error) { | ||||
| 		ofDec: sequenceDec{fse: &fseDecoder{}}, | ||||
| 		mlDec: sequenceDec{fse: &fseDecoder{}}, | ||||
| 	} | ||||
| 	if !bytes.Equal(b[:4], dictMagic[:]) { | ||||
| 	if string(b[:4]) != dictMagic { | ||||
| 		return nil, ErrMagicMismatch | ||||
| 	} | ||||
| 	d.id = binary.LittleEndian.Uint32(b[4:8]) | ||||
| @@ -62,7 +88,7 @@ func loadDict(b []byte) (*dict, error) { | ||||
| 	var err error | ||||
| 	d.litEnc, b, err = huff0.ReadTable(b[8:], nil) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 		return nil, fmt.Errorf("loading literal table: %w", err) | ||||
| 	} | ||||
| 	d.litEnc.Reuse = huff0.ReusePolicyMust | ||||
|  | ||||
| @@ -120,3 +146,16 @@ func loadDict(b []byte) (*dict, error) { | ||||
|  | ||||
| 	return &d, nil | ||||
| } | ||||
|  | ||||
| // InspectDictionary loads a zstd dictionary and provides functions to inspect the content. | ||||
| // | ||||
| // b is parsed with loadDict after initPredefined has been called, and the | ||||
| // result and error of loadDict are returned unchanged. | ||||
| // | ||||
| // NOTE(review): the loaded value is a *dict returned through an interface, so | ||||
| // when loadDict fails with a nil *dict the returned interface is not == nil. | ||||
| // Callers must check the error rather than comparing the first result to nil. | ||||
| func InspectDictionary(b []byte) (interface { | ||||
| 	ID() uint32 | ||||
| 	ContentSize() int | ||||
| 	Content() []byte | ||||
| 	Offsets() [3]int | ||||
| 	LitEncoder() *huff0.Scratch | ||||
| }, error) { | ||||
| 	initPredefined() | ||||
| 	d, err := loadDict(b) | ||||
| 	return d, err | ||||
| } | ||||
|   | ||||
							
								
								
									
										28
									
								
								vendor/github.com/klauspost/compress/zstd/enc_base.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										28
									
								
								vendor/github.com/klauspost/compress/zstd/enc_base.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -16,6 +16,7 @@ type fastBase struct { | ||||
| 	cur int32 | ||||
| 	// maximum offset. Should be at least 2x block size. | ||||
| 	maxMatchOff int32 | ||||
| 	bufferReset int32 | ||||
| 	hist        []byte | ||||
| 	crc         *xxhash.Digest | ||||
| 	tmp         [8]byte | ||||
| @@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc { | ||||
| } | ||||
|  | ||||
| func (e *fastBase) addBlock(src []byte) int32 { | ||||
| 	if debugAsserts && e.cur > bufferReset { | ||||
| 		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) | ||||
| 	if debugAsserts && e.cur > e.bufferReset { | ||||
| 		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset)) | ||||
| 	} | ||||
| 	// check if we have space already | ||||
| 	if len(e.hist)+len(src) > cap(e.hist) { | ||||
| @@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 { | ||||
| 			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) | ||||
| 		} | ||||
| 	} | ||||
| 	a := src[s:] | ||||
| 	b := src[t:] | ||||
| 	b = b[:len(a)] | ||||
| 	end := int32((len(a) >> 3) << 3) | ||||
| 	for i := int32(0); i < end; i += 8 { | ||||
| 		if diff := load6432(a, i) ^ load6432(b, i); diff != 0 { | ||||
| 			return i + int32(bits.TrailingZeros64(diff)>>3) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	a = a[end:] | ||||
| 	b = b[end:] | ||||
| 	for i := range a { | ||||
| 		if a[i] != b[i] { | ||||
| 			return int32(i) + end | ||||
| 		} | ||||
| 	} | ||||
| 	return int32(len(a)) + end | ||||
| 	return int32(matchLen(src[s:], src[t:])) | ||||
| } | ||||
|  | ||||
| // Reset the encoding table. | ||||
| @@ -165,13 +149,13 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) { | ||||
| 		if singleBlock { | ||||
| 			e.lowMem = true | ||||
| 		} | ||||
| 		e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) | ||||
| 		e.ensureHist(d.ContentSize() + maxCompressedBlockSize) | ||||
| 		e.lowMem = low | ||||
| 	} | ||||
|  | ||||
| 	// We offset current position so everything will be out of reach. | ||||
| 	// If above reset line, history will be purged. | ||||
| 	if e.cur < bufferReset { | ||||
| 	if e.cur < e.bufferReset { | ||||
| 		e.cur += e.maxMatchOff + int32(len(e.hist)) | ||||
| 	} | ||||
| 	e.hist = e.hist[:0] | ||||
|   | ||||
							
								
								
									
										63
									
								
								vendor/github.com/klauspost/compress/zstd/enc_best.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										63
									
								
								vendor/github.com/klauspost/compress/zstd/enc_best.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -85,14 +85,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = prevEntry{} | ||||
| 			} | ||||
| 			for i := range e.longTable[:] { | ||||
| 				e.longTable[i] = prevEntry{} | ||||
| 			} | ||||
| 			e.table = [bestShortTableSize]prevEntry{} | ||||
| 			e.longTable = [bestLongTableSize]prevEntry{} | ||||
| 			e.cur = e.maxMatchOff | ||||
| 			break | ||||
| 		} | ||||
| @@ -193,8 +189,8 @@ encodeLoop: | ||||
| 			panic("offset0 was 0") | ||||
| 		} | ||||
|  | ||||
| 		bestOf := func(a, b match) match { | ||||
| 			if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 { | ||||
| 		bestOf := func(a, b *match) *match { | ||||
| 			if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 { | ||||
| 				return a | ||||
| 			} | ||||
| 			return b | ||||
| @@ -220,22 +216,26 @@ encodeLoop: | ||||
| 			return m | ||||
| 		} | ||||
|  | ||||
| 		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) | ||||
| 		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) | ||||
| 		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) | ||||
| 		m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) | ||||
| 		m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) | ||||
| 		m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) | ||||
| 		m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1) | ||||
| 		best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4)) | ||||
|  | ||||
| 		if canRepeat && best.length < goodEnough { | ||||
| 			cv32 := uint32(cv >> 8) | ||||
| 			spp := s + 1 | ||||
| 			best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) | ||||
| 			best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) | ||||
| 			best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) | ||||
| 			m1 := matchAt(spp-offset1, spp, cv32, 1) | ||||
| 			m2 := matchAt(spp-offset2, spp, cv32, 2) | ||||
| 			m3 := matchAt(spp-offset3, spp, cv32, 3) | ||||
| 			best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) | ||||
| 			if best.length > 0 { | ||||
| 				cv32 = uint32(cv >> 24) | ||||
| 				spp += 2 | ||||
| 				best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) | ||||
| 				best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) | ||||
| 				best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) | ||||
| 				m1 := matchAt(spp-offset1, spp, cv32, 1) | ||||
| 				m2 := matchAt(spp-offset2, spp, cv32, 2) | ||||
| 				m3 := matchAt(spp-offset3, spp, cv32, 3) | ||||
| 				best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) | ||||
| 			} | ||||
| 		} | ||||
| 		// Load next and check... | ||||
| @@ -262,26 +262,33 @@ encodeLoop: | ||||
| 			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] | ||||
|  | ||||
| 			// Short at s+1 | ||||
| 			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) | ||||
| 			m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) | ||||
| 			// Long at s+1, s+2 | ||||
| 			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) | ||||
| 			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) | ||||
| 			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) | ||||
| 			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) | ||||
| 			m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) | ||||
| 			m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) | ||||
| 			m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1) | ||||
| 			m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1) | ||||
| 			best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5)) | ||||
| 			if false { | ||||
| 				// Short at s+3. | ||||
| 				// Too often worse... | ||||
| 				best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)) | ||||
| 				m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1) | ||||
| 				best = bestOf(best, &m) | ||||
| 			} | ||||
| 			// See if we can find a better match by checking where the current best ends. | ||||
| 			// Use that offset to see if we can find a better full match. | ||||
| 			if sAt := best.s + best.length; sAt < sLimit { | ||||
| 				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) | ||||
| 				candidateEnd := e.longTable[nextHashL] | ||||
| 				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { | ||||
| 					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) | ||||
| 					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { | ||||
| 						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) | ||||
| 				// Start check at a fixed offset to allow for a few mismatches. | ||||
| 				// For this compression level 2 yields the best results. | ||||
| 				const skipBeginning = 2 | ||||
| 				if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 { | ||||
| 					m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) | ||||
| 					bestEnd := bestOf(best, &m) | ||||
| 					if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 { | ||||
| 						m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) | ||||
| 						bestEnd = bestOf(bestEnd, &m) | ||||
| 					} | ||||
| 					best = bestEnd | ||||
| 				} | ||||
|   | ||||
							
								
								
									
										12
									
								
								vendor/github.com/klauspost/compress/zstd/enc_better.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								vendor/github.com/klauspost/compress/zstd/enc_better.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = tableEntry{} | ||||
| 			} | ||||
| 			for i := range e.longTable[:] { | ||||
| 				e.longTable[i] = prevEntry{} | ||||
| 			} | ||||
| 			e.table = [betterShortTableSize]tableEntry{} | ||||
| 			e.longTable = [betterLongTableSize]prevEntry{} | ||||
| 			e.cur = e.maxMatchOff | ||||
| 			break | ||||
| 		} | ||||
| @@ -587,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = tableEntry{} | ||||
|   | ||||
							
								
								
									
										16
									
								
								vendor/github.com/klauspost/compress/zstd/enc_dfast.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								vendor/github.com/klauspost/compress/zstd/enc_dfast.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = tableEntry{} | ||||
| 			} | ||||
| 			for i := range e.longTable[:] { | ||||
| 				e.longTable[i] = tableEntry{} | ||||
| 			} | ||||
| 			e.table = [dFastShortTableSize]tableEntry{} | ||||
| 			e.longTable = [dFastLongTableSize]tableEntry{} | ||||
| 			e.cur = e.maxMatchOff | ||||
| 			break | ||||
| 		} | ||||
| @@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	if e.cur >= bufferReset { | ||||
| 	if e.cur >= e.bufferReset { | ||||
| 		for i := range e.table[:] { | ||||
| 			e.table[i] = tableEntry{} | ||||
| 		} | ||||
| @@ -685,7 +681,7 @@ encodeLoop: | ||||
| 	} | ||||
|  | ||||
| 	// We do not store history, so we must offset e.cur to avoid false matches for next user. | ||||
| 	if e.cur < bufferReset { | ||||
| 	if e.cur < e.bufferReset { | ||||
| 		e.cur += int32(len(src)) | ||||
| 	} | ||||
| } | ||||
| @@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = tableEntry{} | ||||
|   | ||||
							
								
								
									
										14
									
								
								vendor/github.com/klauspost/compress/zstd/enc_fast.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										14
									
								
								vendor/github.com/klauspost/compress/zstd/enc_fast.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { | ||||
| 	) | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = tableEntry{} | ||||
| @@ -304,13 +304,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { | ||||
| 		minNonLiteralBlockSize = 1 + 1 + inputMargin | ||||
| 	) | ||||
| 	if debugEncoder { | ||||
| 		if len(src) > maxBlockSize { | ||||
| 		if len(src) > maxCompressedBlockSize { | ||||
| 			panic("src too big") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	if e.cur >= bufferReset { | ||||
| 	if e.cur >= e.bufferReset { | ||||
| 		for i := range e.table[:] { | ||||
| 			e.table[i] = tableEntry{} | ||||
| 		} | ||||
| @@ -538,7 +538,7 @@ encodeLoop: | ||||
| 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) | ||||
| 	} | ||||
| 	// We do not store history, so we must offset e.cur to avoid false matches for next user. | ||||
| 	if e.cur < bufferReset { | ||||
| 	if e.cur < e.bufferReset { | ||||
| 		e.cur += int32(len(src)) | ||||
| 	} | ||||
| } | ||||
| @@ -555,11 +555,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { | ||||
| 		return | ||||
| 	} | ||||
| 	// Protect against e.cur wraparound. | ||||
| 	for e.cur >= bufferReset { | ||||
| 	for e.cur >= e.bufferReset-int32(len(e.hist)) { | ||||
| 		if len(e.hist) == 0 { | ||||
| 			for i := range e.table[:] { | ||||
| 				e.table[i] = tableEntry{} | ||||
| 			} | ||||
| 			e.table = [tableSize]tableEntry{} | ||||
| 			e.cur = e.maxMatchOff | ||||
| 			break | ||||
| 		} | ||||
|   | ||||
							
								
								
									
										35
									
								
								vendor/github.com/klauspost/compress/zstd/encoder.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										35
									
								
								vendor/github.com/klauspost/compress/zstd/encoder.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -8,6 +8,7 @@ import ( | ||||
| 	"crypto/rand" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"math" | ||||
| 	rdebug "runtime/debug" | ||||
| 	"sync" | ||||
|  | ||||
| @@ -639,3 +640,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { | ||||
| 	} | ||||
| 	return dst | ||||
| } | ||||
|  | ||||
| // MaxEncodedSize returns the expected maximum | ||||
| // size of an encoded block or stream. | ||||
| // | ||||
| // size is presumably the length in bytes of the input to be encoded (TODO | ||||
| // confirm against callers). The result is the sum of the frame header, an | ||||
| // optional dictionary id, the frame content size field, an optional checksum, | ||||
| // 3 bytes of overhead per block, size itself, and any padding the encoder is | ||||
| // configured to add. | ||||
| func (e *Encoder) MaxEncodedSize(size int) int { | ||||
| 	frameHeader := 4 + 2 // magic + frame header & window descriptor | ||||
| 	// Reserve room for a dictionary id when a dictionary is configured. | ||||
| 	if e.o.dict != nil { | ||||
| 		frameHeader += 4 | ||||
| 	} | ||||
| 	// Frame content size: | ||||
| 	// the field takes 1, 2, 4 or 8 bytes depending on how large size is. | ||||
| 	if size < 256 { | ||||
| 		frameHeader++ | ||||
| 	} else if size < 65536+256 { | ||||
| 		frameHeader += 2 | ||||
| 	} else if size < math.MaxInt32 { | ||||
| 		frameHeader += 4 | ||||
| 	} else { | ||||
| 		frameHeader += 8 | ||||
| 	} | ||||
| 	// Final crc | ||||
| 	if e.o.crc { | ||||
| 		frameHeader += 4 | ||||
| 	} | ||||
|  | ||||
| 	// Max overhead is 3 bytes/block. | ||||
| 	// There cannot be 0 blocks. | ||||
| 	// Adding a full block size before dividing yields at least one block. | ||||
| 	blocks := (size + e.o.blockSize) / e.o.blockSize | ||||
|  | ||||
| 	// Combine, add padding. | ||||
| 	maxSz := frameHeader + 3*blocks + size | ||||
| 	// Padding is only accounted for when a pad value above 1 is configured. | ||||
| 	if e.o.pad > 1 { | ||||
| 		maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad)) | ||||
| 	} | ||||
| 	return maxSz | ||||
| } | ||||
|   | ||||
							
								
								
									
										36
									
								
								vendor/github.com/klauspost/compress/zstd/encoder_options.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										36
									
								
								vendor/github.com/klauspost/compress/zstd/encoder_options.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -3,6 +3,8 @@ package zstd | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"math/bits" | ||||
| 	"runtime" | ||||
| 	"strings" | ||||
| ) | ||||
| @@ -47,22 +49,22 @@ func (o encoderOptions) encoder() encoder { | ||||
| 	switch o.level { | ||||
| 	case SpeedFastest: | ||||
| 		if o.dict != nil { | ||||
| 			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} | ||||
| 			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} | ||||
| 		} | ||||
| 		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} | ||||
| 		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} | ||||
|  | ||||
| 	case SpeedDefault: | ||||
| 		if o.dict != nil { | ||||
| 			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}} | ||||
| 			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}} | ||||
| 		} | ||||
| 		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} | ||||
| 		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} | ||||
| 	case SpeedBetterCompression: | ||||
| 		if o.dict != nil { | ||||
| 			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} | ||||
| 			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} | ||||
| 		} | ||||
| 		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} | ||||
| 		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} | ||||
| 	case SpeedBestCompression: | ||||
| 		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} | ||||
| 		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} | ||||
| 	} | ||||
| 	panic("unknown compression level") | ||||
| } | ||||
| @@ -304,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption { | ||||
| } | ||||
|  | ||||
| // WithEncoderDict allows to register a dictionary that will be used for the encode. | ||||
| // | ||||
| // The slice dict must be in the [dictionary format] produced by | ||||
| // "zstd --train" from the Zstandard reference implementation. | ||||
| // | ||||
| // The encoder *may* choose to use no dictionary instead for certain payloads. | ||||
| // | ||||
| // [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format | ||||
| func WithEncoderDict(dict []byte) EOption { | ||||
| 	return func(o *encoderOptions) error { | ||||
| 		d, err := loadDict(dict) | ||||
| @@ -315,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption { | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // WithEncoderDictRaw registers a dictionary that may be used by the encoder. | ||||
| // | ||||
| // The slice content may contain arbitrary data. It will be used as an initial | ||||
| // history. | ||||
| // | ||||
| // The option sets the encoder's dictionary to one with the given id, the given | ||||
| // content and initial offsets 1, 4 and 8, replacing any dictionary set before. | ||||
| // The content slice is stored as-is, not copied. | ||||
| // | ||||
| // The returned option fails if content is larger than dictMaxLength (2 GiB). | ||||
| func WithEncoderDictRaw(id uint32, content []byte) EOption { | ||||
| 	return func(o *encoderOptions) error { | ||||
| 		// The limit is only checked where uint is wider than 32 bits; | ||||
| 		// presumably a slice cannot exceed it on 32-bit platforms (TODO confirm). | ||||
| 		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { | ||||
| 			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) | ||||
| 		} | ||||
| 		o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}} | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										56
									
								
								vendor/github.com/klauspost/compress/zstd/framedec.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										56
									
								
								vendor/github.com/klauspost/compress/zstd/framedec.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -5,7 +5,7 @@ | ||||
| package zstd | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"encoding/binary" | ||||
| 	"encoding/hex" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| @@ -29,7 +29,7 @@ type frameDec struct { | ||||
|  | ||||
| 	FrameContentSize uint64 | ||||
|  | ||||
| 	DictionaryID  *uint32 | ||||
| 	DictionaryID  uint32 | ||||
| 	HasCheckSum   bool | ||||
| 	SingleSegment bool | ||||
| } | ||||
| @@ -43,9 +43,9 @@ const ( | ||||
| 	MaxWindowSize = 1 << 29 | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	frameMagic          = []byte{0x28, 0xb5, 0x2f, 0xfd} | ||||
| 	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18} | ||||
| const ( | ||||
| 	frameMagic          = "\x28\xb5\x2f\xfd" | ||||
| 	skippableFrameMagic = "\x2a\x4d\x18" | ||||
| ) | ||||
|  | ||||
| func newFrameDec(o decoderOptions) *frameDec { | ||||
| @@ -89,9 +89,9 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
| 			copy(signature[1:], b) | ||||
| 		} | ||||
|  | ||||
| 		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 { | ||||
| 		if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 { | ||||
| 			if debugDecoder { | ||||
| 				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic)) | ||||
| 				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic))) | ||||
| 			} | ||||
| 			// Break if not skippable frame. | ||||
| 			break | ||||
| @@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if !bytes.Equal(signature[:], frameMagic) { | ||||
| 	if string(signature[:]) != frameMagic { | ||||
| 		if debugDecoder { | ||||
| 			println("Got magic numbers: ", signature, "want:", frameMagic) | ||||
| 			println("Got magic numbers: ", signature, "want:", []byte(frameMagic)) | ||||
| 		} | ||||
| 		return ErrMagicMismatch | ||||
| 	} | ||||
| @@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
|  | ||||
| 	// Read Dictionary_ID | ||||
| 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id | ||||
| 	d.DictionaryID = nil | ||||
| 	d.DictionaryID = 0 | ||||
| 	if size := fhd & 3; size != 0 { | ||||
| 		if size == 3 { | ||||
| 			size = 4 | ||||
| @@ -167,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
| 			return err | ||||
| 		} | ||||
| 		var id uint32 | ||||
| 		switch size { | ||||
| 		switch len(b) { | ||||
| 		case 1: | ||||
| 			id = uint32(b[0]) | ||||
| 		case 2: | ||||
| @@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
| 		if debugDecoder { | ||||
| 			println("Dict size", size, "ID:", id) | ||||
| 		} | ||||
| 		if id > 0 { | ||||
| 			// ID 0 means "sorry, no dictionary anyway". | ||||
| 			// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format | ||||
| 			d.DictionaryID = &id | ||||
| 		} | ||||
| 		d.DictionaryID = id | ||||
| 	} | ||||
|  | ||||
| 	// Read Frame_Content_Size | ||||
| @@ -204,7 +200,7 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
| 			println("Reading Frame content", err) | ||||
| 			return err | ||||
| 		} | ||||
| 		switch fcsSize { | ||||
| 		switch len(b) { | ||||
| 		case 1: | ||||
| 			d.FrameContentSize = uint64(b[0]) | ||||
| 		case 2: | ||||
| @@ -261,12 +257,17 @@ func (d *frameDec) reset(br byteBuffer) error { | ||||
| 	} | ||||
| 	d.history.windowSize = int(d.WindowSize) | ||||
| 	if !d.o.lowMem || d.history.windowSize < maxBlockSize { | ||||
| 		// Alloc 2x window size if not low-mem, or very small window size. | ||||
| 		// Alloc 2x window size if not low-mem, or window size below 2MB. | ||||
| 		d.history.allocFrameBuffer = d.history.windowSize * 2 | ||||
| 	} else { | ||||
| 		// Alloc with one additional block | ||||
| 		if d.o.lowMem { | ||||
| 			// Alloc with 1MB extra. | ||||
| 			d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2 | ||||
| 		} else { | ||||
| 			// Alloc with 2MB extra. | ||||
| 			d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if debugDecoder { | ||||
| 		println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum) | ||||
| @@ -300,7 +301,7 @@ func (d *frameDec) checkCRC() error { | ||||
| 	} | ||||
|  | ||||
| 	// We can overwrite upper tmp now | ||||
| 	want, err := d.rawInput.readSmall(4) | ||||
| 	buf, err := d.rawInput.readSmall(4) | ||||
| 	if err != nil { | ||||
| 		println("CRC missing?", err) | ||||
| 		return err | ||||
| @@ -310,22 +311,17 @@ func (d *frameDec) checkCRC() error { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	var tmp [4]byte | ||||
| 	got := d.crc.Sum64() | ||||
| 	// Flip to match file order. | ||||
| 	tmp[0] = byte(got >> 0) | ||||
| 	tmp[1] = byte(got >> 8) | ||||
| 	tmp[2] = byte(got >> 16) | ||||
| 	tmp[3] = byte(got >> 24) | ||||
| 	want := binary.LittleEndian.Uint32(buf[:4]) | ||||
| 	got := uint32(d.crc.Sum64()) | ||||
|  | ||||
| 	if !bytes.Equal(tmp[:], want) { | ||||
| 	if got != want { | ||||
| 		if debugDecoder { | ||||
| 			println("CRC Check Failed:", tmp[:], "!=", want) | ||||
| 			printf("CRC check failed: got %08x, want %08x\n", got, want) | ||||
| 		} | ||||
| 		return ErrCRCMismatch | ||||
| 	} | ||||
| 	if debugDecoder { | ||||
| 		println("CRC ok", tmp[:]) | ||||
| 		printf("CRC ok %08x\n", got) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
							
								
								
									
										47
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										47
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -2,12 +2,7 @@ | ||||
|  | ||||
| VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package. | ||||
|  | ||||
|  | ||||
| [](https://godoc.org/github.com/cespare/xxhash) | ||||
| [](https://travis-ci.org/cespare/xxhash) | ||||
|  | ||||
| xxhash is a Go implementation of the 64-bit | ||||
| [xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a | ||||
| xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a | ||||
| high-quality hashing algorithm that is much faster than anything in the Go | ||||
| standard library. | ||||
|  | ||||
| @@ -28,8 +23,23 @@ func (*Digest) WriteString(string) (int, error) | ||||
| func (*Digest) Sum64() uint64 | ||||
| ``` | ||||
|  | ||||
| This implementation provides a fast pure-Go implementation and an even faster | ||||
| assembly implementation for amd64. | ||||
| The package is written with optimized pure Go and also contains even faster | ||||
| assembly implementations for amd64 and arm64. If desired, the `purego` build tag | ||||
| opts into using the Go code even on those architectures. | ||||
|  | ||||
| [xxHash]: http://cyan4973.github.io/xxHash/ | ||||
|  | ||||
| ## Compatibility | ||||
|  | ||||
| This package is in a module and the latest code is in version 2 of the module. | ||||
| You need a version of Go with at least "minimal module compatibility" to use | ||||
| github.com/cespare/xxhash/v2: | ||||
|  | ||||
| * 1.9.7+ for Go 1.9 | ||||
| * 1.10.3+ for Go 1.10 | ||||
| * Go 1.11 or later | ||||
|  | ||||
| I recommend using the latest release of Go. | ||||
|  | ||||
| ## Benchmarks | ||||
|  | ||||
| @@ -37,22 +47,25 @@ Here are some quick benchmarks comparing the pure-Go and assembly | ||||
| implementations of Sum64. | ||||
|  | ||||
| | input size | purego    | asm       | | ||||
| | --- | --- | --- | | ||||
| | 5 B   |  979.66 MB/s |  1291.17 MB/s  | | ||||
| | 100 B | 7475.26 MB/s | 7973.40 MB/s  | | ||||
| | 4 KB  | 17573.46 MB/s | 17602.65 MB/s | | ||||
| | 10 MB | 17131.46 MB/s | 17142.16 MB/s | | ||||
| | ---------- | --------- | --------- | | ||||
| | 4 B        |  1.3 GB/s |  1.2 GB/s | | ||||
| | 16 B       |  2.9 GB/s |  3.5 GB/s | | ||||
| | 100 B      |  6.9 GB/s |  8.1 GB/s | | ||||
| | 4 KB       | 11.7 GB/s | 16.7 GB/s | | ||||
| | 10 MB      | 12.0 GB/s | 17.3 GB/s | | ||||
|  | ||||
| These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using | ||||
| the following commands under Go 1.11.2: | ||||
| These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C | ||||
| CPU using the following commands under Go 1.19.2: | ||||
|  | ||||
| ``` | ||||
| $ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes' | ||||
| $ go test -benchtime 10s -bench '/xxhash,direct,bytes' | ||||
| benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') | ||||
| benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') | ||||
| ``` | ||||
|  | ||||
| ## Projects using this package | ||||
|  | ||||
| - [InfluxDB](https://github.com/influxdata/influxdb) | ||||
| - [Prometheus](https://github.com/prometheus/prometheus) | ||||
| - [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) | ||||
| - [FreeCache](https://github.com/coocood/freecache) | ||||
| - [FastCache](https://github.com/VictoriaMetrics/fastcache) | ||||
|   | ||||
							
								
								
									
										47
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										47
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -18,19 +18,11 @@ const ( | ||||
| 	prime5 uint64 = 2870177450012600261 | ||||
| ) | ||||
|  | ||||
| // NOTE(caleb): I'm using both consts and vars of the primes. Using consts where | ||||
| // possible in the Go code is worth a small (but measurable) performance boost | ||||
| // by avoiding some MOVQs. Vars are needed for the asm and also are useful for | ||||
| // convenience in the Go code in a few places where we need to intentionally | ||||
| // avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the | ||||
| // result overflows a uint64). | ||||
| var ( | ||||
| 	prime1v = prime1 | ||||
| 	prime2v = prime2 | ||||
| 	prime3v = prime3 | ||||
| 	prime4v = prime4 | ||||
| 	prime5v = prime5 | ||||
| ) | ||||
| // Store the primes in an array as well. | ||||
| // | ||||
| // The consts are used when possible in Go code to avoid MOVs but we need a | ||||
| // contiguous array for the assembly code. | ||||
| var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} | ||||
|  | ||||
| // Digest implements hash.Hash64. | ||||
| type Digest struct { | ||||
| @@ -52,10 +44,10 @@ func New() *Digest { | ||||
|  | ||||
| // Reset clears the Digest's state so that it can be reused. | ||||
| func (d *Digest) Reset() { | ||||
| 	d.v1 = prime1v + prime2 | ||||
| 	d.v1 = primes[0] + prime2 | ||||
| 	d.v2 = prime2 | ||||
| 	d.v3 = 0 | ||||
| 	d.v4 = -prime1v | ||||
| 	d.v4 = -primes[0] | ||||
| 	d.total = 0 | ||||
| 	d.n = 0 | ||||
| } | ||||
| @@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) { | ||||
| 	n = len(b) | ||||
| 	d.total += uint64(n) | ||||
|  | ||||
| 	memleft := d.mem[d.n&(len(d.mem)-1):] | ||||
|  | ||||
| 	if d.n+n < 32 { | ||||
| 		// This new data doesn't even fill the current block. | ||||
| 		copy(d.mem[d.n:], b) | ||||
| 		copy(memleft, b) | ||||
| 		d.n += n | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	if d.n > 0 { | ||||
| 		// Finish off the partial block. | ||||
| 		copy(d.mem[d.n:], b) | ||||
| 		c := copy(memleft, b) | ||||
| 		d.v1 = round(d.v1, u64(d.mem[0:8])) | ||||
| 		d.v2 = round(d.v2, u64(d.mem[8:16])) | ||||
| 		d.v3 = round(d.v3, u64(d.mem[16:24])) | ||||
| 		d.v4 = round(d.v4, u64(d.mem[24:32])) | ||||
| 		b = b[32-d.n:] | ||||
| 		b = b[c:] | ||||
| 		d.n = 0 | ||||
| 	} | ||||
|  | ||||
| @@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 { | ||||
|  | ||||
| 	h += d.total | ||||
|  | ||||
| 	i, end := 0, d.n | ||||
| 	for ; i+8 <= end; i += 8 { | ||||
| 		k1 := round(0, u64(d.mem[i:i+8])) | ||||
| 	b := d.mem[:d.n&(len(d.mem)-1)] | ||||
| 	for ; len(b) >= 8; b = b[8:] { | ||||
| 		k1 := round(0, u64(b[:8])) | ||||
| 		h ^= k1 | ||||
| 		h = rol27(h)*prime1 + prime4 | ||||
| 	} | ||||
| 	if i+4 <= end { | ||||
| 		h ^= uint64(u32(d.mem[i:i+4])) * prime1 | ||||
| 	if len(b) >= 4 { | ||||
| 		h ^= uint64(u32(b[:4])) * prime1 | ||||
| 		h = rol23(h)*prime2 + prime3 | ||||
| 		i += 4 | ||||
| 		b = b[4:] | ||||
| 	} | ||||
| 	for i < end { | ||||
| 		h ^= uint64(d.mem[i]) * prime5 | ||||
| 	for ; len(b) > 0; b = b[1:] { | ||||
| 		h ^= uint64(b[0]) * prime5 | ||||
| 		h = rol11(h) * prime1 | ||||
| 		i++ | ||||
| 	} | ||||
|  | ||||
| 	h ^= h >> 33 | ||||
|   | ||||
							
								
								
									
										308
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										308
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,3 +1,4 @@ | ||||
| //go:build !appengine && gc && !purego && !noasm | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !purego | ||||
| @@ -5,212 +6,205 @@ | ||||
|  | ||||
| #include "textflag.h" | ||||
|  | ||||
| // Register allocation: | ||||
| // AX	h | ||||
| // SI	pointer to advance through b | ||||
| // DX	n | ||||
| // BX	loop end | ||||
| // R8	v1, k1 | ||||
| // R9	v2 | ||||
| // R10	v3 | ||||
| // R11	v4 | ||||
| // R12	tmp | ||||
| // R13	prime1v | ||||
| // R14	prime2v | ||||
| // DI	prime4v | ||||
| // Registers: | ||||
| #define h      AX | ||||
| #define d      AX | ||||
| #define p      SI // pointer to advance through b | ||||
| #define n      DX | ||||
| #define end    BX // loop end | ||||
| #define v1     R8 | ||||
| #define v2     R9 | ||||
| #define v3     R10 | ||||
| #define v4     R11 | ||||
| #define x      R12 | ||||
| #define prime1 R13 | ||||
| #define prime2 R14 | ||||
| #define prime4 DI | ||||
|  | ||||
| // round reads from and advances the buffer pointer in SI. | ||||
| // It assumes that R13 has prime1v and R14 has prime2v. | ||||
| #define round(r) \ | ||||
| 	MOVQ  (SI), R12 \ | ||||
| 	ADDQ  $8, SI    \ | ||||
| 	IMULQ R14, R12  \ | ||||
| 	ADDQ  R12, r    \ | ||||
| 	ROLQ  $31, r    \ | ||||
| 	IMULQ R13, r | ||||
| #define round(acc, x) \ | ||||
| 	IMULQ prime2, x   \ | ||||
| 	ADDQ  x, acc      \ | ||||
| 	ROLQ  $31, acc    \ | ||||
| 	IMULQ prime1, acc | ||||
|  | ||||
| // mergeRound applies a merge round on the two registers acc and val. | ||||
| // It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. | ||||
| #define mergeRound(acc, val) \ | ||||
| 	IMULQ R14, val \ | ||||
| 	ROLQ  $31, val \ | ||||
| 	IMULQ R13, val \ | ||||
| 	XORQ  val, acc \ | ||||
| 	IMULQ R13, acc \ | ||||
| 	ADDQ  DI, acc | ||||
| // round0 performs the operation x = round(0, x). | ||||
| #define round0(x) \ | ||||
| 	IMULQ prime2, x \ | ||||
| 	ROLQ  $31, x    \ | ||||
| 	IMULQ prime1, x | ||||
|  | ||||
| // mergeRound applies a merge round on the two registers acc and x. | ||||
| // It assumes that prime1, prime2, and prime4 have been loaded. | ||||
| #define mergeRound(acc, x) \ | ||||
| 	round0(x)         \ | ||||
| 	XORQ  x, acc      \ | ||||
| 	IMULQ prime1, acc \ | ||||
| 	ADDQ  prime4, acc | ||||
|  | ||||
| // blockLoop processes as many 32-byte blocks as possible, | ||||
| // updating v1, v2, v3, and v4. It assumes that there is at least one block | ||||
| // to process. | ||||
| #define blockLoop() \ | ||||
| loop:  \ | ||||
| 	MOVQ +0(p), x  \ | ||||
| 	round(v1, x)   \ | ||||
| 	MOVQ +8(p), x  \ | ||||
| 	round(v2, x)   \ | ||||
| 	MOVQ +16(p), x \ | ||||
| 	round(v3, x)   \ | ||||
| 	MOVQ +24(p), x \ | ||||
| 	round(v4, x)   \ | ||||
| 	ADDQ $32, p    \ | ||||
| 	CMPQ p, end    \ | ||||
| 	JLE  loop | ||||
|  | ||||
| // func Sum64(b []byte) uint64 | ||||
| TEXT ·Sum64(SB), NOSPLIT, $0-32 | ||||
| TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 | ||||
| 	// Load fixed primes. | ||||
| 	MOVQ ·prime1v(SB), R13 | ||||
| 	MOVQ ·prime2v(SB), R14 | ||||
| 	MOVQ ·prime4v(SB), DI | ||||
| 	MOVQ ·primes+0(SB), prime1 | ||||
| 	MOVQ ·primes+8(SB), prime2 | ||||
| 	MOVQ ·primes+24(SB), prime4 | ||||
|  | ||||
| 	// Load slice. | ||||
| 	MOVQ b_base+0(FP), SI | ||||
| 	MOVQ b_len+8(FP), DX | ||||
| 	LEAQ (SI)(DX*1), BX | ||||
| 	MOVQ b_base+0(FP), p | ||||
| 	MOVQ b_len+8(FP), n | ||||
| 	LEAQ (p)(n*1), end | ||||
|  | ||||
| 	// The first loop limit will be len(b)-32. | ||||
| 	SUBQ $32, BX | ||||
| 	SUBQ $32, end | ||||
|  | ||||
| 	// Check whether we have at least one block. | ||||
| 	CMPQ DX, $32 | ||||
| 	CMPQ n, $32 | ||||
| 	JLT  noBlocks | ||||
|  | ||||
| 	// Set up initial state (v1, v2, v3, v4). | ||||
| 	MOVQ R13, R8 | ||||
| 	ADDQ R14, R8 | ||||
| 	MOVQ R14, R9 | ||||
| 	XORQ R10, R10 | ||||
| 	XORQ R11, R11 | ||||
| 	SUBQ R13, R11 | ||||
| 	MOVQ prime1, v1 | ||||
| 	ADDQ prime2, v1 | ||||
| 	MOVQ prime2, v2 | ||||
| 	XORQ v3, v3 | ||||
| 	XORQ v4, v4 | ||||
| 	SUBQ prime1, v4 | ||||
|  | ||||
| 	// Loop until SI > BX. | ||||
| blockLoop: | ||||
| 	round(R8) | ||||
| 	round(R9) | ||||
| 	round(R10) | ||||
| 	round(R11) | ||||
| 	blockLoop() | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JLE  blockLoop | ||||
| 	MOVQ v1, h | ||||
| 	ROLQ $1, h | ||||
| 	MOVQ v2, x | ||||
| 	ROLQ $7, x | ||||
| 	ADDQ x, h | ||||
| 	MOVQ v3, x | ||||
| 	ROLQ $12, x | ||||
| 	ADDQ x, h | ||||
| 	MOVQ v4, x | ||||
| 	ROLQ $18, x | ||||
| 	ADDQ x, h | ||||
|  | ||||
| 	MOVQ R8, AX | ||||
| 	ROLQ $1, AX | ||||
| 	MOVQ R9, R12 | ||||
| 	ROLQ $7, R12 | ||||
| 	ADDQ R12, AX | ||||
| 	MOVQ R10, R12 | ||||
| 	ROLQ $12, R12 | ||||
| 	ADDQ R12, AX | ||||
| 	MOVQ R11, R12 | ||||
| 	ROLQ $18, R12 | ||||
| 	ADDQ R12, AX | ||||
|  | ||||
| 	mergeRound(AX, R8) | ||||
| 	mergeRound(AX, R9) | ||||
| 	mergeRound(AX, R10) | ||||
| 	mergeRound(AX, R11) | ||||
| 	mergeRound(h, v1) | ||||
| 	mergeRound(h, v2) | ||||
| 	mergeRound(h, v3) | ||||
| 	mergeRound(h, v4) | ||||
|  | ||||
| 	JMP afterBlocks | ||||
|  | ||||
| noBlocks: | ||||
| 	MOVQ ·prime5v(SB), AX | ||||
| 	MOVQ ·primes+32(SB), h | ||||
|  | ||||
| afterBlocks: | ||||
| 	ADDQ DX, AX | ||||
| 	ADDQ n, h | ||||
|  | ||||
| 	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. | ||||
| 	ADDQ $24, BX | ||||
| 	ADDQ $24, end | ||||
| 	CMPQ p, end | ||||
| 	JG   try4 | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JG   fourByte | ||||
| loop8: | ||||
| 	MOVQ  (p), x | ||||
| 	ADDQ  $8, p | ||||
| 	round0(x) | ||||
| 	XORQ  x, h | ||||
| 	ROLQ  $27, h | ||||
| 	IMULQ prime1, h | ||||
| 	ADDQ  prime4, h | ||||
|  | ||||
| wordLoop: | ||||
| 	// Calculate k1. | ||||
| 	MOVQ  (SI), R8 | ||||
| 	ADDQ  $8, SI | ||||
| 	IMULQ R14, R8 | ||||
| 	ROLQ  $31, R8 | ||||
| 	IMULQ R13, R8 | ||||
| 	CMPQ p, end | ||||
| 	JLE  loop8 | ||||
|  | ||||
| 	XORQ  R8, AX | ||||
| 	ROLQ  $27, AX | ||||
| 	IMULQ R13, AX | ||||
| 	ADDQ  DI, AX | ||||
| try4: | ||||
| 	ADDQ $4, end | ||||
| 	CMPQ p, end | ||||
| 	JG   try1 | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JLE  wordLoop | ||||
| 	MOVL  (p), x | ||||
| 	ADDQ  $4, p | ||||
| 	IMULQ prime1, x | ||||
| 	XORQ  x, h | ||||
|  | ||||
| fourByte: | ||||
| 	ADDQ $4, BX | ||||
| 	CMPQ SI, BX | ||||
| 	JG   singles | ||||
| 	ROLQ  $23, h | ||||
| 	IMULQ prime2, h | ||||
| 	ADDQ  ·primes+16(SB), h | ||||
|  | ||||
| 	MOVL  (SI), R8 | ||||
| 	ADDQ  $4, SI | ||||
| 	IMULQ R13, R8 | ||||
| 	XORQ  R8, AX | ||||
|  | ||||
| 	ROLQ  $23, AX | ||||
| 	IMULQ R14, AX | ||||
| 	ADDQ  ·prime3v(SB), AX | ||||
|  | ||||
| singles: | ||||
| 	ADDQ $4, BX | ||||
| 	CMPQ SI, BX | ||||
| try1: | ||||
| 	ADDQ $4, end | ||||
| 	CMPQ p, end | ||||
| 	JGE  finalize | ||||
|  | ||||
| singlesLoop: | ||||
| 	MOVBQZX (SI), R12 | ||||
| 	ADDQ    $1, SI | ||||
| 	IMULQ   ·prime5v(SB), R12 | ||||
| 	XORQ    R12, AX | ||||
| loop1: | ||||
| 	MOVBQZX (p), x | ||||
| 	ADDQ    $1, p | ||||
| 	IMULQ   ·primes+32(SB), x | ||||
| 	XORQ    x, h | ||||
| 	ROLQ    $11, h | ||||
| 	IMULQ   prime1, h | ||||
|  | ||||
| 	ROLQ  $11, AX | ||||
| 	IMULQ R13, AX | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JL   singlesLoop | ||||
| 	CMPQ p, end | ||||
| 	JL   loop1 | ||||
|  | ||||
| finalize: | ||||
| 	MOVQ  AX, R12 | ||||
| 	SHRQ  $33, R12 | ||||
| 	XORQ  R12, AX | ||||
| 	IMULQ R14, AX | ||||
| 	MOVQ  AX, R12 | ||||
| 	SHRQ  $29, R12 | ||||
| 	XORQ  R12, AX | ||||
| 	IMULQ ·prime3v(SB), AX | ||||
| 	MOVQ  AX, R12 | ||||
| 	SHRQ  $32, R12 | ||||
| 	XORQ  R12, AX | ||||
| 	MOVQ  h, x | ||||
| 	SHRQ  $33, x | ||||
| 	XORQ  x, h | ||||
| 	IMULQ prime2, h | ||||
| 	MOVQ  h, x | ||||
| 	SHRQ  $29, x | ||||
| 	XORQ  x, h | ||||
| 	IMULQ ·primes+16(SB), h | ||||
| 	MOVQ  h, x | ||||
| 	SHRQ  $32, x | ||||
| 	XORQ  x, h | ||||
|  | ||||
| 	MOVQ AX, ret+24(FP) | ||||
| 	MOVQ h, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // writeBlocks uses the same registers as above except that it uses AX to store | ||||
| // the d pointer. | ||||
|  | ||||
| // func writeBlocks(s *Digest, b []byte) int | ||||
| TEXT ·writeBlocks(SB), NOSPLIT, $0-40 | ||||
| TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 | ||||
| 	// Load fixed primes needed for round. | ||||
| 	MOVQ ·prime1v(SB), R13 | ||||
| 	MOVQ ·prime2v(SB), R14 | ||||
| 	MOVQ ·primes+0(SB), prime1 | ||||
| 	MOVQ ·primes+8(SB), prime2 | ||||
|  | ||||
| 	// Load slice. | ||||
| 	MOVQ b_base+8(FP), SI | ||||
| 	MOVQ b_len+16(FP), DX | ||||
| 	LEAQ (SI)(DX*1), BX | ||||
| 	SUBQ $32, BX | ||||
| 	MOVQ b_base+8(FP), p | ||||
| 	MOVQ b_len+16(FP), n | ||||
| 	LEAQ (p)(n*1), end | ||||
| 	SUBQ $32, end | ||||
|  | ||||
| 	// Load vN from d. | ||||
| 	MOVQ d+0(FP), AX | ||||
| 	MOVQ 0(AX), R8   // v1 | ||||
| 	MOVQ 8(AX), R9   // v2 | ||||
| 	MOVQ 16(AX), R10 // v3 | ||||
| 	MOVQ 24(AX), R11 // v4 | ||||
| 	MOVQ s+0(FP), d | ||||
| 	MOVQ 0(d), v1 | ||||
| 	MOVQ 8(d), v2 | ||||
| 	MOVQ 16(d), v3 | ||||
| 	MOVQ 24(d), v4 | ||||
|  | ||||
| 	// We don't need to check the loop condition here; this function is | ||||
| 	// always called with at least one block of data to process. | ||||
| blockLoop: | ||||
| 	round(R8) | ||||
| 	round(R9) | ||||
| 	round(R10) | ||||
| 	round(R11) | ||||
|  | ||||
| 	CMPQ SI, BX | ||||
| 	JLE  blockLoop | ||||
| 	blockLoop() | ||||
|  | ||||
| 	// Copy vN back to d. | ||||
| 	MOVQ R8, 0(AX) | ||||
| 	MOVQ R9, 8(AX) | ||||
| 	MOVQ R10, 16(AX) | ||||
| 	MOVQ R11, 24(AX) | ||||
| 	MOVQ v1, 0(d) | ||||
| 	MOVQ v2, 8(d) | ||||
| 	MOVQ v3, 16(d) | ||||
| 	MOVQ v4, 24(d) | ||||
|  | ||||
| 	// The number of bytes written is SI minus the old base pointer. | ||||
| 	SUBQ b_base+8(FP), SI | ||||
| 	MOVQ SI, ret+32(FP) | ||||
| 	// The number of bytes written is p minus the old base pointer. | ||||
| 	SUBQ b_base+8(FP), p | ||||
| 	MOVQ p, ret+32(FP) | ||||
|  | ||||
| 	RET | ||||
|   | ||||
							
								
								
									
										122
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										122
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -1,13 +1,17 @@ | ||||
| // +build gc,!purego,!noasm | ||||
| //go:build !appengine && gc && !purego && !noasm | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !purego | ||||
| // +build !noasm | ||||
|  | ||||
| #include "textflag.h" | ||||
|  | ||||
| // Register allocation. | ||||
| // Registers: | ||||
| #define digest	R1 | ||||
| #define h	R2 // Return value. | ||||
| #define p	R3 // Input pointer. | ||||
| #define len	R4 | ||||
| #define nblocks	R5 // len / 32. | ||||
| #define h	R2 // return value | ||||
| #define p	R3 // input pointer | ||||
| #define n	R4 // input length | ||||
| #define nblocks	R5 // n / 32 | ||||
| #define prime1	R7 | ||||
| #define prime2	R8 | ||||
| #define prime3	R9 | ||||
| @@ -25,60 +29,52 @@ | ||||
| #define round(acc, x) \ | ||||
| 	MADD prime2, acc, x, acc \ | ||||
| 	ROR  $64-31, acc         \ | ||||
| 	MUL  prime1, acc         \ | ||||
| 	MUL  prime1, acc | ||||
|  | ||||
| // x = round(0, x). | ||||
| // round0 performs the operation x = round(0, x). | ||||
| #define round0(x) \ | ||||
| 	MUL prime2, x \ | ||||
| 	ROR $64-31, x \ | ||||
| 	MUL prime1, x \ | ||||
| 	MUL prime1, x | ||||
|  | ||||
| #define mergeRound(x) \ | ||||
| #define mergeRound(acc, x) \ | ||||
| 	round0(x)                     \ | ||||
| 	EOR  x, h                 \ | ||||
| 	MADD h, prime4, prime1, h \ | ||||
| 	EOR  x, acc                   \ | ||||
| 	MADD acc, prime4, prime1, acc | ||||
|  | ||||
| // Update v[1-4] with 32-byte blocks. Assumes len >= 32. | ||||
| #define blocksLoop() \ | ||||
| 	LSR     $5, len, nblocks \ | ||||
| // blockLoop processes as many 32-byte blocks as possible, | ||||
| // updating v1, v2, v3, and v4. It assumes that n >= 32. | ||||
| #define blockLoop() \ | ||||
| 	LSR     $5, n, nblocks  \ | ||||
| 	PCALIGN $16             \ | ||||
| 	loop:                   \ | ||||
| 	LDP.P   32(p), (x1, x2)  \ | ||||
| 	LDP.P   16(p), (x1, x2) \ | ||||
| 	LDP.P   16(p), (x3, x4) \ | ||||
| 	round(v1, x1)           \ | ||||
| 	LDP     -16(p), (x3, x4) \ | ||||
| 	round(v2, x2)           \ | ||||
| 	SUB     $1, nblocks      \ | ||||
| 	round(v3, x3)           \ | ||||
| 	round(v4, x4)           \ | ||||
| 	CBNZ    nblocks, loop    \ | ||||
|  | ||||
| // The primes are repeated here to ensure that they're stored | ||||
| // in a contiguous array, so we can load them with LDP. | ||||
| DATA primes<> +0(SB)/8, $11400714785074694791 | ||||
| DATA primes<> +8(SB)/8, $14029467366897019727 | ||||
| DATA primes<>+16(SB)/8, $1609587929392839161 | ||||
| DATA primes<>+24(SB)/8, $9650029242287828579 | ||||
| DATA primes<>+32(SB)/8, $2870177450012600261 | ||||
| GLOBL primes<>(SB), NOPTR+RODATA, $40 | ||||
| 	SUB     $1, nblocks     \ | ||||
| 	CBNZ    nblocks, loop | ||||
|  | ||||
| // func Sum64(b []byte) uint64 | ||||
| TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32 | ||||
| 	LDP b_base+0(FP), (p, len) | ||||
| TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 | ||||
| 	LDP b_base+0(FP), (p, n) | ||||
|  | ||||
| 	LDP  primes<> +0(SB), (prime1, prime2) | ||||
| 	LDP  primes<>+16(SB), (prime3, prime4) | ||||
| 	MOVD primes<>+32(SB), prime5 | ||||
| 	LDP  ·primes+0(SB), (prime1, prime2) | ||||
| 	LDP  ·primes+16(SB), (prime3, prime4) | ||||
| 	MOVD ·primes+32(SB), prime5 | ||||
|  | ||||
| 	CMP  $32, len | ||||
| 	CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 } | ||||
| 	BLO  afterLoop | ||||
| 	CMP  $32, n | ||||
| 	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } | ||||
| 	BLT  afterLoop | ||||
|  | ||||
| 	ADD  prime1, prime2, v1 | ||||
| 	MOVD prime2, v2 | ||||
| 	MOVD $0, v3 | ||||
| 	NEG  prime1, v4 | ||||
|  | ||||
| 	blocksLoop() | ||||
| 	blockLoop() | ||||
|  | ||||
| 	ROR $64-1, v1, x1 | ||||
| 	ROR $64-7, v2, x2 | ||||
| @@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32 | ||||
| 	ADD x3, x4 | ||||
| 	ADD x2, x4, h | ||||
|  | ||||
| 	mergeRound(v1) | ||||
| 	mergeRound(v2) | ||||
| 	mergeRound(v3) | ||||
| 	mergeRound(v4) | ||||
| 	mergeRound(h, v1) | ||||
| 	mergeRound(h, v2) | ||||
| 	mergeRound(h, v3) | ||||
| 	mergeRound(h, v4) | ||||
|  | ||||
| afterLoop: | ||||
| 	ADD len, h | ||||
| 	ADD n, h | ||||
|  | ||||
| 	TBZ   $4, len, try8 | ||||
| 	TBZ   $4, n, try8 | ||||
| 	LDP.P 16(p), (x1, x2) | ||||
|  | ||||
| 	round0(x1) | ||||
|  | ||||
| 	// NOTE: here and below, sequencing the EOR after the ROR (using a | ||||
| 	// rotated register) is worth a small but measurable speedup for small | ||||
| 	// inputs. | ||||
| 	ROR  $64-27, h | ||||
| 	EOR  x1 @> 64-27, h, h | ||||
| 	MADD h, prime4, prime1, h | ||||
|  | ||||
| 	round0(x2) | ||||
| 	ROR  $64-27, h | ||||
| 	EOR  x2 @> 64-27, h | ||||
| 	EOR  x2 @> 64-27, h, h | ||||
| 	MADD h, prime4, prime1, h | ||||
|  | ||||
| try8: | ||||
| 	TBZ    $3, len, try4 | ||||
| 	TBZ    $3, n, try4 | ||||
| 	MOVD.P 8(p), x1 | ||||
|  | ||||
| 	round0(x1) | ||||
| 	ROR  $64-27, h | ||||
| 	EOR  x1 @> 64-27, h | ||||
| 	EOR  x1 @> 64-27, h, h | ||||
| 	MADD h, prime4, prime1, h | ||||
|  | ||||
| try4: | ||||
| 	TBZ     $2, len, try2 | ||||
| 	TBZ     $2, n, try2 | ||||
| 	MOVWU.P 4(p), x2 | ||||
|  | ||||
| 	MUL  prime1, x2 | ||||
| 	ROR  $64-23, h | ||||
| 	EOR  x2 @> 64-23, h | ||||
| 	EOR  x2 @> 64-23, h, h | ||||
| 	MADD h, prime3, prime2, h | ||||
|  | ||||
| try2: | ||||
| 	TBZ     $1, len, try1 | ||||
| 	TBZ     $1, n, try1 | ||||
| 	MOVHU.P 2(p), x3 | ||||
| 	AND     $255, x3, x1 | ||||
| 	LSR     $8, x3, x2 | ||||
|  | ||||
| 	MUL prime5, x1 | ||||
| 	ROR $64-11, h | ||||
| 	EOR x1 @> 64-11, h | ||||
| 	EOR x1 @> 64-11, h, h | ||||
| 	MUL prime1, h | ||||
|  | ||||
| 	MUL prime5, x2 | ||||
| 	ROR $64-11, h | ||||
| 	EOR x2 @> 64-11, h | ||||
| 	EOR x2 @> 64-11, h, h | ||||
| 	MUL prime1, h | ||||
|  | ||||
| try1: | ||||
| 	TBZ   $0, len, end | ||||
| 	TBZ   $0, n, finalize | ||||
| 	MOVBU (p), x4 | ||||
|  | ||||
| 	MUL prime5, x4 | ||||
| 	ROR $64-11, h | ||||
| 	EOR x4 @> 64-11, h | ||||
| 	EOR x4 @> 64-11, h, h | ||||
| 	MUL prime1, h | ||||
|  | ||||
| end: | ||||
| finalize: | ||||
| 	EOR h >> 33, h | ||||
| 	MUL prime2, h | ||||
| 	EOR h >> 29, h | ||||
| @@ -163,24 +163,22 @@ end: | ||||
| 	RET | ||||
|  | ||||
| // func writeBlocks(d *Digest, b []byte) int | ||||
| // | ||||
| // Assumes len(b) >= 32. | ||||
| TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40 | ||||
| 	LDP primes<>(SB), (prime1, prime2) | ||||
| TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 | ||||
| 	LDP ·primes+0(SB), (prime1, prime2) | ||||
|  | ||||
| 	// Load state. Assume v[1-4] are stored contiguously. | ||||
| 	MOVD d+0(FP), digest | ||||
| 	LDP  0(digest), (v1, v2) | ||||
| 	LDP  16(digest), (v3, v4) | ||||
|  | ||||
| 	LDP b_base+8(FP), (p, len) | ||||
| 	LDP b_base+8(FP), (p, n) | ||||
|  | ||||
| 	blocksLoop() | ||||
| 	blockLoop() | ||||
|  | ||||
| 	// Store updated state. | ||||
| 	STP (v1, v2), 0(digest) | ||||
| 	STP (v3, v4), 16(digest) | ||||
|  | ||||
| 	BIC  $31, len | ||||
| 	MOVD len, ret+32(FP) | ||||
| 	BIC  $31, n | ||||
| 	MOVD n, ret+32(FP) | ||||
| 	RET | ||||
|   | ||||
							
								
								
									
										2
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -13,4 +13,4 @@ package xxhash | ||||
| func Sum64(b []byte) uint64 | ||||
|  | ||||
| //go:noescape | ||||
| func writeBlocks(d *Digest, b []byte) int | ||||
| func writeBlocks(s *Digest, b []byte) int | ||||
|   | ||||
							
								
								
									
										19
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										19
									
								
								vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 { | ||||
| 	var h uint64 | ||||
|  | ||||
| 	if n >= 32 { | ||||
| 		v1 := prime1v + prime2 | ||||
| 		v1 := primes[0] + prime2 | ||||
| 		v2 := prime2 | ||||
| 		v3 := uint64(0) | ||||
| 		v4 := -prime1v | ||||
| 		v4 := -primes[0] | ||||
| 		for len(b) >= 32 { | ||||
| 			v1 = round(v1, u64(b[0:8:len(b)])) | ||||
| 			v2 = round(v2, u64(b[8:16:len(b)])) | ||||
| @@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 { | ||||
|  | ||||
| 	h += uint64(n) | ||||
|  | ||||
| 	i, end := 0, len(b) | ||||
| 	for ; i+8 <= end; i += 8 { | ||||
| 		k1 := round(0, u64(b[i:i+8:len(b)])) | ||||
| 	for ; len(b) >= 8; b = b[8:] { | ||||
| 		k1 := round(0, u64(b[:8])) | ||||
| 		h ^= k1 | ||||
| 		h = rol27(h)*prime1 + prime4 | ||||
| 	} | ||||
| 	if i+4 <= end { | ||||
| 		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 | ||||
| 	if len(b) >= 4 { | ||||
| 		h ^= uint64(u32(b[:4])) * prime1 | ||||
| 		h = rol23(h)*prime2 + prime3 | ||||
| 		i += 4 | ||||
| 		b = b[4:] | ||||
| 	} | ||||
| 	for ; i < end; i++ { | ||||
| 		h ^= uint64(b[i]) * prime5 | ||||
| 	for ; len(b) > 0; b = b[1:] { | ||||
| 		h ^= uint64(b[0]) * prime5 | ||||
| 		h = rol11(h) * prime1 | ||||
| 	} | ||||
|  | ||||
|   | ||||
							
								
								
									
										28
									
								
								vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										28
									
								
								vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -320,10 +320,6 @@ error_not_enough_literals: | ||||
| 	MOVQ $0x00000004, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| 	// Return with not enough output space error | ||||
| 	MOVQ $0x00000005, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int | ||||
| // Requires: CMOV | ||||
| TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 | ||||
| @@ -617,10 +613,6 @@ error_not_enough_literals: | ||||
| 	MOVQ $0x00000004, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| 	// Return with not enough output space error | ||||
| 	MOVQ $0x00000005, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int | ||||
| // Requires: BMI, BMI2, CMOV | ||||
| TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 | ||||
| @@ -897,10 +889,6 @@ error_not_enough_literals: | ||||
| 	MOVQ $0x00000004, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| 	// Return with not enough output space error | ||||
| 	MOVQ $0x00000005, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int | ||||
| // Requires: BMI, BMI2, CMOV | ||||
| TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 | ||||
| @@ -1152,10 +1140,6 @@ error_not_enough_literals: | ||||
| 	MOVQ $0x00000004, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| 	// Return with not enough output space error | ||||
| 	MOVQ $0x00000005, ret+24(FP) | ||||
| 	RET | ||||
|  | ||||
| // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool | ||||
| // Requires: SSE | ||||
| TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 | ||||
| @@ -1389,8 +1373,7 @@ loop_finished: | ||||
| 	MOVQ ctx+0(FP), AX | ||||
| 	MOVQ DX, 24(AX) | ||||
| 	MOVQ DI, 104(AX) | ||||
| 	MOVQ 80(AX), CX | ||||
| 	SUBQ CX, SI | ||||
| 	SUBQ 80(AX), SI | ||||
| 	MOVQ SI, 112(AX) | ||||
| 	RET | ||||
|  | ||||
| @@ -1402,8 +1385,7 @@ error_match_off_too_big: | ||||
| 	MOVQ ctx+0(FP), AX | ||||
| 	MOVQ DX, 24(AX) | ||||
| 	MOVQ DI, 104(AX) | ||||
| 	MOVQ 80(AX), CX | ||||
| 	SUBQ CX, SI | ||||
| 	SUBQ 80(AX), SI | ||||
| 	MOVQ SI, 112(AX) | ||||
| 	RET | ||||
|  | ||||
| @@ -1747,8 +1729,7 @@ loop_finished: | ||||
| 	MOVQ ctx+0(FP), AX | ||||
| 	MOVQ DX, 24(AX) | ||||
| 	MOVQ DI, 104(AX) | ||||
| 	MOVQ 80(AX), CX | ||||
| 	SUBQ CX, SI | ||||
| 	SUBQ 80(AX), SI | ||||
| 	MOVQ SI, 112(AX) | ||||
| 	RET | ||||
|  | ||||
| @@ -1760,8 +1741,7 @@ error_match_off_too_big: | ||||
| 	MOVQ ctx+0(FP), AX | ||||
| 	MOVQ DX, 24(AX) | ||||
| 	MOVQ DI, 104(AX) | ||||
| 	MOVQ 80(AX), CX | ||||
| 	SUBQ CX, SI | ||||
| 	SUBQ 80(AX), SI | ||||
| 	MOVQ SI, 112(AX) | ||||
| 	RET | ||||
|  | ||||
|   | ||||
							
								
								
									
										31
									
								
								vendor/github.com/klauspost/compress/zstd/zstd.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								vendor/github.com/klauspost/compress/zstd/zstd.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -36,9 +36,6 @@ const forcePreDef = false | ||||
| // zstdMinMatch is the minimum zstd match length. | ||||
| const zstdMinMatch = 3 | ||||
|  | ||||
| // Reset the buffer offset when reaching this. | ||||
| const bufferReset = math.MaxInt32 - MaxWindowSize | ||||
|  | ||||
| // fcsUnknown is used for unknown frame content size. | ||||
| const fcsUnknown = math.MaxUint64 | ||||
|  | ||||
| @@ -75,7 +72,6 @@ var ( | ||||
| 	ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit") | ||||
|  | ||||
| 	// ErrUnknownDictionary is returned if the dictionary ID is unknown. | ||||
| 	// For the time being dictionaries are not supported. | ||||
| 	ErrUnknownDictionary = errors.New("unknown dictionary") | ||||
|  | ||||
| 	// ErrFrameSizeExceeded is returned if the stated frame size is exceeded. | ||||
| @@ -110,26 +106,25 @@ func printf(format string, a ...interface{}) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // matchLen returns the maximum length. | ||||
| // matchLen returns the maximum common prefix length of a and b. | ||||
| // a must be the shortest of the two. | ||||
| // The function also returns whether all bytes matched. | ||||
| func matchLen(a, b []byte) int { | ||||
| 	b = b[:len(a)] | ||||
| 	for i := 0; i < len(a)-7; i += 8 { | ||||
| 		if diff := load64(a, i) ^ load64(b, i); diff != 0 { | ||||
| 			return i + (bits.TrailingZeros64(diff) >> 3) | ||||
| func matchLen(a, b []byte) (n int) { | ||||
| 	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { | ||||
| 		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) | ||||
| 		if diff != 0 { | ||||
| 			return n + bits.TrailingZeros64(diff)>>3 | ||||
| 		} | ||||
| 		n += 8 | ||||
| 	} | ||||
|  | ||||
| 	checked := (len(a) >> 3) << 3 | ||||
| 	a = a[checked:] | ||||
| 	b = b[checked:] | ||||
| 	for i := range a { | ||||
| 		if a[i] != b[i] { | ||||
| 			return i + checked | ||||
| 			break | ||||
| 		} | ||||
| 		n++ | ||||
| 	} | ||||
| 	return len(a) + checked | ||||
| 	return n | ||||
|  | ||||
| } | ||||
|  | ||||
| func load3232(b []byte, i int32) uint32 { | ||||
| @@ -140,10 +135,6 @@ func load6432(b []byte, i int32) uint64 { | ||||
| 	return binary.LittleEndian.Uint64(b[i:]) | ||||
| } | ||||
|  | ||||
| func load64(b []byte, i int) uint64 { | ||||
| 	return binary.LittleEndian.Uint64(b[i:]) | ||||
| } | ||||
|  | ||||
| type byter interface { | ||||
| 	Bytes() []byte | ||||
| 	Len() int | ||||
|   | ||||
							
								
								
									
										4
									
								
								vendor/modules.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								vendor/modules.txt
									
									
									
									
										vendored
									
									
								
							| @@ -284,8 +284,8 @@ github.com/intel/goresctrl/pkg/utils | ||||
| # github.com/json-iterator/go v1.1.12 | ||||
| ## explicit; go 1.12 | ||||
| github.com/json-iterator/go | ||||
| # github.com/klauspost/compress v1.15.11 | ||||
| ## explicit; go 1.17 | ||||
| # github.com/klauspost/compress v1.16.0 | ||||
| ## explicit; go 1.18 | ||||
| github.com/klauspost/compress | ||||
| github.com/klauspost/compress/fse | ||||
| github.com/klauspost/compress/huff0 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Akihiro Suda
					Akihiro Suda