Upgrade github.com/klauspost/compress from v1.11.13 to v1.15.9

The package has multiple improvements and bug fixes.

Signed-off-by: Kazuyoshi Kato <katokazu@amazon.com>
This commit is contained in:
Kazuyoshi Kato
2022-08-25 22:15:41 +00:00
parent 765351ac4d
commit d063597e80
82 changed files with 10616 additions and 3974 deletions

View File

@@ -16,8 +16,7 @@ Currently the package is heavily optimized for 64 bit processors and will be sig
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd)
## Compressor
@@ -79,6 +78,9 @@ of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is
in the future. So if you want to limit concurrency for future updates, specify the concurrency
you would like.
If you would like stream encoding to be done without spawning async goroutines, use `WithEncoderConcurrency(1)`
which will compress input as each block is completed, blocking on writes until each has completed.
You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined
compression settings can be specified.
@@ -105,7 +107,8 @@ and seems to ignore concatenated streams, even though [it is part of the spec](h
For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`.
`EncodeAll` will encode all input in src and append it to dst.
This function can be called concurrently, but each call will only run on a single goroutine.
This function can be called concurrently.
Each call will only run on a same goroutine as the caller.
Encoded blocks can be concatenated and the result will be the combined input stream.
Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`.
@@ -150,10 +153,10 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
This package:
file out level insize outsize millis mb/s
silesia.tar zskp 1 211947520 73101992 643 313.87
silesia.tar zskp 2 211947520 67504318 969 208.38
silesia.tar zskp 3 211947520 65177448 1899 106.44
silesia.tar zskp 4 211947520 61381950 8115 24.91
silesia.tar zskp 1 211947520 73821326 634 318.47
silesia.tar zskp 2 211947520 67655404 1508 133.96
silesia.tar zskp 3 211947520 64746933 3000 67.37
silesia.tar zskp 4 211947520 60073508 16926 11.94
cgo zstd:
silesia.tar zstd 1 211947520 73605392 543 371.56
@@ -162,84 +165,94 @@ silesia.tar zstd 6 211947520 62916450 1913 105.66
silesia.tar zstd 9 211947520 60212393 5063 39.92
gzip, stdlib/this package:
silesia.tar gzstd 1 211947520 80007735 1654 122.21
silesia.tar gzkp 1 211947520 80369488 1168 173.06
silesia.tar gzstd 1 211947520 80007735 1498 134.87
silesia.tar gzkp 1 211947520 80088272 1009 200.31
GOB stream of binary data. Highly compressible.
https://files.klauspost.com/compress/gob-stream.7z
file out level insize outsize millis mb/s
gob-stream zskp 1 1911399616 235022249 3088 590.30
gob-stream zskp 2 1911399616 205669791 3786 481.34
gob-stream zskp 3 1911399616 185792019 9324 195.48
gob-stream zskp 4 1911399616 171537212 32113 56.76
gob-stream zskp 1 1911399616 233948096 3230 564.34
gob-stream zskp 2 1911399616 203997694 4997 364.73
gob-stream zskp 3 1911399616 173526523 13435 135.68
gob-stream zskp 4 1911399616 162195235 47559 38.33
gob-stream zstd 1 1911399616 249810424 2637 691.26
gob-stream zstd 3 1911399616 208192146 3490 522.31
gob-stream zstd 6 1911399616 193632038 6687 272.56
gob-stream zstd 9 1911399616 177620386 16175 112.70
gob-stream gzstd 1 1911399616 357382641 10251 177.82
gob-stream gzkp 1 1911399616 362156523 5695 320.08
gob-stream gzstd 1 1911399616 357382013 9046 201.49
gob-stream gzkp 1 1911399616 359136669 4885 373.08
The test data for the Large Text Compression Benchmark is the first
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
http://mattmahoney.net/dc/textdata.html
file out level insize outsize millis mb/s
enwik9 zskp 1 1000000000 343848582 3609 264.18
enwik9 zskp 2 1000000000 317276632 5746 165.97
enwik9 zskp 3 1000000000 294540704 11725 81.34
enwik9 zskp 4 1000000000 276609671 44029 21.66
enwik9 zskp 1 1000000000 343833605 3687 258.64
enwik9 zskp 2 1000000000 317001237 7672 124.29
enwik9 zskp 3 1000000000 291915823 15923 59.89
enwik9 zskp 4 1000000000 261710291 77697 12.27
enwik9 zstd 1 1000000000 358072021 3110 306.65
enwik9 zstd 3 1000000000 313734672 4784 199.35
enwik9 zstd 6 1000000000 295138875 10290 92.68
enwik9 zstd 9 1000000000 278348700 28549 33.40
enwik9 gzstd 1 1000000000 382578136 9604 99.30
enwik9 gzkp 1 1000000000 383825945 6544 145.73
enwik9 gzstd 1 1000000000 382578136 8608 110.78
enwik9 gzkp 1 1000000000 382781160 5628 169.45
Highly compressible JSON file.
https://files.klauspost.com/compress/github-june-2days-2019.json.zst
file out level insize outsize millis mb/s
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54
github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18
github-june-2days-2019.json zskp 1 6273951764 697439532 9789 611.17
github-june-2days-2019.json zskp 2 6273951764 610876538 18553 322.49
github-june-2days-2019.json zskp 3 6273951764 517662858 44186 135.41
github-june-2days-2019.json zskp 4 6273951764 464617114 165373 36.18
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
github-june-2days-2019.json gzstd 1 6273951764 1164397768 26793 223.32
github-june-2days-2019.json gzkp 1 6273951764 1120631856 17693 338.16
VM Image, Linux mint with a few installed applications:
https://files.klauspost.com/compress/rawstudio-mint14.7z
file out level insize outsize millis mb/s
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75
rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79
rawstudio-mint14.tar zskp 1 8558382592 3718400221 18206 448.29
rawstudio-mint14.tar zskp 2 8558382592 3326118337 37074 220.15
rawstudio-mint14.tar zskp 3 8558382592 3163842361 87306 93.49
rawstudio-mint14.tar zskp 4 8558382592 2970480650 783862 10.41
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
rawstudio-mint14.tar gzstd 1 8558382592 3926234992 51345 158.96
rawstudio-mint14.tar gzkp 1 8558382592 3960117298 36722 222.26
CSV data:
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
file out level insize outsize millis mb/s
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53
nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49
nyc-taxi-data-10M.csv zskp 1 3325605752 641319332 9462 335.17
nyc-taxi-data-10M.csv zskp 2 3325605752 588976126 17570 180.50
nyc-taxi-data-10M.csv zskp 3 3325605752 529329260 32432 97.79
nyc-taxi-data-10M.csv zskp 4 3325605752 474949772 138025 22.98
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
nyc-taxi-data-10M.csv gzstd 1 3325605752 928654908 21270 149.11
nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
```
## Decompressor
@@ -274,8 +287,13 @@ func Decompress(in io.Reader, out io.Writer) error {
}
```
It is important to use the "Close" function when you no longer need the Reader to stop running goroutines.
See "Allocation-less operation" below.
It is important to use the "Close" function when you no longer need the Reader to stop running goroutines,
when running with default settings.
Goroutines will exit once an error has been returned, including `io.EOF` at the end of a stream.
Streams are decoded concurrently in 4 asynchronous stages to give the best possible throughput.
However, if you prefer synchronous decompression, use `WithDecoderConcurrency(1)` which will decompress data
as it is being requested only.
For decoding buffers, it could look something like this:
@@ -284,7 +302,7 @@ import "github.com/klauspost/compress/zstd"
// Create a reader that caches decompressors.
// For this operation type we supply a nil Reader.
var decoder, _ = zstd.NewReader(nil)
var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0))
// Decompress a buffer. We don't supply a destination buffer,
// so it will be allocated by the decoder.
@@ -294,9 +312,12 @@ func Decompress(src []byte) ([]byte, error) {
```
Both of these cases should provide the functionality needed.
The decoder can be used for *concurrent* decompression of multiple buffers.
The decoder can be used for *concurrent* decompression of multiple buffers.
By default 4 decompressors will be created.
It will only allow a certain number of concurrent operations to run.
To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
It is possible to use `WithDecoderConcurrency(0)` to create GOMAXPROCS decoders.
### Dictionaries
@@ -348,70 +369,71 @@ In this case no unneeded allocations should be made.
The buffer decoder does everything on the same goroutine and does nothing concurrently.
It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that.
The stream decoder operates on
The stream decoder will create goroutines that:
* One goroutine reads input and splits the input to several block decoders.
* A number of decoders will decode blocks.
* A goroutine coordinates these blocks and sends history from one to the next.
1) Reads input and splits the input into blocks.
2) Decompression of literals.
3) Decompression of sequences.
4) Reconstruction of output stream.
So effectively this also means the decoder will "read ahead" and prepare data to always be available for output.
The concurrency level will, for streams, determine how many blocks ahead the compression will start.
Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency.
In practice this means that concurrency is often limited to utilizing about 2 cores effectively.
In practice this means that concurrency is often limited to utilizing about 3 cores effectively.
### Benchmarks
These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
The first two are streaming decodes and the last are smaller inputs.
Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used.
```
BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op
BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op
BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op
BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op
BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op
BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op
Concurrent blocks, performance:
Concurrent performance:
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op
```
This reflects the performance around May 2020, but this may be out of date.
This reflects the performance around May 2022, but this may be out of date.
## Zstd inside ZIP files
It is possible to use zstandard to compress individual files inside zip archives.
While this isn't widely supported it can be useful for internal files.
To support the compression and decompression of these files you must register a compressor and decompressor.
It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT
use the global registration functions. The main reason for this is that 2 registrations from
different packages will result in a panic.
It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip
files concurrently, and using a single instance will allow reusing some resources.
See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for
how to compress and decompress files inside zip archives.
# Contributions
Contributions are always welcome.
For new features/fixes, remember to add tests and for performance enhancements include benchmarks.
For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files.
For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan).
This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.

View File

@@ -7,6 +7,7 @@ package zstd
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math/bits"
)
@@ -50,16 +51,16 @@ func (b *bitReader) getBits(n uint8) int {
if n == 0 /*|| b.bitsRead >= 64 */ {
return 0
}
return b.getBitsFast(n)
return int(b.get32BitsFast(n))
}
// getBitsFast requires that at least one bit is requested every time.
// get32BitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) getBitsFast(n uint8) int {
func (b *bitReader) get32BitsFast(n uint8) uint32 {
const regMask = 64 - 1
v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return int(v)
return v
}
// fillFast() will make sure at least 32 bits are available.
@@ -125,6 +126,9 @@ func (b *bitReader) remain() uint {
func (b *bitReader) close() error {
// Release reference.
b.in = nil
if !b.finished() {
return fmt.Errorf("%d extra bits on block, should be 0", b.remain())
}
if b.bitsRead > 64 {
return io.ErrUnexpectedEOF
}

View File

@@ -5,8 +5,6 @@
package zstd
import "fmt"
// bitWriter will write bits.
// First bit will be LSB of the first byte of output.
type bitWriter struct {
@@ -38,7 +36,7 @@ func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
b.nBits += bits
}
// addBits32NC will add up to 32 bits.
// addBits32NC will add up to 31 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
@@ -46,6 +44,26 @@ func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
b.nBits += bits
}
// addBits64NC will add up to 64 bits.
// There must be space for 32 bits.
func (b *bitWriter) addBits64NC(value uint64, bits uint8) {
if bits <= 31 {
b.addBits32Clean(uint32(value), bits)
return
}
b.addBits32Clean(uint32(value), 32)
b.flush32()
b.addBits32Clean(uint32(value>>32), bits-32)
}
// addBits32Clean will add up to 32 bits.
// It will not check if there is space for them.
// The input must not contain more bits than specified.
func (b *bitWriter) addBits32Clean(value uint32, bits uint8) {
b.bitContainer |= uint64(value) << (b.nBits & 63)
b.nBits += bits
}
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -53,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
b.nBits += bits
}
// flush will flush all pending full bytes.
// There will be at least 56 bits available for writing when this has been called.
// Using flush32 is faster, but leaves less space for writing.
func (b *bitWriter) flush() {
v := b.nBits >> 3
switch v {
case 0:
case 1:
b.out = append(b.out,
byte(b.bitContainer),
)
case 2:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
)
case 3:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
)
case 4:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
)
case 5:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
)
case 6:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
)
case 7:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
byte(b.bitContainer>>48),
)
case 8:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
byte(b.bitContainer>>48),
byte(b.bitContainer>>56),
)
default:
panic(fmt.Errorf("bits (%d) > 64", b.nBits))
}
b.bitContainer >>= v << 3
b.nBits &= 7
}
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {

View File

@@ -5,9 +5,14 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sync"
"github.com/klauspost/compress/huff0"
@@ -38,14 +43,14 @@ const (
// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
maxCompressedBlockSize = 128 << 10
compressedBlockOverAlloc = 16
maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
// Maximum possible block size (all Raw+Uncompressed).
maxBlockSize = (1 << 21) - 1
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
maxCompressedLiteralSize = 1 << 18
maxRLELiteralSize = 1 << 20
maxMatchLen = 131074
maxSequences = 0x7f00 + 0xffff
maxMatchLen = 131074
maxSequences = 0x7f00 + 0xffff
// We support slightly less than the reference decoder to be able to
// use ints on 32 bit archs.
@@ -76,20 +81,27 @@ type blockDec struct {
// Window size of the block.
WindowSize uint64
history chan *history
input chan struct{}
result chan decodeOutput
sequenceBuf []seq
err error
decWG sync.WaitGroup
err error
// Check against this crc
checkCRC []byte
// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
localFrame *frameDec
sequence []seqVals
async struct {
newHist *history
literals []byte
seqData []byte
seqSize int // Size of uncompressed sequences
fcs uint64
}
// Block is RLE, this is the size.
RLESize uint32
tmp [4]byte
Type blockType
@@ -109,13 +121,8 @@ func (b *blockDec) String() string {
func newBlockDec(lowMem bool) *blockDec {
b := blockDec{
lowMem: lowMem,
result: make(chan decodeOutput, 1),
input: make(chan struct{}, 1),
history: make(chan *history, 1),
lowMem: lowMem,
}
b.decWG.Add(1)
go b.startDecoder()
return &b
}
@@ -123,44 +130,60 @@ func newBlockDec(lowMem bool) *blockDec {
// Input must be a start of a block and will be at the end of the block when returned.
func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
b.WindowSize = windowSize
tmp := br.readSmall(3)
if tmp == nil {
if debug {
println("Reading block header:", io.ErrUnexpectedEOF)
}
return io.ErrUnexpectedEOF
tmp, err := br.readSmall(3)
if err != nil {
println("Reading block header:", err)
return err
}
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
b.Last = bh&1 != 0
b.Type = blockType((bh >> 1) & 3)
// find size.
cSize := int(bh >> 3)
maxSize := maxBlockSize
maxSize := maxCompressedBlockSizeAlloc
switch b.Type {
case blockTypeReserved:
return ErrReservedBlockType
case blockTypeRLE:
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
if debugDecoder {
printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
}
return ErrWindowSizeExceeded
}
b.RLESize = uint32(cSize)
if b.lowMem {
maxSize = cSize
}
cSize = 1
case blockTypeCompressed:
if debug {
if debugDecoder {
println("Data size on stream:", cSize)
}
b.RLESize = 0
maxSize = maxCompressedBlockSize
maxSize = maxCompressedBlockSizeAlloc
if windowSize < maxCompressedBlockSize && b.lowMem {
maxSize = int(windowSize)
maxSize = int(windowSize) + compressedBlockOverAlloc
}
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
if debug {
if debugDecoder {
printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
}
return ErrCompressedSizeTooBig
}
// Empty compressed blocks must at least be 2 bytes
// for Literals_Block_Type and one for Sequences_Section_Header.
if cSize < 2 {
return ErrBlockTooSmall
}
case blockTypeRaw:
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
if debugDecoder {
printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
}
return ErrWindowSizeExceeded
}
b.RLESize = 0
// We do not need a destination for raw blocks.
maxSize = -1
@@ -170,19 +193,18 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
// Read block data.
if cap(b.dataStorage) < cSize {
if b.lowMem {
b.dataStorage = make([]byte, 0, cSize)
if b.lowMem || cSize > maxCompressedBlockSize {
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
} else {
b.dataStorage = make([]byte, 0, maxBlockSize)
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
}
}
if cap(b.dst) <= maxSize {
b.dst = make([]byte, 0, maxSize+1)
}
var err error
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debug {
if debugDecoder {
println("Reading block:", err, "(", cSize, ")", len(b.data))
printf("%T", br)
}
@@ -196,85 +218,14 @@ func (b *blockDec) sendErr(err error) {
b.Last = true
b.Type = blockTypeReserved
b.err = err
b.input <- struct{}{}
}
// Close will release resources.
// Closed blockDec cannot be reset.
func (b *blockDec) Close() {
close(b.input)
close(b.history)
close(b.result)
b.decWG.Wait()
}
// decodeAsync will prepare decoding the block when it receives input.
// This will separate output and history.
func (b *blockDec) startDecoder() {
defer b.decWG.Done()
for range b.input {
//println("blockDec: Got block input")
switch b.Type {
case blockTypeRLE:
if cap(b.dst) < int(b.RLESize) {
if b.lowMem {
b.dst = make([]byte, b.RLESize)
} else {
b.dst = make([]byte, maxBlockSize)
}
}
o := decodeOutput{
d: b,
b: b.dst[:b.RLESize],
err: nil,
}
v := b.data[0]
for i := range o.b {
o.b[i] = v
}
hist := <-b.history
hist.append(o.b)
b.result <- o
case blockTypeRaw:
o := decodeOutput{
d: b,
b: b.data,
err: nil,
}
hist := <-b.history
hist.append(o.b)
b.result <- o
case blockTypeCompressed:
b.dst = b.dst[:0]
err := b.decodeCompressed(nil)
o := decodeOutput{
d: b,
b: b.dst,
err: err,
}
if debug {
println("Decompressed to", len(b.dst), "bytes, error:", err)
}
b.result <- o
case blockTypeReserved:
// Used for returning errors.
<-b.history
b.result <- decodeOutput{
d: b,
b: nil,
err: b.err,
}
default:
panic("Invalid block type")
}
if debug {
println("blockDec: Finished block")
}
}
}
// decodeAsync will prepare decoding the block when it receives the history.
// If history is provided, it will not fetch it from the channel.
// decodeBuf
func (b *blockDec) decodeBuf(hist *history) error {
switch b.Type {
case blockTypeRLE:
@@ -297,14 +248,23 @@ func (b *blockDec) decodeBuf(hist *history) error {
return nil
case blockTypeCompressed:
saved := b.dst
b.dst = hist.b
hist.b = nil
// Append directly to history
if hist.ignoreBuffer == 0 {
b.dst = hist.b
hist.b = nil
} else {
b.dst = b.dst[:0]
}
err := b.decodeCompressed(hist)
if debug {
if debugDecoder {
println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
}
hist.b = b.dst
b.dst = saved
if hist.ignoreBuffer == 0 {
hist.b = b.dst
b.dst = saved
} else {
hist.appendKeep(b.dst)
}
return err
case blockTypeReserved:
// Used for returning errors.
@@ -314,30 +274,18 @@ func (b *blockDec) decodeBuf(hist *history) error {
}
}
// decodeCompressed will start decompressing a block.
// If no history is supplied the decoder will decodeAsync as much as possible
// before fetching from blockDec.history
func (b *blockDec) decodeCompressed(hist *history) error {
in := b.data
delayedHistory := hist == nil
if delayedHistory {
// We must always grab history.
defer func() {
if hist == nil {
<-b.history
}
}()
}
func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) {
// There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
if len(in) < 2 {
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
litType := literalsBlockType(in[0] & 3)
var litRegenSize int
var litCompSize int
sizeFormat := (in[0] >> 2) & 3
var fourStreams bool
var literals []byte
switch litType {
case literalsBlockRaw, literalsBlockRLE:
switch sizeFormat {
@@ -353,7 +301,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
if len(in) < 3 {
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
in = in[3:]
@@ -364,7 +312,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
if len(in) < 3 {
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
litRegenSize = int(n & 1023)
@@ -375,7 +323,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
fourStreams = true
if len(in) < 4 {
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
litRegenSize = int(n & 16383)
@@ -385,7 +333,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
fourStreams = true
if len(in) < 5 {
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
litRegenSize = int(n & 262143)
@@ -393,16 +341,18 @@ func (b *blockDec) decodeCompressed(hist *history) error {
in = in[5:]
}
}
if debug {
if debugDecoder {
println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
}
var literals []byte
var huff *huff0.Scratch
if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize {
return in, ErrWindowSizeExceeded
}
switch litType {
case literalsBlockRaw:
if len(in) < litRegenSize {
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
literals = in[:litRegenSize]
in = in[litRegenSize:]
@@ -410,19 +360,13 @@ func (b *blockDec) decodeCompressed(hist *history) error {
case literalsBlockRLE:
if len(in) < 1 {
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, litRegenSize)
b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
} else {
if litRegenSize > maxCompressedLiteralSize {
// Exceptional
b.literalBuf = make([]byte, litRegenSize)
} else {
b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
}
b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
literals = b.literalBuf[:litRegenSize]
@@ -431,45 +375,79 @@ func (b *blockDec) decodeCompressed(hist *history) error {
literals[i] = v
}
in = in[1:]
if debug {
if debugDecoder {
printf("Found %d RLE compressed literals\n", litRegenSize)
}
case literalsBlockTreeless:
if len(in) < litCompSize {
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
return ErrBlockTooSmall
return in, ErrBlockTooSmall
}
// Store compressed literals, so we defer decoding until we get history.
literals = in[:litCompSize]
in = in[litCompSize:]
if debug {
if debugDecoder {
printf("Found %d compressed literals\n", litCompSize)
}
case literalsBlockCompressed:
if len(in) < litCompSize {
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
return ErrBlockTooSmall
huff := hist.huffTree
if huff == nil {
return in, errors.New("literal block was treeless, but no history was defined")
}
literals = in[:litCompSize]
in = in[litCompSize:]
huff = huffDecoderPool.Get().(*huff0.Scratch)
var err error
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize)
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
if huff == nil {
huff = &huff0.Scratch{}
var err error
// Use our out buffer.
huff.MaxDecodedSize = litRegenSize
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
}
// Make sure we don't leak our literals buffer
if err != nil {
println("decompressing literals:", err)
return in, err
}
if len(literals) != litRegenSize {
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
case literalsBlockCompressed:
if len(in) < litCompSize {
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
return in, ErrBlockTooSmall
}
literals = in[:litCompSize]
in = in[litCompSize:]
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
huff := hist.huffTree
if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) {
huff = huffDecoderPool.Get().(*huff0.Scratch)
if huff == nil {
huff = &huff0.Scratch{}
}
}
var err error
huff, literals, err = huff0.ReadTable(literals, huff)
if err != nil {
println("reading huffman table:", err)
return err
return in, err
}
hist.huffTree = huff
huff.MaxDecodedSize = litRegenSize
// Use our out buffer.
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
@@ -478,27 +456,63 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
if err != nil {
println("decoding compressed literals:", err)
return err
return in, err
}
// Make sure we don't leak our literals buffer
if len(literals) != litRegenSize {
return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
if debug {
// Re-cap to get extra size.
literals = b.literalBuf[:len(literals)]
if debugDecoder {
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
}
}
hist.decoders.literals = literals
return in, nil
}
// decodeCompressed will start decompressing a block.
func (b *blockDec) decodeCompressed(hist *history) error {
in := b.data
in, err := b.decodeLiterals(in, hist)
if err != nil {
return err
}
err = b.prepareSequences(in, hist)
if err != nil {
return err
}
if hist.decoders.nSeqs == 0 {
b.dst = append(b.dst, hist.decoders.literals...)
return nil
}
before := len(hist.decoders.out)
err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
if err != nil {
return err
}
if hist.decoders.maxSyncLen > 0 {
hist.decoders.maxSyncLen += uint64(before)
hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
}
b.dst = hist.decoders.out
hist.recentOffsets = hist.decoders.prevOffset
return nil
}
func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
if debugDecoder {
printf("prepareSequences: %d byte(s) input\n", len(in))
}
// Decode Sequences
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
if len(in) < 1 {
return ErrBlockTooSmall
}
var nSeqs int
seqHeader := in[0]
nSeqs := 0
switch {
case seqHeader == 0:
in = in[1:]
case seqHeader < 128:
nSeqs = int(seqHeader)
in = in[1:]
@@ -515,19 +529,16 @@ func (b *blockDec) decodeCompressed(hist *history) error {
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
in = in[3:]
}
// Allocate sequences
if cap(b.sequenceBuf) < nSeqs {
if b.lowMem {
b.sequenceBuf = make([]seq, nSeqs)
} else {
// Allocate max
b.sequenceBuf = make([]seq, nSeqs, maxSequences)
if nSeqs == 0 && len(in) != 0 {
// When no sequences, there should not be any more data...
if debugDecoder {
printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
}
} else {
// Reuse buffer
b.sequenceBuf = b.sequenceBuf[:nSeqs]
return ErrUnexpectedBlockSize
}
var seqs = &sequenceDecs{}
var seqs = &hist.decoders
seqs.nSeqs = nSeqs
if nSeqs > 0 {
if len(in) < 1 {
return ErrBlockTooSmall
@@ -535,12 +546,12 @@ func (b *blockDec) decodeCompressed(hist *history) error {
br := byteReader{b: in, off: 0}
compMode := br.Uint8()
br.advance(1)
if debug {
if debugDecoder {
printf("Compression modes: 0b%b", compMode)
}
for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debug {
if debugDecoder {
println("Table", tableIndex(i), "is", mode)
}
var seq *sequenceDec
@@ -556,6 +567,9 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
switch mode {
case compModePredefined:
if seq.fse != nil && !seq.fse.preDefined {
fseDecoderPool.Put(seq.fse)
}
seq.fse = &fsePredef[i]
case compModeRLE:
if br.remain() < 1 {
@@ -563,34 +577,36 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
v := br.Uint8()
br.advance(1)
dec := fseDecoderPool.Get().(*fseDecoder)
if seq.fse == nil || seq.fse.preDefined {
seq.fse = fseDecoderPool.Get().(*fseDecoder)
}
symb, err := decSymbolValue(v, symbolTableX[i])
if err != nil {
printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
return err
}
dec.setRLE(symb)
seq.fse = dec
if debug {
seq.fse.setRLE(symb)
if debugDecoder {
printf("RLE set to %+v, code: %v", symb, v)
}
case compModeFSE:
println("Reading table for", tableIndex(i))
dec := fseDecoderPool.Get().(*fseDecoder)
err := dec.readNCount(&br, uint16(maxTableSymbol[i]))
if seq.fse == nil || seq.fse.preDefined {
seq.fse = fseDecoderPool.Get().(*fseDecoder)
}
err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i]))
if err != nil {
println("Read table error:", err)
return err
}
err = dec.transform(symbolTableX[i])
err = seq.fse.transform(symbolTableX[i])
if err != nil {
println("Transform table error:", err)
return err
}
if debug {
println("Read table ok", "symbolLen:", dec.symbolLen)
if debugDecoder {
println("Read table ok", "symbolLen:", seq.fse.symbolLen)
}
seq.fse = dec
case compModeRepeat:
seq.repeat = true
}
@@ -600,140 +616,106 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
in = br.unread()
}
// Wait for history.
// All time spent after this is critical since it is strictly sequential.
if hist == nil {
hist = <-b.history
if hist.error {
return ErrDecoderClosed
}
}
// Decode treeless literal block.
if litType == literalsBlockTreeless {
// TODO: We could send the history early WITHOUT the stream history.
// This would allow decoding treeless literals before the byte history is available.
// Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
// So not much obvious gain here.
if hist.huffTree == nil {
return errors.New("literal block was treeless, but no history was defined")
}
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize)
} else {
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
}
}
var err error
// Use our out buffer.
huff = hist.huffTree
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
}
// Make sure we don't leak our literals buffer
if err != nil {
println("decompressing literals:", err)
return err
}
if len(literals) != litRegenSize {
return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
} else {
if hist.huffTree != nil && huff != nil {
if hist.dict == nil || hist.dict.litEnc != hist.huffTree {
huffDecoderPool.Put(hist.huffTree)
}
hist.huffTree = nil
}
}
if huff != nil {
hist.huffTree = huff
}
if debug {
println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
if debugDecoder {
println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.")
}
if nSeqs == 0 {
// Decompressed content is defined entirely as Literals Section content.
b.dst = append(b.dst, literals...)
if delayedHistory {
hist.append(literals)
if len(b.sequence) > 0 {
b.sequence = b.sequence[:0]
}
return nil
}
seqs, err := seqs.mergeHistory(&hist.decoders)
if err != nil {
return err
br := seqs.br
if br == nil {
br = &bitReader{}
}
if debug {
println("History merged ok")
}
br := &bitReader{}
if err := br.init(in); err != nil {
return err
}
// TODO: Investigate if sending history without decoders are faster.
// This would allow the sequences to be decoded async and only have to construct stream history.
// If only recent offsets were not transferred, this would be an obvious win.
// Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded.
if err := seqs.initialize(br, hist, b.dst); err != nil {
println("initializing sequences:", err)
return err
}
// Extract blocks...
if false && hist.dict == nil {
fatalErr := func(err error) {
if err != nil {
panic(err)
}
}
fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
var buf bytes.Buffer
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
buf.Write(in)
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
}
return nil
}
func (b *blockDec) decodeSequences(hist *history) error {
if cap(b.sequence) < hist.decoders.nSeqs {
if b.lowMem {
b.sequence = make([]seqVals, 0, hist.decoders.nSeqs)
} else {
b.sequence = make([]seqVals, 0, 0x7F00+0xffff)
}
}
b.sequence = b.sequence[:hist.decoders.nSeqs]
if hist.decoders.nSeqs == 0 {
hist.decoders.seqSize = len(hist.decoders.literals)
return nil
}
hist.decoders.windowSize = hist.windowSize
hist.decoders.prevOffset = hist.recentOffsets
err := hist.decoders.decode(b.sequence)
hist.recentOffsets = hist.decoders.prevOffset
return err
}
func (b *blockDec) executeSequences(hist *history) error {
hbytes := hist.b
if len(hbytes) > hist.windowSize {
hbytes = hbytes[len(hbytes)-hist.windowSize:]
// We do not need history any more.
// We do not need history anymore.
if hist.dict != nil {
hist.dict.content = nil
}
}
if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
println("initializing sequences:", err)
return err
}
err = seqs.decode(nSeqs, br, hbytes)
hist.decoders.windowSize = hist.windowSize
hist.decoders.out = b.dst[:0]
err := hist.decoders.execute(b.sequence, hbytes)
if err != nil {
return err
}
if !br.finished() {
return fmt.Errorf("%d extra bits on block, should be 0", br.remain())
}
return b.updateHistory(hist)
}
err = br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
}
func (b *blockDec) updateHistory(hist *history) error {
if len(b.data) > maxCompressedBlockSize {
return fmt.Errorf("compressed block size too large (%d)", len(b.data))
}
// Set output and release references.
b.dst = seqs.out
seqs.out, seqs.literals, seqs.hist = nil, nil, nil
b.dst = hist.decoders.out
hist.recentOffsets = hist.decoders.prevOffset
if !delayedHistory {
// If we don't have delayed history, no need to update.
hist.recentOffsets = seqs.prevOffset
return nil
}
if b.Last {
// if last block we don't care about history.
println("Last block, no history returned")
hist.b = hist.b[:0]
return nil
} else {
hist.append(b.dst)
if debugDecoder {
println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b))
}
}
hist.append(b.dst)
hist.recentOffsets = seqs.prevOffset
if debug {
println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
}
hist.decoders.out, hist.decoders.literals = nil, nil
return nil
}

View File

@@ -51,7 +51,7 @@ func (b *blockEnc) init() {
if cap(b.literals) < maxCompressedBlockSize {
b.literals = make([]byte, 0, maxCompressedBlockSize)
}
const defSeqs = 200
const defSeqs = 2000
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
@@ -156,7 +156,7 @@ func (h *literalsHeader) setSize(regenLen int) {
switch {
case inBits < 5:
lh |= (uint64(regenLen) << 3) | (1 << 60)
if debug {
if debugEncoder {
got := int(lh>>3) & 0xff
if got != regenLen {
panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
@@ -184,7 +184,7 @@ func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug {
if debugEncoder {
const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask
if int(n&1023) != inLen {
@@ -312,7 +312,7 @@ func (b *blockEnc) encodeRaw(a []byte) {
bh.setType(blockTypeRaw)
b.output = bh.appendTo(b.output[:0])
b.output = append(b.output, a...)
if debug {
if debugEncoder {
println("Adding RAW block, length", len(a), "last:", b.last)
}
}
@@ -325,7 +325,7 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
bh.setType(blockTypeRaw)
dst = bh.appendTo(dst)
dst = append(dst, src...)
if debug {
if debugEncoder {
println("Adding RAW block, length", len(src), "last:", b.last)
}
return dst
@@ -339,7 +339,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
// Don't compress extremely small blocks
if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
if debug {
if debugEncoder {
println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
@@ -371,7 +371,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
switch err {
case huff0.ErrIncompressible:
if debug {
if debugEncoder {
println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
@@ -379,16 +379,16 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
b.output = append(b.output, lits...)
return nil
case huff0.ErrUseRLE:
if debug {
if debugEncoder {
println("Adding RLE block, length", len(lits))
}
bh.setType(blockTypeRLE)
b.output = bh.appendTo(b.output)
b.output = append(b.output, lits[0])
return nil
case nil:
default:
return err
case nil:
}
// Compressed...
// Now, allow reuse
@@ -396,12 +396,12 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
bh.setType(blockTypeCompressed)
var lh literalsHeader
if reUsed {
if debug {
if debugEncoder {
println("Reused tree, compressed to", len(out))
}
lh.setType(literalsBlockTreeless)
} else {
if debug {
if debugEncoder {
println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
}
lh.setType(literalsBlockCompressed)
@@ -426,7 +426,7 @@ func fuzzFseEncoder(data []byte) int {
return 0
}
enc := fseEncoder{}
hist := enc.Histogram()[:256]
hist := enc.Histogram()
maxSym := uint8(0)
for i, v := range data {
v = v & 63
@@ -517,7 +517,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
lh.setSize(len(b.literals))
b.output = lh.appendTo(b.output)
b.output = append(b.output, b.literals...)
if debug {
if debugEncoder {
println("Adding literals RAW, length", len(b.literals))
}
case huff0.ErrUseRLE:
@@ -525,27 +525,22 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
lh.setSize(len(b.literals))
b.output = lh.appendTo(b.output)
b.output = append(b.output, b.literals[0])
if debug {
if debugEncoder {
println("Adding literals RLE")
}
default:
if debug {
println("Adding literals ERROR:", err)
}
return err
case nil:
// Compressed litLen...
if reUsed {
if debug {
if debugEncoder {
println("reused tree")
}
lh.setType(literalsBlockTreeless)
} else {
if debug {
if debugEncoder {
println("new tree, size:", len(b.litEnc.OutTable))
}
lh.setType(literalsBlockCompressed)
if debug {
if debugEncoder {
_, _, err := huff0.ReadTable(out, nil)
if err != nil {
panic(err)
@@ -553,16 +548,21 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
}
}
lh.setSizes(len(out), len(b.literals), single)
if debug {
if debugEncoder {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh)
}
b.output = lh.appendTo(b.output)
b.output = append(b.output, out...)
b.litEnc.Reuse = huff0.ReusePolicyAllow
if debug {
if debugEncoder {
println("Adding literals compressed")
}
default:
if debugEncoder {
println("Adding literals ERROR:", err)
}
return err
}
// Sequence compression
@@ -577,7 +577,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
n := len(b.sequences) - 0x7f00
b.output = append(b.output, 255, uint8(n), uint8(n>>8))
}
if debug {
if debugEncoder {
println("Encoding", len(b.sequences), "sequences")
}
b.genCodes()
@@ -611,17 +611,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
nSize = nSize + (nSize+2*8*16)>>4
switch {
case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
if debug {
if debugEncoder {
println("Using predefined", predefSize>>3, "<=", nSize>>3)
}
return preDef, compModePredefined
case prevSize <= nSize:
if debug {
if debugEncoder {
println("Using previous", prevSize>>3, "<=", nSize>>3)
}
return prev, compModeRepeat
default:
if debug {
if debugEncoder {
println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
}
@@ -634,7 +634,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if llEnc.useRLE {
mode |= uint8(compModeRLE) << 6
llEnc.setRLE(b.sequences[0].llCode)
if debug {
if debugEncoder {
println("llEnc.useRLE")
}
} else {
@@ -645,7 +645,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if ofEnc.useRLE {
mode |= uint8(compModeRLE) << 4
ofEnc.setRLE(b.sequences[0].ofCode)
if debug {
if debugEncoder {
println("ofEnc.useRLE")
}
} else {
@@ -657,7 +657,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if mlEnc.useRLE {
mode |= uint8(compModeRLE) << 2
mlEnc.setRLE(b.sequences[0].mlCode)
if debug {
if debugEncoder {
println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
}
} else {
@@ -666,7 +666,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
mode |= uint8(m) << 2
}
b.output = append(b.output, mode)
if debug {
if debugEncoder {
printf("Compression modes: 0b%b", mode)
}
b.output, err = llEnc.writeCount(b.output)
@@ -722,52 +722,53 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
}
seq--
if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
// No need to flush (common)
for seq >= 0 {
s = b.sequences[seq]
wr.flush32()
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
// tabelog max is 8 for all.
of.encode(ofB)
ml.encode(mlB)
ll.encode(llB)
wr.flush32()
// Store sequences in reverse...
for seq >= 0 {
s = b.sequences[seq]
// We checked that all can stay within 32 bits
wr.addBits32NC(s.litLen, llB.outBits)
wr.addBits32NC(s.matchLen, mlB.outBits)
wr.addBits32NC(s.offset, ofB.outBits)
ofB := ofTT[s.ofCode]
wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
//of.encode(ofB)
nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
wr.addBits16NC(of.state, uint8(nbBitsOut))
of.state = of.stateTable[dstState]
if debugSequences {
println("Encoded seq", seq, s)
}
// Accumulate extra bits.
outBits := ofB.outBits & 31
extraBits := uint64(s.offset & bitMask32[outBits])
extraBitsN := outBits
seq--
mlB := mlTT[s.mlCode]
//ml.encode(mlB)
nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
wr.addBits16NC(ml.state, uint8(nbBitsOut))
ml.state = ml.stateTable[dstState]
outBits = mlB.outBits & 31
extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
extraBitsN += outBits
llB := llTT[s.llCode]
//ll.encode(llB)
nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
wr.addBits16NC(ll.state, uint8(nbBitsOut))
ll.state = ll.stateTable[dstState]
outBits = llB.outBits & 31
extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
extraBitsN += outBits
wr.flush32()
wr.addBits64NC(extraBits, extraBitsN)
if debugSequences {
println("Encoded seq", seq, s)
}
} else {
for seq >= 0 {
s = b.sequences[seq]
wr.flush32()
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
// tabelog max is below 8 for each.
of.encode(ofB)
ml.encode(mlB)
ll.encode(llB)
wr.flush32()
// ml+ll = max 32 bits total
wr.addBits32NC(s.litLen, llB.outBits)
wr.addBits32NC(s.matchLen, mlB.outBits)
wr.flush32()
wr.addBits32NC(s.offset, ofB.outBits)
if debugSequences {
println("Encoded seq", seq, s)
}
seq--
}
seq--
}
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
@@ -786,7 +787,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
// Size is output minus block header.
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
if debug {
if debugEncoder {
println("Rewriting block header", bh)
}
_ = bh.appendTo(b.output[bhOffset:bhOffset])
@@ -801,14 +802,13 @@ func (b *blockEnc) genCodes() {
// nothing to do
return
}
if len(b.sequences) > math.MaxUint16 {
panic("can only encode up to 64K sequences")
}
// No bounds checks after here:
llH := b.coders.llEnc.Histogram()[:256]
ofH := b.coders.ofEnc.Histogram()[:256]
mlH := b.coders.mlEnc.Histogram()[:256]
llH := b.coders.llEnc.Histogram()
ofH := b.coders.ofEnc.Histogram()
mlH := b.coders.mlEnc.Histogram()
for i := range llH {
llH[i] = 0
}
@@ -820,7 +820,8 @@ func (b *blockEnc) genCodes() {
}
var llMax, ofMax, mlMax uint8
for i, seq := range b.sequences {
for i := range b.sequences {
seq := &b.sequences[i]
v := llCode(seq.litLen)
seq.llCode = v
llH[v]++
@@ -844,7 +845,6 @@ func (b *blockEnc) genCodes() {
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
}
}
b.sequences[i] = seq
}
maxCount := func(a []uint32) int {
var max uint32

View File

@@ -12,8 +12,8 @@ import (
type byteBuffer interface {
// Read up to 8 bytes.
// Returns nil if no more input is available.
readSmall(n int) []byte
// Returns io.ErrUnexpectedEOF if this cannot be satisfied.
readSmall(n int) ([]byte, error)
// Read >8 bytes.
// MAY use the destination slice.
@@ -23,23 +23,23 @@ type byteBuffer interface {
readByte() (byte, error)
// Skip n bytes.
skipN(n int) error
skipN(n int64) error
}
// in-memory buffer
type byteBuf []byte
func (b *byteBuf) readSmall(n int) []byte {
func (b *byteBuf) readSmall(n int) ([]byte, error) {
if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
bb := *b
if len(bb) < n {
return nil
return nil, io.ErrUnexpectedEOF
}
r := bb[:n]
*b = bb[n:]
return r
return r, nil
}
func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
@@ -52,10 +52,6 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
return r, nil
}
func (b *byteBuf) remain() []byte {
return *b
}
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {
@@ -66,9 +62,12 @@ func (b *byteBuf) readByte() (byte, error) {
return r, nil
}
func (b *byteBuf) skipN(n int) error {
func (b *byteBuf) skipN(n int64) error {
bb := *b
if len(bb) < n {
if n < 0 {
return fmt.Errorf("negative skip (%d) requested", n)
}
if int64(len(bb)) < n {
return io.ErrUnexpectedEOF
}
*b = bb[n:]
@@ -81,19 +80,22 @@ type readerWrapper struct {
tmp [8]byte
}
func (r *readerWrapper) readSmall(n int) []byte {
func (r *readerWrapper) readSmall(n int) ([]byte, error) {
if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
n2, err := io.ReadFull(r.r, r.tmp[:n])
// We only really care about the actual bytes read.
if n2 != n {
if debug {
if err != nil {
if err == io.EOF {
return nil, io.ErrUnexpectedEOF
}
if debugDecoder {
println("readSmall: got", n2, "want", n, "err", err)
}
return nil
return nil, err
}
return r.tmp[:n]
return r.tmp[:n], nil
}
func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
@@ -110,6 +112,9 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
func (r *readerWrapper) readByte() (byte, error) {
n2, err := r.r.Read(r.tmp[:1])
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return 0, err
}
if n2 != 1 {
@@ -118,9 +123,9 @@ func (r *readerWrapper) readByte() (byte, error) {
return r.tmp[0], nil
}
func (r *readerWrapper) skipN(n int) error {
n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
if n2 != int64(n) {
func (r *readerWrapper) skipN(n int64) error {
n2, err := io.CopyN(ioutil.Discard, r.r, n)
if n2 != n {
err = io.ErrUnexpectedEOF
}
return err

View File

@@ -13,12 +13,6 @@ type byteReader struct {
off int
}
// init will initialize the reader and set the input.
func (b *byteReader) init(in []byte) {
b.b = in
b.off = 0
}
// advance the stream b n bytes.
func (b *byteReader) advance(n uint) {
b.off += int(n)

View File

@@ -5,6 +5,7 @@ package zstd
import (
"bytes"
"encoding/binary"
"errors"
"io"
)
@@ -15,18 +16,50 @@ const HeaderMaxSize = 14 + 3
// Header contains information about the first frame and block within that.
type Header struct {
// Window Size the window of data to keep while decoding.
// Will only be set if HasFCS is false.
WindowSize uint64
// SingleSegment specifies whether the data is to be decompressed into a
// single contiguous memory segment.
// It implies that WindowSize is invalid and that FrameContentSize is valid.
SingleSegment bool
// Frame content size.
// Expected size of the entire frame.
FrameContentSize uint64
// WindowSize is the window of data to keep while decoding.
// Will only be set if SingleSegment is false.
WindowSize uint64
// Dictionary ID.
// If 0, no dictionary.
DictionaryID uint32
// HasFCS specifies whether FrameContentSize has a valid value.
HasFCS bool
// FrameContentSize is the expected uncompressed size of the entire frame.
FrameContentSize uint64
// Skippable will be true if the frame is meant to be skipped.
// This implies that FirstBlock.OK is false.
Skippable bool
// SkippableID is the user-specific ID for the skippable frame.
// Valid values are between 0 to 15, inclusive.
SkippableID int
// SkippableSize is the length of the user data to skip following
// the header.
SkippableSize uint32
// HeaderSize is the raw size of the frame header.
//
// For normal frames, it includes the size of the magic number and
// the size of the header (per section 3.1.1.1).
// It does not include the size for any data blocks (section 3.1.1.2) nor
// the size for the trailing content checksum.
//
// For skippable frames, this counts the size of the magic number
// along with the size of the size field of the payload.
// It does not include the size of the skippable payload itself.
// The total frame size is the HeaderSize plus the SkippableSize.
HeaderSize int
// First block information.
FirstBlock struct {
// OK will be set if first block could be decoded.
@@ -51,17 +84,9 @@ type Header struct {
CompressedSize int
}
// Skippable will be true if the frame is meant to be skipped.
// No other information will be populated.
Skippable bool
// If set there is a checksum present for the block content.
// The checksum field at the end is always 4 bytes long.
HasCheckSum bool
// If this is true FrameContentSize will have a valid value
HasFCS bool
SingleSegment bool
}
// Decode the header from the beginning of the stream.
@@ -71,39 +96,46 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
*h = Header{}
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
if !bytes.Equal(b, frameMagic) {
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
}
*h = Header{Skippable: true}
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil
}
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
// Clear output
*h = Header{}
fhd, in := in[0], in[1:]
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("Reserved bit set on frame header")
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
h.HeaderSize++
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
@@ -120,9 +152,7 @@ func (h *Header) Decode(in []byte) error {
return io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
if b == nil {
return io.ErrUnexpectedEOF
}
h.HeaderSize += int(size)
switch size {
case 1:
h.DictionaryID = uint32(b[0])
@@ -152,9 +182,7 @@ func (h *Header) Decode(in []byte) error {
return io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
if b == nil {
return io.ErrUnexpectedEOF
}
h.HeaderSize += int(fcsSize)
switch fcsSize {
case 1:
h.FrameContentSize = uint64(b[0])
@@ -174,7 +202,7 @@ func (h *Header) Decode(in []byte) error {
if len(in) < 3 {
return nil
}
tmp, in := in[:3], in[3:]
tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
h.FirstBlock.Last = bh&1 != 0
blockType := blockType((bh >> 1) & 3)

View File

@@ -5,9 +5,13 @@
package zstd
import (
"errors"
"bytes"
"context"
"encoding/binary"
"io"
"sync"
"github.com/klauspost/compress/zstd/internal/xxhash"
)
// Decoder provides decoding of zstandard streams.
@@ -22,12 +26,19 @@ type Decoder struct {
// Unreferenced decoders, ready for use.
decoders chan *blockDec
// Streams ready to be decoded.
stream chan decodeStream
// Current read position used for Reader functionality.
current decoderState
// sync stream decoding
syncStream struct {
decodedFrame uint64
br readerWrapper
enabled bool
inFrame bool
}
frame *frameDec
// Custom dictionaries.
// Always uses copies.
dicts map[uint32]dict
@@ -46,7 +57,10 @@ type decoderState struct {
output chan decodeOutput
// cancel remaining output.
cancel chan struct{}
cancel context.CancelFunc
// crc of current frame
crc *xxhash.Digest
flushed bool
}
@@ -81,7 +95,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
return nil, err
}
}
d.current.output = make(chan decodeOutput, d.o.concurrent)
d.current.crc = xxhash.New()
d.current.flushed = true
if r == nil {
@@ -113,9 +127,6 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
// Returns the number of bytes written and any error that occurred.
// When the stream is done, io.EOF will be returned.
func (d *Decoder) Read(p []byte) (int, error) {
if d.stream == nil {
return 0, ErrDecoderNilInput
}
var n int
for {
if len(d.current.b) > 0 {
@@ -133,12 +144,12 @@ func (d *Decoder) Read(p []byte) (int, error) {
break
}
if !d.nextBlock(n == 0) {
return n, nil
return n, d.current.err
}
}
}
if len(d.current.b) > 0 {
if debug {
if debugDecoder {
println("returning", n, "still bytes left:", len(d.current.b))
}
// Only return error at end of block
@@ -147,7 +158,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
if d.current.err != nil {
d.drainOutput()
}
if debug {
if debugDecoder {
println("returning", n, d.current.err, len(d.decoders))
}
return n, d.current.err
@@ -165,23 +176,20 @@ func (d *Decoder) Reset(r io.Reader) error {
d.drainOutput()
d.syncStream.br.r = nil
if r == nil {
d.current.err = ErrDecoderNilInput
if len(d.current.b) > 0 {
d.current.b = d.current.b[:0]
}
d.current.flushed = true
return nil
}
if d.stream == nil {
d.stream = make(chan decodeStream, 1)
d.streamWg.Add(1)
go d.startStreamDecoder(d.stream)
}
// If bytes buffer and < 1MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
var bb2 byter
bb2 = bb
if debug {
// If bytes buffer and < 5MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
bb2 := bb
if debugDecoder {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb2.Bytes()
@@ -197,36 +205,48 @@ func (d *Decoder) Reset(r io.Reader) error {
d.current.b = dst
d.current.err = err
d.current.flushed = true
if debug {
if debugDecoder {
println("sync decode to", len(dst), "bytes, err:", err)
}
return nil
}
// Remove current block.
d.stashDecoder()
d.current.decodeOutput = decodeOutput{}
d.current.err = nil
d.current.cancel = make(chan struct{})
d.current.flushed = false
d.current.d = nil
d.stream <- decodeStream{
r: r,
output: d.current.output,
cancel: d.current.cancel,
// Ensure no-one else is still running...
d.streamWg.Wait()
if d.frame == nil {
d.frame = newFrameDec(d.o)
}
if d.o.concurrent == 1 {
return d.startSyncDecoder(r)
}
d.current.output = make(chan decodeOutput, d.o.concurrent)
ctx, cancel := context.WithCancel(context.Background())
d.current.cancel = cancel
d.streamWg.Add(1)
go d.startStreamDecoder(ctx, r, d.current.output)
return nil
}
// drainOutput will drain the output until errEndOfStream is sent.
func (d *Decoder) drainOutput() {
if d.current.cancel != nil {
println("cancelling current")
close(d.current.cancel)
if debugDecoder {
println("cancelling current")
}
d.current.cancel()
d.current.cancel = nil
}
if d.current.d != nil {
if debug {
if debugDecoder {
printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders))
}
d.decoders <- d.current.d
@@ -237,39 +257,31 @@ func (d *Decoder) drainOutput() {
println("current already flushed")
return
}
for {
select {
case v := <-d.current.output:
if v.d != nil {
if debug {
printf("re-adding decoder %p", v.d)
}
d.decoders <- v.d
}
if v.err == errEndOfStream {
println("current flushed")
d.current.flushed = true
return
for v := range d.current.output {
if v.d != nil {
if debugDecoder {
printf("re-adding decoder %p", v.d)
}
d.decoders <- v.d
}
}
d.current.output = nil
d.current.flushed = true
}
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
if d.stream == nil {
return 0, ErrDecoderNilInput
}
var n int64
for {
if len(d.current.b) > 0 {
n2, err2 := w.Write(d.current.b)
n += int64(n2)
if err2 != nil && d.current.err == nil {
if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) {
d.current.err = err2
break
} else if n2 != len(d.current.b) {
d.current.err = io.ErrShortWrite
}
}
if d.current.err != nil {
@@ -293,7 +305,7 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
// DecodeAll can be used concurrently.
// The Decoder concurrency limits will be respected.
func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if d.current.err == ErrDecoderClosed {
if d.decoders == nil {
return dst, ErrDecoderClosed
}
@@ -301,11 +313,14 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
block := <-d.decoders
frame := block.localFrame
defer func() {
if debug {
if debugDecoder {
printf("re-adding decoder: %p", block)
}
frame.rawInput = nil
frame.bBuf = nil
if frame.history.decoders.br != nil {
frame.history.decoders.br.in = nil
}
d.decoders <- block
}()
frame.bBuf = input
@@ -313,33 +328,42 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
for {
frame.history.reset()
err := frame.reset(&frame.bBuf)
if err == io.EOF {
if debug {
println("frame reset return EOF")
if err != nil {
if err == io.EOF {
if debugDecoder {
println("frame reset return EOF")
}
return dst, nil
}
return dst, nil
return dst, err
}
if frame.DictionaryID != nil {
dict, ok := d.dicts[*frame.DictionaryID]
if !ok {
return nil, ErrUnknownDictionary
}
if debugDecoder {
println("setting dict", frame.DictionaryID)
}
frame.history.setDict(&dict)
}
if err != nil {
return dst, err
if frame.WindowSize > d.o.maxWindowSize {
if debugDecoder {
println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize)
}
return dst, ErrWindowSizeExceeded
}
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
return dst, ErrDecoderSizeExceeded
}
if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
// Never preallocate moe than 1 GB up front.
if frame.FrameContentSize != fcsUnknown {
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
return dst, ErrDecoderSizeExceeded
}
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc)
copy(dst2, dst)
dst = dst2
}
}
if cap(dst) == 0 {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
@@ -359,7 +383,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
return dst, err
}
if len(frame.bBuf) == 0 {
if debug {
if debugDecoder {
println("frame dbuf empty")
}
break
@@ -374,31 +398,174 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
// If non-blocking mode is used the returned boolean will be false
// if no data was available without blocking.
func (d *Decoder) nextBlock(blocking bool) (ok bool) {
if d.current.err != nil {
// Keep error state.
return false
}
d.current.b = d.current.b[:0]
// SYNC:
if d.syncStream.enabled {
if !blocking {
return false
}
ok = d.nextBlockSync()
if !ok {
d.stashDecoder()
}
return ok
}
//ASYNC:
d.stashDecoder()
if blocking {
d.current.decodeOutput, ok = <-d.current.output
} else {
select {
case d.current.decodeOutput, ok = <-d.current.output:
default:
return false
}
}
if !ok {
// This should not happen, so signal error state...
d.current.err = io.ErrUnexpectedEOF
return false
}
next := d.current.decodeOutput
if next.d != nil && next.d.async.newHist != nil {
d.current.crc.Reset()
}
if debugDecoder {
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(xxhash.Sum64(next.b)))
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
}
if !d.o.ignoreChecksum && len(next.b) > 0 {
n, err := d.current.crc.Write(next.b)
if err == nil {
if n != len(next.b) {
d.current.err = io.ErrShortWrite
}
}
}
if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
got := d.current.crc.Sum64()
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
if debugDecoder {
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
}
d.current.err = ErrCRCMismatch
} else {
if debugDecoder {
println("CRC ok", tmp[:])
}
}
}
return true
}
func (d *Decoder) nextBlockSync() (ok bool) {
if d.current.d == nil {
d.current.d = <-d.decoders
}
for len(d.current.b) == 0 {
if !d.syncStream.inFrame {
d.frame.history.reset()
d.current.err = d.frame.reset(&d.syncStream.br)
if d.current.err != nil {
return false
}
if d.frame.DictionaryID != nil {
dict, ok := d.dicts[*d.frame.DictionaryID]
if !ok {
d.current.err = ErrUnknownDictionary
return false
} else {
d.frame.history.setDict(&dict)
}
}
if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
d.current.err = ErrDecoderSizeExceeded
return false
}
d.syncStream.decodedFrame = 0
d.syncStream.inFrame = true
}
d.current.err = d.frame.next(d.current.d)
if d.current.err != nil {
return false
}
d.frame.history.ensureBlock()
if debugDecoder {
println("History trimmed:", len(d.frame.history.b), "decoded already:", d.syncStream.decodedFrame)
}
histBefore := len(d.frame.history.b)
d.current.err = d.current.d.decodeBuf(&d.frame.history)
if d.current.err != nil {
println("error after:", d.current.err)
return false
}
d.current.b = d.frame.history.b[histBefore:]
if debugDecoder {
println("history after:", len(d.frame.history.b))
}
// Check frame size (before CRC)
d.syncStream.decodedFrame += uint64(len(d.current.b))
if d.syncStream.decodedFrame > d.frame.FrameContentSize {
if debugDecoder {
printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
}
d.current.err = ErrFrameSizeExceeded
return false
}
// Check FCS
if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize {
if debugDecoder {
printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
}
d.current.err = ErrFrameSizeMismatch
return false
}
// Update/Check CRC
if d.frame.HasCheckSum {
if !d.o.ignoreChecksum {
d.frame.crc.Write(d.current.b)
}
if d.current.d.Last {
if !d.o.ignoreChecksum {
d.current.err = d.frame.checkCRC()
} else {
d.current.err = d.frame.consumeCRC()
}
if d.current.err != nil {
println("CRC error:", d.current.err)
return false
}
}
}
d.syncStream.inFrame = !d.current.d.Last
}
return true
}
func (d *Decoder) stashDecoder() {
if d.current.d != nil {
if debug {
if debugDecoder {
printf("re-adding current decoder %p", d.current.d)
}
d.decoders <- d.current.d
d.current.d = nil
}
if d.current.err != nil {
// Keep error state.
return blocking
}
if blocking {
d.current.decodeOutput = <-d.current.output
} else {
select {
case d.current.decodeOutput = <-d.current.output:
default:
return false
}
}
if debug {
println("got", len(d.current.b), "bytes, error:", d.current.err)
}
return true
}
// Close will release all resources.
@@ -408,10 +575,10 @@ func (d *Decoder) Close() {
return
}
d.drainOutput()
if d.stream != nil {
close(d.stream)
if d.current.cancel != nil {
d.current.cancel()
d.streamWg.Wait()
d.stream = nil
d.current.cancel = nil
}
if d.decoders != nil {
close(d.decoders)
@@ -462,100 +629,296 @@ type decodeOutput struct {
err error
}
type decodeStream struct {
r io.Reader
// Blocks ready to be written to output.
output chan decodeOutput
// cancel reading from the input
cancel chan struct{}
func (d *Decoder) startSyncDecoder(r io.Reader) error {
d.frame.history.reset()
d.syncStream.br = readerWrapper{r: r}
d.syncStream.inFrame = false
d.syncStream.enabled = true
d.syncStream.decodedFrame = 0
return nil
}
// errEndOfStream indicates that everything from the stream was read.
var errEndOfStream = errors.New("end-of-stream")
// Create Decoder:
// Spawn n block decoders. These accept tasks to decode a block.
// Create goroutine that handles stream processing, this will send history to decoders as they are available.
// Decoders update the history as they decode.
// When a block is returned:
// a) history is sent to the next decoder,
// b) content written to CRC.
// c) return data to WRITER.
// d) wait for next block to return data.
// Once WRITTEN, the decoders reused by the writer frame decoder for re-use.
func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
// ASYNC:
// Spawn 3 go routines.
// 0: Read frames and decode block literals.
// 1: Decode sequences.
// 2: Execute sequences, send to output.
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
defer d.streamWg.Done()
frame := newFrameDec(d.o)
for stream := range inStream {
if debug {
println("got new stream")
br := readerWrapper{r: r}
var seqDecode = make(chan *blockDec, d.o.concurrent)
var seqExecute = make(chan *blockDec, d.o.concurrent)
// Async 1: Decode sequences...
go func() {
var hist history
var hasErr bool
for block := range seqDecode {
if hasErr {
if block != nil {
seqExecute <- block
}
continue
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
hist.windowSize = block.async.newHist.windowSize
if block.async.newHist.dict != nil {
hist.setDict(block.async.newHist.dict)
}
}
if block.err != nil || block.Type != blockTypeCompressed {
hasErr = block.err != nil
seqExecute <- block
continue
}
hist.decoders.literals = block.async.literals
block.err = block.prepareSequences(block.async.seqData, &hist)
if debugDecoder && block.err != nil {
println("prepareSequences returned:", block.err)
}
hasErr = block.err != nil
if block.err == nil {
block.err = block.decodeSequences(&hist)
if debugDecoder && block.err != nil {
println("decodeSequences returned:", block.err)
}
hasErr = block.err != nil
// block.async.sequence = hist.decoders.seq[:hist.decoders.nSeqs]
block.async.seqSize = hist.decoders.seqSize
}
seqExecute <- block
}
br := readerWrapper{r: stream.r}
decodeStream:
for {
frame.history.reset()
err := frame.reset(&br)
if debug && err != nil {
println("Frame decoder returned", err)
close(seqExecute)
}()
var wg sync.WaitGroup
wg.Add(1)
// Async 3: Execute sequences...
frameHistCache := d.frame.history.b
go func() {
var hist history
var decodedFrame uint64
var fcs uint64
var hasErr bool
for block := range seqExecute {
out := decodeOutput{err: block.err, d: block}
if block.err != nil || hasErr {
hasErr = true
output <- out
continue
}
if err == nil && frame.DictionaryID != nil {
dict, ok := d.dicts[*frame.DictionaryID]
if !ok {
err = ErrUnknownDictionary
if block.async.newHist != nil {
if debugDecoder {
println("Async 2: new history")
}
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
if block.async.newHist.dict != nil {
hist.setDict(block.async.newHist.dict)
}
if cap(hist.b) < hist.allocFrameBuffer {
if cap(frameHistCache) >= hist.allocFrameBuffer {
hist.b = frameHistCache
} else {
hist.b = make([]byte, 0, hist.allocFrameBuffer)
println("Alloc history sized", hist.allocFrameBuffer)
}
}
hist.b = hist.b[:0]
fcs = block.async.fcs
decodedFrame = 0
}
do := decodeOutput{err: block.err, d: block}
switch block.Type {
case blockTypeRLE:
if debugDecoder {
println("add rle block length:", block.RLESize)
}
if cap(block.dst) < int(block.RLESize) {
if block.lowMem {
block.dst = make([]byte, block.RLESize)
} else {
block.dst = make([]byte, maxBlockSize)
}
}
block.dst = block.dst[:block.RLESize]
v := block.data[0]
for i := range block.dst {
block.dst[i] = v
}
hist.append(block.dst)
do.b = block.dst
case blockTypeRaw:
if debugDecoder {
println("add raw block length:", len(block.data))
}
hist.append(block.data)
do.b = block.data
case blockTypeCompressed:
if debugDecoder {
println("execute with history length:", len(hist.b), "window:", hist.windowSize)
}
hist.decoders.seqSize = block.async.seqSize
hist.decoders.literals = block.async.literals
do.err = block.executeSequences(&hist)
hasErr = do.err != nil
if debugDecoder && hasErr {
println("executeSequences returned:", do.err)
}
do.b = block.dst
}
if !hasErr {
decodedFrame += uint64(len(do.b))
if decodedFrame > fcs {
println("fcs exceeded", block.Last, fcs, decodedFrame)
do.err = ErrFrameSizeExceeded
hasErr = true
} else if block.Last && fcs != fcsUnknown && decodedFrame != fcs {
do.err = ErrFrameSizeMismatch
hasErr = true
} else {
frame.history.setDict(&dict)
if debugDecoder {
println("fcs ok", block.Last, fcs, decodedFrame)
}
}
}
if err != nil {
stream.output <- decodeOutput{
err: err,
output <- do
}
close(output)
frameHistCache = hist.b
wg.Done()
if debugDecoder {
println("decoder goroutines finished")
}
}()
decodeStream:
for {
var hist history
var hasErr bool
decodeBlock := func(block *blockDec) {
if hasErr {
if block != nil {
seqDecode <- block
}
return
}
if block.err != nil || block.Type != blockTypeCompressed {
hasErr = block.err != nil
seqDecode <- block
return
}
remain, err := block.decodeLiterals(block.data, &hist)
block.err = err
hasErr = block.err != nil
if err == nil {
block.async.literals = hist.decoders.literals
block.async.seqData = remain
} else if debugDecoder {
println("decodeLiterals error:", err)
}
seqDecode <- block
}
frame := d.frame
if debugDecoder {
println("New frame...")
}
var historySent bool
frame.history.reset()
err := frame.reset(&br)
if debugDecoder && err != nil {
println("Frame decoder returned", err)
}
if err == nil && frame.DictionaryID != nil {
dict, ok := d.dicts[*frame.DictionaryID]
if !ok {
err = ErrUnknownDictionary
} else {
frame.history.setDict(&dict)
}
}
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
err = ErrDecoderSizeExceeded
}
if err != nil {
select {
case <-ctx.Done():
case dec := <-d.decoders:
dec.sendErr(err)
decodeBlock(dec)
}
break decodeStream
}
// Go through all blocks of the frame.
for {
var dec *blockDec
select {
case <-ctx.Done():
break decodeStream
case dec = <-d.decoders:
// Once we have a decoder, we MUST return it.
}
err := frame.next(dec)
if !historySent {
h := frame.history
if debugDecoder {
println("Alloc History:", h.allocFrameBuffer)
}
hist.reset()
if h.dict != nil {
hist.setDict(h.dict)
}
dec.async.newHist = &h
dec.async.fcs = frame.FrameContentSize
historySent = true
} else {
dec.async.newHist = nil
}
if debugDecoder && err != nil {
println("next block returned error:", err)
}
dec.err = err
dec.checkCRC = nil
if dec.Last && frame.HasCheckSum && err == nil {
crc, err := frame.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
dec.err = err
}
var tmp [4]byte
copy(tmp[:], crc)
dec.checkCRC = tmp[:]
if debugDecoder {
println("found crc to check:", dec.checkCRC)
}
}
err = dec.err
last := dec.Last
decodeBlock(dec)
if err != nil {
break decodeStream
}
if last {
break
}
if debug {
println("starting frame decoder")
}
// This goroutine will forward history between frames.
frame.frameDone.Add(1)
frame.initAsync()
go frame.startDecoder(stream.output)
decodeFrame:
// Go through all blocks of the frame.
for {
dec := <-d.decoders
select {
case <-stream.cancel:
if !frame.sendErr(dec, io.EOF) {
// To not let the decoder dangle, send it back.
stream.output <- decodeOutput{d: dec}
}
break decodeStream
default:
}
err := frame.next(dec)
switch err {
case io.EOF:
// End of current frame, no error
println("EOF on next block")
break decodeFrame
case nil:
continue
default:
println("block decoder returned", err)
break decodeStream
}
}
// All blocks have started decoding, check if there are more frames.
println("waiting for done")
frame.frameDone.Wait()
println("done waiting...")
}
frame.frameDone.Wait()
println("Sending EOS")
stream.output <- decodeOutput{err: errEndOfStream}
}
close(seqDecode)
wg.Wait()
d.frame.history.b = frameHistCache
}

View File

@@ -6,7 +6,6 @@ package zstd
import (
"errors"
"fmt"
"runtime"
)
@@ -18,16 +17,22 @@ type decoderOptions struct {
lowMem bool
concurrent int
maxDecodedSize uint64
maxWindowSize uint64
dicts []dict
ignoreChecksum bool
}
func (o *decoderOptions) setDefault() {
*o = decoderOptions{
// use less ram: true for now, but may change.
lowMem: true,
concurrent: runtime.GOMAXPROCS(0),
lowMem: true,
concurrent: runtime.GOMAXPROCS(0),
maxWindowSize: MaxWindowSize,
}
o.maxDecodedSize = 1 << 63
if o.concurrent > 4 {
o.concurrent = 4
}
o.maxDecodedSize = 64 << 30
}
// WithDecoderLowmem will set whether to use a lower amount of memory,
@@ -36,16 +41,25 @@ func WithDecoderLowmem(b bool) DOption {
return func(o *decoderOptions) error { o.lowMem = b; return nil }
}
// WithDecoderConcurrency will set the concurrency,
// meaning the maximum number of decoders to run concurrently.
// The value supplied must be at least 1.
// By default this will be set to GOMAXPROCS.
// WithDecoderConcurrency sets the number of created decoders.
// When decoding block with DecodeAll, this will limit the number
// of possible concurrently running decodes.
// When decoding streams, this will limit the number of
// inflight blocks.
// When decoding streams and setting maximum to 1,
// no async decoding will be done.
// When a value of 0 is provided GOMAXPROCS will be used.
// By default this will be set to 4 or GOMAXPROCS, whatever is lower.
func WithDecoderConcurrency(n int) DOption {
return func(o *decoderOptions) error {
if n <= 0 {
return fmt.Errorf("Concurrency must be at least 1")
if n < 0 {
return errors.New("concurrency must be at least 1")
}
if n == 0 {
o.concurrent = runtime.GOMAXPROCS(0)
} else {
o.concurrent = n
}
o.concurrent = n
return nil
}
}
@@ -53,15 +67,14 @@ func WithDecoderConcurrency(n int) DOption {
// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
// non-streaming operations or maximum window size for streaming operations.
// This can be used to control memory usage of potentially hostile content.
// For streaming operations, the maximum window size is capped at 1<<30 bytes.
// Maximum and default is 1 << 63 bytes.
// Maximum is 1 << 63 bytes. Default is 64GiB.
func WithDecoderMaxMemory(n uint64) DOption {
return func(o *decoderOptions) error {
if n == 0 {
return errors.New("WithDecoderMaxMemory must be at least 1")
}
if n > 1<<63 {
return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63")
return errors.New("WithDecoderMaxmemory must be less than 1 << 63")
}
o.maxDecodedSize = n
return nil
@@ -82,3 +95,29 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
return nil
}
}
// WithDecoderMaxWindow allows to set a maximum window size for decodes.
// This allows rejecting packets that will cause big memory usage.
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.
// If WithDecoderMaxMemory is set to a lower value, that will be used.
// Default is 512MB, Maximum is ~3.75 TB as per zstandard spec.
func WithDecoderMaxWindow(size uint64) DOption {
return func(o *decoderOptions) error {
if size < MinWindowSize {
return errors.New("WithMaxWindowSize must be at least 1KB, 1024 bytes")
}
if size > (1<<41)+7*(1<<38) {
return errors.New("WithMaxWindowSize must be less than (1<<41) + 7*(1<<38) ~ 3.75TB")
}
o.maxWindowSize = size
return nil
}
}
// IgnoreChecksum allows to forcibly ignore checksum checking.
func IgnoreChecksum(b bool) DOption {
return func(o *decoderOptions) error {
o.ignoreChecksum = b
return nil
}
}

View File

@@ -82,7 +82,7 @@ func loadDict(b []byte) (*dict, error) {
println("Transform table error:", err)
return err
}
if debug {
if debugDecoder || debugEncoder {
println("Read table ok", "symbolLen:", dec.symbolLen)
}
// Set decoders as predefined so they aren't reused.

View File

@@ -38,8 +38,8 @@ func (e *fastBase) AppendCRC(dst []byte) []byte {
// WindowSize returns the window size of the encoder,
// or a window size small enough to contain the input size, if > 0.
func (e *fastBase) WindowSize(size int) int32 {
if size > 0 && size < int(e.maxMatchOff) {
func (e *fastBase) WindowSize(size int64) int32 {
if size > 0 && size < int64(e.maxMatchOff) {
b := int32(1) << uint(bits.Len(uint(size)))
// Keep minimum window.
if b < 1024 {
@@ -108,11 +108,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) {
e.blk = enc
}
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts {
if s < 0 {
@@ -131,9 +126,24 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
a := src[s:]
b := src[t:]
b = b[:len(a)]
end := int32((len(a) >> 3) << 3)
for i := int32(0); i < end; i += 8 {
if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
return i + int32(bits.TrailingZeros64(diff)>>3)
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
a = a[end:]
b = b[end:]
for i := range a {
if a[i] != b[i] {
return int32(i) + end
}
}
return int32(len(a)) + end
}
// Reset the encoding table.
@@ -150,14 +160,15 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
} else {
e.crc.Reset()
}
if (!singleBlock || d.DictContentSize() > 0) && cap(e.hist) < int(e.maxMatchOff*2)+d.DictContentSize() {
l := e.maxMatchOff*2 + int32(d.DictContentSize())
// Make it at least 1MB.
if l < 1<<20 {
l = 1 << 20
if d != nil {
low := e.lowMem
if singleBlock {
e.lowMem = true
}
e.hist = make([]byte, 0, l)
e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
e.lowMem = low
}
// We offset current position so everything will be out of reach.
// If above reset line, history will be purged.
if e.cur < bufferReset {

View File

@@ -5,22 +5,61 @@
package zstd
import (
"bytes"
"fmt"
"math/bits"
"github.com/klauspost/compress"
)
const (
bestLongTableBits = 20 // Bits used in the long match table
bestLongTableBits = 22 // Bits used in the long match table
bestLongTableSize = 1 << bestLongTableBits // Size of the table
bestLongLen = 8 // Bytes used for table hash
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
bestShortTableBits = 16 // Bits used in the short match table
bestShortTableBits = 18 // Bits used in the short match table
bestShortTableSize = 1 << bestShortTableBits // Size of the table
bestShortLen = 4 // Bytes used for table hash
)
type match struct {
offset int32
s int32
length int32
rep int32
est int32
}
const highScore = 25000
// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
mlc := mlCode(uint32(m.length - zstdMinMatch))
var ofc uint8
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
// Add cost of match encoding...
m.est = int32(ofTT.outBits + mlTT.outBits)
m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
// Subtract savings compared to literal encoding...
m.est -= (m.length * bitsPerByte) >> 10
if m.est > 0 {
// Unlikely gain..
m.length = 0
m.est = highScore
}
}
// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
@@ -109,6 +148,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
return
}
// Use this to estimate literal cost.
// Scaled by 10 bits.
bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
// Huffman can never go < 1 bit/byte
if bitsPerByte < 1024 {
bitsPerByte = 1024
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
@@ -132,7 +179,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
}
_ = addLiterals
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -145,51 +192,49 @@ encodeLoop:
panic("offset0 was 0")
}
type match struct {
offset int32
s int32
length int32
rep int32
}
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{offset: offset, s: s}
}
return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
}
bestOf := func(a, b match) match {
aScore := b.s - a.s + a.length
bScore := a.s - b.s + b.length
if a.rep < 0 {
aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
}
if b.rep < 0 {
bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
}
if aScore >= bScore {
if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
return a
}
return b
}
const goodEnough = 100
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{s: s, est: highScore}
}
if debugAsserts {
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
return m
}
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
if canRepeat && best.length < goodEnough {
best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
cv32 := uint32(cv >> 8)
spp := s + 1
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
if best.length > 0 {
best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
cv32 = uint32(cv >> 24)
spp += 2
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
}
}
// Load next and check...
@@ -209,17 +254,43 @@ encodeLoop:
}
s++
candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
cv = load6432(src, s)
cv2 := load6432(src, s+1)
candidateL = e.longTable[hash8(cv, bestLongTableBits)]
candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
// Long at s+1, s+2
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
if false {
// Short at s+3.
// Too often worse...
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
}
best = bestEnd
}
}
}
if debugAsserts {
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
@@ -260,7 +331,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, best.length)
}
@@ -270,8 +341,8 @@ encodeLoop:
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
off++
@@ -297,7 +368,7 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
if debugAsserts && int(offset1) > len(src) {
panic("invalid offset")
}
@@ -338,8 +409,8 @@ encodeLoop:
// every entry
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
@@ -360,8 +431,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -398,7 +469,7 @@ encodeLoop:
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
blk.recentOffsets[2] = uint32(offset3)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@@ -411,7 +482,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
e.Encode(blk, src)
}
// ResetDict will reset and set a dictionary if not nil
// Reset will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
@@ -427,10 +498,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
const hashLog = bestShortTableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash4x64(cv, hashLog) // 0 -> 4
nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4
nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5
nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
e.dictTable[nextHash] = prevEntry{
prev: e.dictTable[nextHash].offset,
offset: i,
@@ -458,7 +529,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, bestLongTableBits)
h := hashLen(cv, bestLongTableBits, bestLongLen)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
@@ -468,7 +539,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, bestLongTableBits)
h := hashLen(cv, bestLongTableBits, bestLongLen)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,

View File

@@ -9,6 +9,7 @@ import "fmt"
const (
betterLongTableBits = 19 // Bits used in the long match table
betterLongTableSize = 1 << betterLongTableBits // Size of the table
betterLongLen = 8 // Bytes used for table hash
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
@@ -16,6 +17,7 @@ const (
// This greatly depends on the type of input.
betterShortTableBits = 13 // Bits used in the short match table
betterShortTableSize = 1 << betterShortTableBits // Size of the table
betterShortLen = 5 // Bytes used for table hash
betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
@@ -138,7 +140,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -154,8 +156,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -204,7 +206,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@@ -214,10 +216,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
@@ -264,7 +266,7 @@ encodeLoop:
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@@ -275,10 +277,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
@@ -353,7 +355,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur
@@ -412,8 +414,41 @@ encodeLoop:
cv = load6432(src, s)
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t
@@ -462,10 +497,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
@@ -483,8 +518,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -520,7 +555,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@@ -623,7 +658,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -639,8 +674,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -691,7 +726,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@@ -701,11 +736,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
@@ -754,7 +789,7 @@ encodeLoop:
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@@ -765,11 +800,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
@@ -846,7 +881,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur
@@ -905,9 +940,41 @@ encodeLoop:
}
cv = load6432(src, s)
}
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t
@@ -956,11 +1023,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
@@ -980,8 +1047,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -1019,7 +1086,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@@ -1048,10 +1115,10 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
const hashLog = betterShortTableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash5(cv, hashLog) // 0 -> 4
nextHash1 := hash5(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4
nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5
nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,
@@ -1080,7 +1147,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, betterLongTableBits)
h := hashLen(cv, betterLongTableBits, betterLongLen)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
@@ -1090,7 +1157,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, betterLongTableBits)
h := hashLen(cv, betterLongTableBits, betterLongLen)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,

View File

@@ -10,6 +10,7 @@ const (
dFastLongTableBits = 17 // Bits used in the long match table
dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
dFastLongLen = 8 // Bytes used for table hash
dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard
@@ -17,6 +18,8 @@ const (
dFastShortTableBits = tableBits // Bits used in the short match table
dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
dFastShortLen = 5 // Bytes used for table hash
)
type doubleFastEncoder struct {
@@ -109,7 +112,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -124,8 +127,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -170,7 +173,7 @@ encodeLoop:
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@@ -208,7 +211,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
@@ -304,16 +307,16 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1
e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
cv = load6432(src, s)
@@ -330,8 +333,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -368,7 +371,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@@ -427,7 +430,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -436,8 +439,8 @@ encodeLoop:
var t int32
for {
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -483,7 +486,7 @@ encodeLoop:
s += length + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@@ -521,7 +524,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
@@ -614,16 +617,16 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1
e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
cv = load6432(src, s)
@@ -640,8 +643,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -677,7 +680,7 @@ encodeLoop:
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
@@ -767,7 +770,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -782,8 +785,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@@ -830,7 +833,7 @@ encodeLoop:
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@@ -868,7 +871,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
@@ -965,8 +968,8 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
longHash1 := hash8(cv0, dFastLongTableBits)
longHash2 := hash8(cv0, dFastLongTableBits)
longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
e.longTable[longHash1] = te0
e.longTable[longHash2] = te1
e.markLongShardDirty(longHash1)
@@ -977,8 +980,8 @@ encodeLoop:
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
hashVal1 := hash5(cv0, dFastShortTableBits)
hashVal2 := hash5(cv1, dFastShortTableBits)
hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
e.table[hashVal1] = te0
e.markShardDirty(hashVal1)
e.table[hashVal2] = te1
@@ -999,8 +1002,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@@ -1039,7 +1042,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// If we encoded more than 64K mark all dirty.
@@ -1071,14 +1074,14 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
val: uint32(cv),
offset: e.maxMatchOff,
}
end := int32(len(d.content)) - 8 + e.maxMatchOff
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
val: uint32(cv),
offset: i,
}

View File

@@ -6,17 +6,16 @@ package zstd
import (
"fmt"
"math"
"math/bits"
)
const (
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
tableShardSize = tableSize / tableShardCnt // Size of an individual shard
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
tableShardSize = tableSize / tableShardCnt // Size of an individual shard
tableFastHashLen = 6
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
)
type tableEntry struct {
@@ -86,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
const kSearchStrength = 7
const kSearchStrength = 6
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
@@ -103,7 +102,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -122,8 +121,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@@ -135,20 +134,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
@@ -178,7 +164,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@@ -235,20 +221,7 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@@ -285,23 +258,10 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
@@ -330,7 +290,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@@ -343,7 +303,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
inputMargin = 8
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debug {
if debugEncoder {
if len(src) > maxBlockSize {
panic("src too big")
}
@@ -374,7 +334,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
const kSearchStrength = 8
const kSearchStrength = 6
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
@@ -391,7 +351,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -405,8 +365,8 @@ encodeLoop:
// By not using them for the first 3 matches
for {
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@@ -417,21 +377,7 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
// length := 4 + e.matchlen(s+6, repIndex+4, src)
// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@@ -462,7 +408,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@@ -521,21 +467,7 @@ encodeLoop:
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@@ -572,24 +504,10 @@ encodeLoop:
if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
@@ -616,7 +534,7 @@ encodeLoop:
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
@@ -696,7 +614,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@@ -715,8 +633,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@@ -730,19 +648,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@@ -773,7 +679,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@@ -830,20 +736,7 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@@ -880,23 +773,10 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
nextHash := hashLen(cv, hashLog, tableFastHashLen)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.markShardDirty(nextHash)
seq.matchLen = uint32(l) - zstdMinMatch
@@ -926,7 +806,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@@ -957,9 +837,9 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
const hashLog = tableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash6(cv, hashLog) // 0 -> 5
nextHash1 := hash6(cv>>8, hashLog) // 1 -> 6
nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5
nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6
nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,

View File

@@ -33,7 +33,7 @@ type encoder interface {
Block() *blockEnc
CRC() *xxhash.Digest
AppendCRC([]byte) []byte
WindowSize(size int) int32
WindowSize(size int64) int32
UseBlock(*blockEnc)
Reset(d *dict, singleBlock bool)
}
@@ -48,6 +48,8 @@ type encoderState struct {
err error
writeErr error
nWritten int64
nInput int64
frameContentSize int64
headerWritten bool
eofWritten bool
fullFrameWritten bool
@@ -96,23 +98,25 @@ func (e *Encoder) Reset(w io.Writer) {
if cap(s.filling) == 0 {
s.filling = make([]byte, 0, e.o.blockSize)
}
if cap(s.current) == 0 {
s.current = make([]byte, 0, e.o.blockSize)
}
if cap(s.previous) == 0 {
s.previous = make([]byte, 0, e.o.blockSize)
if e.o.concurrent > 1 {
if cap(s.current) == 0 {
s.current = make([]byte, 0, e.o.blockSize)
}
if cap(s.previous) == 0 {
s.previous = make([]byte, 0, e.o.blockSize)
}
s.current = s.current[:0]
s.previous = s.previous[:0]
if s.writing == nil {
s.writing = &blockEnc{lowMem: e.o.lowMem}
s.writing.init()
}
s.writing.initNewEncode()
}
if s.encoder == nil {
s.encoder = e.o.encoder()
}
if s.writing == nil {
s.writing = &blockEnc{lowMem: e.o.lowMem}
s.writing.init()
}
s.writing.initNewEncode()
s.filling = s.filling[:0]
s.current = s.current[:0]
s.previous = s.previous[:0]
s.encoder.Reset(e.o.dict, false)
s.headerWritten = false
s.eofWritten = false
@@ -120,7 +124,21 @@ func (e *Encoder) Reset(w io.Writer) {
s.w = w
s.err = nil
s.nWritten = 0
s.nInput = 0
s.writeErr = nil
s.frameContentSize = 0
}
// ResetContentSize will reset and set a content size for the next stream.
// If the bytes written does not match the size given an error will be returned
// when calling Close().
// This is removed when Reset is called.
// Sizes <= 0 results in no content size set.
func (e *Encoder) ResetContentSize(w io.Writer, size int64) {
e.Reset(w)
if size >= 0 {
e.state.frameContentSize = size
}
}
// Write data to the encoder.
@@ -190,6 +208,7 @@ func (e *Encoder) nextBlock(final bool) error {
return s.err
}
s.nWritten += int64(n2)
s.nInput += int64(len(s.filling))
s.current = s.current[:0]
s.filling = s.filling[:0]
s.headerWritten = true
@@ -200,8 +219,8 @@ func (e *Encoder) nextBlock(final bool) error {
var tmp [maxHeaderSize]byte
fh := frameHeader{
ContentSize: 0,
WindowSize: uint32(s.encoder.WindowSize(0)),
ContentSize: uint64(s.frameContentSize),
WindowSize: uint32(s.encoder.WindowSize(s.frameContentSize)),
SingleSegment: false,
Checksum: e.o.crc,
DictID: e.o.dict.ID(),
@@ -241,11 +260,52 @@ func (e *Encoder) nextBlock(final bool) error {
return s.err
}
// SYNC:
if e.o.concurrent == 1 {
src := s.filling
s.nInput += int64(len(s.filling))
if debugEncoder {
println("Adding sync block,", len(src), "bytes, final:", final)
}
enc := s.encoder
blk := enc.Block()
blk.reset(nil)
enc.Encode(blk, src)
blk.last = final
if final {
s.eofWritten = true
}
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.err = err
return err
}
_, s.err = s.w.Write(blk.output)
s.nWritten += int64(len(blk.output))
s.filling = s.filling[:0]
return s.err
}
// Move blocks forward.
s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
s.nInput += int64(len(s.current))
s.wg.Add(1)
go func(src []byte) {
if debug {
if debugEncoder {
println("Adding block,", len(src), "bytes, final:", final)
}
defer func() {
@@ -290,7 +350,7 @@ func (e *Encoder) nextBlock(final bool) error {
}
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
@@ -313,7 +373,7 @@ func (e *Encoder) nextBlock(final bool) error {
//
// The Copy function uses ReaderFrom if available.
func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
if debug {
if debugEncoder {
println("Using ReadFrom")
}
@@ -336,20 +396,20 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
switch err {
case io.EOF:
e.state.filling = e.state.filling[:len(e.state.filling)-len(src)]
if debug {
if debugEncoder {
println("ReadFrom: got EOF final block:", len(e.state.filling))
}
return n, nil
case nil:
default:
if debug {
if debugEncoder {
println("ReadFrom: got error:", err)
}
e.state.err = err
return n, err
case nil:
}
if len(src) > 0 {
if debug {
if debugEncoder {
println("ReadFrom: got space left in source:", len(src))
}
continue
@@ -394,6 +454,11 @@ func (e *Encoder) Close() error {
if err != nil {
return err
}
if s.frameContentSize > 0 {
if s.nInput != s.frameContentSize {
return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput)
}
}
if e.state.fullFrameWritten {
return s.err
}
@@ -463,14 +528,14 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// If a non-single block is needed the encoder will reset again.
e.encoders <- enc
}()
// Use single segments when above minimum window and below 1MB.
single := len(src) < 1<<20 && len(src) > MinWindowSize
// Use single segments when above minimum window and below window size.
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
if e.o.single != nil {
single = *e.o.single
}
fh := frameHeader{
ContentSize: uint64(len(src)),
WindowSize: uint32(enc.WindowSize(len(src))),
WindowSize: uint32(enc.WindowSize(int64(len(src)))),
SingleSegment: single,
Checksum: e.o.crc,
DictID: e.o.dict.ID(),
@@ -486,7 +551,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
// If we can do everything in one block, prefer that.
if len(src) <= maxCompressedBlockSize {
if len(src) <= e.o.blockSize {
enc.Reset(e.o.dict, true)
// Slightly faster with no history and everything in one block.
if e.o.crc {
@@ -512,7 +577,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, src)
@@ -548,7 +613,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, todo)

View File

@@ -24,6 +24,7 @@ type encoderOptions struct {
allLitEntropy bool
customWindow bool
customALEntropy bool
customBlockSize bool
lowMem bool
dict *dict
}
@@ -33,7 +34,7 @@ func (o *encoderOptions) setDefault() {
concurrent: runtime.GOMAXPROCS(0),
crc: true,
single: nil,
blockSize: 1 << 16,
blockSize: maxCompressedBlockSize,
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
@@ -73,8 +74,9 @@ func WithEncoderCRC(b bool) EOption {
}
// WithEncoderConcurrency will set the concurrency,
// meaning the maximum number of decoders to run concurrently.
// meaning the maximum number of encoders to run concurrently.
// The value supplied must be at least 1.
// For streams, setting a value of 1 will disable async compression.
// By default this will be set to GOMAXPROCS.
func WithEncoderConcurrency(n int) EOption {
return func(o *encoderOptions) error {
@@ -106,6 +108,7 @@ func WithWindowSize(n int) EOption {
o.customWindow = true
if o.blockSize > o.windowSize {
o.blockSize = o.windowSize
o.customBlockSize = true
}
return nil
}
@@ -188,10 +191,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
return SpeedDefault
case level >= 6 && level < 10:
return SpeedBetterCompression
case level >= 10:
return SpeedBetterCompression
default:
return SpeedBestCompression
}
return SpeedDefault
}
// String provides a string representation of the compression level.
@@ -222,6 +224,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
switch o.level {
case SpeedFastest:
o.windowSize = 4 << 20
if !o.customBlockSize {
o.blockSize = 1 << 16
}
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:
@@ -278,7 +283,7 @@ func WithNoEntropyCompression(b bool) EOption {
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
// If this is not specified, block encodes will automatically choose this based on the input size.
// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
// This setting has no effect on streamed encodes.
func WithSingleSegment(b bool) EOption {
return func(o *encoderOptions) error {

View File

@@ -8,27 +8,17 @@ import (
"bytes"
"encoding/hex"
"errors"
"hash"
"io"
"sync"
"github.com/klauspost/compress/zstd/internal/xxhash"
)
type frameDec struct {
o decoderOptions
crc hash.Hash64
offset int64
o decoderOptions
crc *xxhash.Digest
WindowSize uint64
// maxWindowSize is the maximum windows size to support.
// should never be bigger than max-int.
maxWindowSize uint64
// In order queue of blocks being decoded.
decoding chan *blockDec
// Frame history passed between blocks
history history
@@ -38,20 +28,18 @@ type frameDec struct {
bBuf byteBuf
FrameContentSize uint64
frameDone sync.WaitGroup
DictionaryID *uint32
HasCheckSum bool
SingleSegment bool
// asyncRunning indicates whether the async routine processes input on 'decoding'.
asyncRunningMu sync.Mutex
asyncRunning bool
}
const (
// The minimum Window_Size is 1 KB.
// MinWindowSize is the minimum Window Size, which is 1 KB.
MinWindowSize = 1 << 10
// MaxWindowSize is the maximum encoder window size
// and the default decoder maximum window size.
MaxWindowSize = 1 << 29
)
@@ -61,12 +49,11 @@ var (
)
func newFrameDec(o decoderOptions) *frameDec {
d := frameDec{
o: o,
maxWindowSize: MaxWindowSize,
if o.maxWindowSize > o.maxDecodedSize {
o.maxWindowSize = o.maxDecodedSize
}
if d.maxWindowSize > o.maxDecodedSize {
d.maxWindowSize = o.maxDecodedSize
d := frameDec{
o: o,
}
return &d
}
@@ -78,50 +65,74 @@ func newFrameDec(o decoderOptions) *frameDec {
func (d *frameDec) reset(br byteBuffer) error {
d.HasCheckSum = false
d.WindowSize = 0
var b []byte
var signature [4]byte
for {
b = br.readSmall(4)
if b == nil {
var err error
// Check if we can read more...
b, err := br.readSmall(1)
switch err {
case io.EOF, io.ErrUnexpectedEOF:
return io.EOF
default:
return err
case nil:
signature[0] = b[0]
}
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
if debug {
println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic))
// Read the rest, don't allow io.ErrUnexpectedEOF
b, err = br.readSmall(3)
switch err {
case io.EOF:
return io.EOF
default:
return err
case nil:
copy(signature[1:], b)
}
if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
if debugDecoder {
println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
}
// Break if not skippable frame.
break
}
// Read size to skip
b = br.readSmall(4)
if b == nil {
println("Reading Frame Size EOF")
return io.ErrUnexpectedEOF
b, err = br.readSmall(4)
if err != nil {
if debugDecoder {
println("Reading Frame Size", err)
}
return err
}
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
println("Skipping frame with", n, "bytes.")
err := br.skipN(int(n))
err = br.skipN(int64(n))
if err != nil {
if debug {
if debugDecoder {
println("Reading discarded frame", err)
}
return err
}
}
if !bytes.Equal(b, frameMagic) {
println("Got magic numbers: ", b, "want:", frameMagic)
if !bytes.Equal(signature[:], frameMagic) {
if debugDecoder {
println("Got magic numbers: ", signature, "want:", frameMagic)
}
return ErrMagicMismatch
}
// Read Frame_Header_Descriptor
fhd, err := br.readByte()
if err != nil {
println("Reading Frame_Header_Descriptor", err)
if debugDecoder {
println("Reading Frame_Header_Descriptor", err)
}
return err
}
d.SingleSegment = fhd&(1<<5) != 0
if fhd&(1<<3) != 0 {
return errors.New("Reserved bit set on frame header")
return errors.New("reserved bit set on frame header")
}
// Read Window_Descriptor
@@ -130,7 +141,9 @@ func (d *frameDec) reset(br byteBuffer) error {
if !d.SingleSegment {
wd, err := br.readByte()
if err != nil {
println("Reading Window_Descriptor", err)
if debugDecoder {
println("Reading Window_Descriptor", err)
}
return err
}
printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
@@ -147,12 +160,11 @@ func (d *frameDec) reset(br byteBuffer) error {
if size == 3 {
size = 4
}
b = br.readSmall(int(size))
if b == nil {
if debug {
println("Reading Dictionary_ID", io.ErrUnexpectedEOF)
}
return io.ErrUnexpectedEOF
b, err := br.readSmall(int(size))
if err != nil {
println("Reading Dictionary_ID", err)
return err
}
var id uint32
switch size {
@@ -163,7 +175,7 @@ func (d *frameDec) reset(br byteBuffer) error {
case 4:
id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
if debug {
if debugDecoder {
println("Dict size", size, "ID:", id)
}
if id > 0 {
@@ -185,12 +197,12 @@ func (d *frameDec) reset(br byteBuffer) error {
default:
fcsSize = 1 << v
}
d.FrameContentSize = 0
d.FrameContentSize = fcsUnknown
if fcsSize > 0 {
b := br.readSmall(fcsSize)
if b == nil {
println("Reading Frame content", io.ErrUnexpectedEOF)
return io.ErrUnexpectedEOF
b, err := br.readSmall(fcsSize)
if err != nil {
println("Reading Frame content", err)
return err
}
switch fcsSize {
case 1:
@@ -205,10 +217,11 @@ func (d *frameDec) reset(br byteBuffer) error {
d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
}
if debug {
println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
if debugDecoder {
println("Read FCS:", d.FrameContentSize)
}
}
// Move this to shared.
d.HasCheckSum = fhd&(1<<2) != 0
if d.HasCheckSum {
@@ -218,29 +231,47 @@ func (d *frameDec) reset(br byteBuffer) error {
d.crc.Reset()
}
if d.WindowSize > d.o.maxWindowSize {
if debugDecoder {
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
}
return ErrWindowSizeExceeded
}
if d.WindowSize == 0 && d.SingleSegment {
// We may not need window in this case.
d.WindowSize = d.FrameContentSize
if d.WindowSize < MinWindowSize {
d.WindowSize = MinWindowSize
}
if d.WindowSize > d.o.maxDecodedSize {
if debugDecoder {
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
}
return ErrDecoderSizeExceeded
}
}
if d.WindowSize > d.maxWindowSize {
printf("window size %d > max %d\n", d.WindowSize, d.maxWindowSize)
return ErrWindowSizeExceeded
}
// The minimum Window_Size is 1 KB.
if d.WindowSize < MinWindowSize {
println("got window size: ", d.WindowSize)
if debugDecoder {
println("got window size: ", d.WindowSize)
}
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
if d.o.lowMem && d.history.windowSize < maxBlockSize {
d.history.maxSize = d.history.windowSize * 2
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
// Alloc 2x window size if not low-mem, or very small window size.
d.history.allocFrameBuffer = d.history.windowSize * 2
} else {
d.history.maxSize = d.history.windowSize + maxBlockSize
// Alloc with one additional block
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
}
if debugDecoder {
println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
}
// history contains input - maybe we do something
d.rawInput = br
return nil
@@ -248,56 +279,37 @@ func (d *frameDec) reset(br byteBuffer) error {
// next will start decoding the next block from stream.
func (d *frameDec) next(block *blockDec) error {
if debug {
printf("decoding new block %p:%p", block, block.data)
if debugDecoder {
println("decoding new block")
}
err := block.reset(d.rawInput, d.WindowSize)
if err != nil {
println("block error:", err)
// Signal the frame decoder we have a problem.
d.sendErr(block, err)
block.sendErr(err)
return err
}
block.input <- struct{}{}
if debug {
println("next block:", block)
}
d.asyncRunningMu.Lock()
defer d.asyncRunningMu.Unlock()
if !d.asyncRunning {
return nil
}
if block.Last {
// We indicate the frame is done by sending io.EOF
d.decoding <- block
return io.EOF
}
d.decoding <- block
return nil
}
// sendEOF will queue an error block on the frame.
// This will cause the frame decoder to return when it encounters the block.
// Returns true if the decoder was added.
func (d *frameDec) sendErr(block *blockDec, err error) bool {
d.asyncRunningMu.Lock()
defer d.asyncRunningMu.Unlock()
if !d.asyncRunning {
return false
}
println("sending error", err.Error())
block.sendErr(err)
d.decoding <- block
return true
}
// checkCRC will check the checksum if the frame has one.
// Will return ErrCRCMismatch if crc check failed, otherwise nil.
func (d *frameDec) checkCRC() error {
if !d.HasCheckSum {
return nil
}
// We can overwrite upper tmp now
want, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
if d.o.ignoreChecksum {
return nil
}
var tmp [4]byte
got := d.crc.Sum64()
// Flip to match file order.
@@ -306,142 +318,29 @@ func (d *frameDec) checkCRC() error {
tmp[2] = byte(got >> 16)
tmp[3] = byte(got >> 24)
// We can overwrite upper tmp now
want := d.rawInput.readSmall(4)
if want == nil {
println("CRC missing?")
return io.ErrUnexpectedEOF
}
if !bytes.Equal(tmp[:], want) {
if debug {
if debugDecoder {
println("CRC Check Failed:", tmp[:], "!=", want)
}
return ErrCRCMismatch
}
if debug {
if debugDecoder {
println("CRC ok", tmp[:])
}
return nil
}
func (d *frameDec) initAsync() {
if !d.o.lowMem && !d.SingleSegment {
// set max extra size history to 10MB.
d.history.maxSize = d.history.windowSize + maxBlockSize*5
// consumeCRC reads the checksum data if the frame has one.
func (d *frameDec) consumeCRC() error {
if d.HasCheckSum {
_, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
}
// re-alloc if more than one extra block size.
if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize {
d.history.b = make([]byte, 0, d.history.maxSize)
}
if cap(d.history.b) < d.history.maxSize {
d.history.b = make([]byte, 0, d.history.maxSize)
}
if cap(d.decoding) < d.o.concurrent {
d.decoding = make(chan *blockDec, d.o.concurrent)
}
if debug {
h := d.history
printf("history init. len: %d, cap: %d", len(h.b), cap(h.b))
}
d.asyncRunningMu.Lock()
d.asyncRunning = true
d.asyncRunningMu.Unlock()
}
// startDecoder will start decoding blocks and write them to the writer.
// The decoder will stop as soon as an error occurs or at end of frame.
// When the frame has finished decoding the *bufio.Reader
// containing the remaining input will be sent on frameDec.frameDone.
func (d *frameDec) startDecoder(output chan decodeOutput) {
written := int64(0)
defer func() {
d.asyncRunningMu.Lock()
d.asyncRunning = false
d.asyncRunningMu.Unlock()
// Drain the currently decoding.
d.history.error = true
flushdone:
for {
select {
case b := <-d.decoding:
b.history <- &d.history
output <- <-b.result
default:
break flushdone
}
}
println("frame decoder done, signalling done")
d.frameDone.Done()
}()
// Get decoder for first block.
block := <-d.decoding
block.history <- &d.history
for {
var next *blockDec
// Get result
r := <-block.result
if r.err != nil {
println("Result contained error", r.err)
output <- r
return
}
if debug {
println("got result, from ", d.offset, "to", d.offset+int64(len(r.b)))
d.offset += int64(len(r.b))
}
if !block.Last {
// Send history to next block
select {
case next = <-d.decoding:
if debug {
println("Sending ", len(d.history.b), "bytes as history")
}
next.history <- &d.history
default:
// Wait until we have sent the block, so
// other decoders can potentially get the decoder.
next = nil
}
}
// Add checksum, async to decoding.
if d.HasCheckSum {
n, err := d.crc.Write(r.b)
if err != nil {
r.err = err
if n != len(r.b) {
r.err = io.ErrShortWrite
}
output <- r
return
}
}
written += int64(len(r.b))
if d.SingleSegment && uint64(written) > d.FrameContentSize {
println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize)
r.err = ErrFrameSizeExceeded
output <- r
return
}
if block.Last {
r.err = d.checkCRC()
output <- r
return
}
output <- r
if next == nil {
// There was no decoder available, we wait for one now that we have sent to the writer.
if debug {
println("Sending ", len(d.history.b), " bytes as history")
}
next = <-d.decoding
next.history <- &d.history
}
block = next
}
return nil
}
// runDecoder will create a sync decoder that will decode a block of data.
@@ -450,41 +349,67 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
// We use the history for output to avoid copying it.
d.history.b = dst
d.history.ignoreBuffer = len(dst)
// Store input length, so we only check new data.
crcStart := len(dst)
d.history.decoders.maxSyncLen = 0
if d.FrameContentSize != fcsUnknown {
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
return dst, ErrDecoderSizeExceeded
}
if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
// Alloc for output
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
copy(dst2, dst)
dst = dst2
}
}
var err error
for {
err = dec.reset(d.rawInput, d.WindowSize)
if err != nil {
break
}
if debug {
if debugDecoder {
println("next block:", dec)
}
err = dec.decodeBuf(&d.history)
if err != nil || dec.Last {
if err != nil {
break
}
if uint64(len(d.history.b)) > d.o.maxDecodedSize {
err = ErrDecoderSizeExceeded
break
}
if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
err = ErrFrameSizeExceeded
break
}
if dec.Last {
break
}
if debugDecoder {
println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
}
}
dst = d.history.b
if err == nil {
if d.HasCheckSum {
var n int
n, err = d.crc.Write(dst[crcStart:])
if err == nil {
if n != len(dst)-crcStart {
err = io.ErrShortWrite
} else {
err = d.checkCRC()
if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
err = ErrFrameSizeMismatch
} else if d.HasCheckSum {
if d.o.ignoreChecksum {
err = d.consumeCRC()
} else {
var n int
n, err = d.crc.Write(dst[crcStart:])
if err == nil {
if n != len(dst)-crcStart {
err = io.ErrShortWrite
} else {
err = d.checkCRC()
}
}
}
}

View File

@@ -5,8 +5,10 @@
package zstd
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
const (
@@ -178,10 +180,32 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
}
b.advance((bitCount + 7) >> 3)
// println(s.norm[:s.symbolLen], s.symbolLen)
return s.buildDtable()
}
func (s *fseDecoder) mustReadFrom(r io.Reader) {
fatalErr := func(err error) {
if err != nil {
panic(err)
}
}
// dt [maxTablesize]decSymbol // Decompression table.
// symbolLen uint16 // Length of active part of the symbol table.
// actualTableLog uint8 // Selected tablelog.
// maxBits uint8 // Maximum number of additional bits
// // used for table creation to avoid allocations.
// stateTable [256]uint16
// norm [maxSymbolValue + 1]int16
// preDefined bool
fatalErr(binary.Read(r, binary.LittleEndian, &s.dt))
fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen))
fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog))
fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits))
fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable))
fatalErr(binary.Read(r, binary.LittleEndian, &s.norm))
fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined))
}
// decSymbol contains information about a state entry,
// Including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state.
@@ -204,18 +228,10 @@ func (d decSymbol) newState() uint16 {
return uint16(d >> 16)
}
func (d decSymbol) baseline() uint32 {
return uint32(d >> 32)
}
func (d decSymbol) baselineInt() int {
return int(d >> 32)
}
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
*d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
}
func (d *decSymbol) setNBits(nBits uint8) {
const mask = 0xffffffffffffff00
*d = (*d & mask) | decSymbol(nBits)
@@ -231,11 +247,6 @@ func (d *decSymbol) setNewState(state uint16) {
*d = (*d & mask) | decSymbol(state)<<16
}
func (d *decSymbol) setBaseline(baseline uint32) {
const mask = 0xffffffff
*d = (*d & mask) | decSymbol(baseline)<<32
}
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
const mask = 0xffff00ff
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
@@ -257,68 +268,6 @@ func (s *fseDecoder) setRLE(symbol decSymbol) {
s.dt[0] = symbol
}
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
tableSize := uint32(1 << s.actualTableLog)
highThreshold := tableSize - 1
symbolNext := s.stateTable[:256]
// Init, lay down lowprob symbols
{
for i, v := range s.norm[:s.symbolLen] {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
}
}
}
// Spread symbols
{
tableMask := tableSize - 1
step := tableStep(tableSize)
position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
// lowprob area
position = (position + step) & tableMask
}
}
}
if position != 0 {
// position must reach all cells once, otherwise normalizedCounter is incorrect
return errors.New("corrupted input (position != 0)")
}
}
// Build Decoding table
{
tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] {
symbol := v.addBits()
nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize
if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
}
if newState == uint16(u) && nBits == 0 {
// Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
}
s.dt[u&maxTableMask].setNewState(newState)
}
}
return nil
}
// transform will transform the decoder table into a table usable for
// decoding without having to apply the transformation while decoding.
// The state will contain the base value and the number of bits to read.
@@ -352,34 +301,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
s.state = dt[br.getBits(tableLog)]
}
// next returns the current symbol and sets the next state.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) next(br *bitReader) {
lowBits := uint16(br.getBits(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
}
// finished returns true if all bits have been read from the bitstream
// and the next state would require reading bits from the input.
func (s *fseState) finished(br *bitReader) bool {
return br.finished() && s.state.nbBits() > 0
}
// final returns the current state symbol without decoding the next.
func (s *fseState) final() (int, uint8) {
return s.state.baselineInt(), s.state.addBits()
}
// final returns the current state symbol without decoding the next.
func (s decSymbol) final() (int, uint8) {
return s.baselineInt(), s.addBits()
}
// nextFast returns the next symbol and sets the next state.
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}

View File

@@ -0,0 +1,64 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
package zstd
import (
"fmt"
)
type buildDtableAsmContext struct {
// inputs
stateTable *uint16
norm *int16
dt *uint64
// outputs --- set by the procedure in the case of error;
// for interpretation please see the error handling part below
errParam1 uint64
errParam2 uint64
}
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
// Function returns non-zero exit code on error.
// go:noescape
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
// please keep in sync with _generate/gen_fse.go
const (
errorCorruptedNormalizedCounter = 1
errorNewStateTooBig = 2
errorNewStateNoBits = 3
)
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
ctx := buildDtableAsmContext{
stateTable: &s.stateTable[0],
norm: &s.norm[0],
dt: (*uint64)(&s.dt[0]),
}
code := buildDtable_asm(s, &ctx)
if code != 0 {
switch code {
case errorCorruptedNormalizedCounter:
position := ctx.errParam1
return fmt.Errorf("corrupted input (position=%d, expected 0)", position)
case errorNewStateTooBig:
newState := decSymbol(ctx.errParam1)
size := ctx.errParam2
return fmt.Errorf("newState (%d) outside table size (%d)", newState, size)
case errorNewStateNoBits:
newState := decSymbol(ctx.errParam1)
oldState := decSymbol(ctx.errParam2)
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, oldState)
default:
return fmt.Errorf("buildDtable_asm returned unhandled nonzero code = %d", code)
}
}
return nil
}

View File

@@ -0,0 +1,127 @@
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
// +build !appengine,!noasm,gc,!noasm
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
TEXT ·buildDtable_asm(SB), $0-24
MOVQ ctx+8(FP), CX
MOVQ s+0(FP), DI
// Load values
MOVBQZX 4098(DI), DX
XORQ AX, AX
BTSQ DX, AX
MOVQ (CX), BX
MOVQ 16(CX), SI
LEAQ -1(AX), R8
MOVQ 8(CX), CX
MOVWQZX 4096(DI), DI
// End load values
// Init, lay down lowprob symbols
XORQ R9, R9
JMP init_main_loop_condition
init_main_loop:
MOVWQSX (CX)(R9*2), R10
CMPW R10, $-1
JNE do_not_update_high_threshold
MOVB R9, 1(SI)(R8*8)
DECQ R8
MOVQ $0x0000000000000001, R10
do_not_update_high_threshold:
MOVW R10, (BX)(R9*2)
INCQ R9
init_main_loop_condition:
CMPQ R9, DI
JL init_main_loop
// Spread symbols
// Calculate table step
MOVQ AX, R9
SHRQ $0x01, R9
MOVQ AX, R10
SHRQ $0x03, R10
LEAQ 3(R9)(R10*1), R9
// Fill add bits values
LEAQ -1(AX), R10
XORQ R11, R11
XORQ R12, R12
JMP spread_main_loop_condition
spread_main_loop:
XORQ R13, R13
MOVWQSX (CX)(R12*2), R14
JMP spread_inner_loop_condition
spread_inner_loop:
MOVB R12, 1(SI)(R11*8)
adjust_position:
ADDQ R9, R11
ANDQ R10, R11
CMPQ R11, R8
JG adjust_position
INCQ R13
spread_inner_loop_condition:
CMPQ R13, R14
JL spread_inner_loop
INCQ R12
spread_main_loop_condition:
CMPQ R12, DI
JL spread_main_loop
TESTQ R11, R11
JZ spread_check_ok
MOVQ ctx+8(FP), AX
MOVQ R11, 24(AX)
MOVQ $+1, ret+16(FP)
RET
spread_check_ok:
// Build Decoding table
XORQ DI, DI
build_table_main_table:
MOVBQZX 1(SI)(DI*8), CX
MOVWQZX (BX)(CX*2), R8
LEAQ 1(R8), R9
MOVW R9, (BX)(CX*2)
MOVQ R8, R9
BSRQ R9, R9
MOVQ DX, CX
SUBQ R9, CX
SHLQ CL, R8
SUBQ AX, R8
MOVB CL, (SI)(DI*8)
MOVW R8, 2(SI)(DI*8)
CMPQ R8, AX
JLE build_table_check1_ok
MOVQ ctx+8(FP), CX
MOVQ R8, 24(CX)
MOVQ AX, 32(CX)
MOVQ $+2, ret+16(FP)
RET
build_table_check1_ok:
TESTB CL, CL
JNZ build_table_check2_ok
CMPW R8, DI
JNE build_table_check2_ok
MOVQ ctx+8(FP), AX
MOVQ R8, 24(AX)
MOVQ DI, 32(AX)
MOVQ $+3, ret+16(FP)
RET
build_table_check2_ok:
INCQ DI
CMPQ DI, AX
JL build_table_main_table
MOVQ $+0, ret+16(FP)
RET

View File

@@ -0,0 +1,72 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
package zstd
import (
"errors"
"fmt"
)
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
tableSize := uint32(1 << s.actualTableLog)
highThreshold := tableSize - 1
symbolNext := s.stateTable[:256]
// Init, lay down lowprob symbols
{
for i, v := range s.norm[:s.symbolLen] {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
}
}
}
// Spread symbols
{
tableMask := tableSize - 1
step := tableStep(tableSize)
position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
// lowprob area
position = (position + step) & tableMask
}
}
}
if position != 0 {
// position must reach all cells once, otherwise normalizedCounter is incorrect
return errors.New("corrupted input (position != 0)")
}
}
// Build Decoding table
{
tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] {
symbol := v.addBits()
nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize
if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
}
if newState == uint16(u) && nBits == 0 {
// Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
}
s.dt[u&maxTableMask].setNewState(newState)
}
}
return nil
}

View File

@@ -62,9 +62,8 @@ func (s symbolTransform) String() string {
// To indicate that you have populated the histogram call HistogramFinished
// with the value of the highest populated symbol, as well as the number of entries
// in the most populated entry. These are accepted at face value.
// The returned slice will always be length 256.
func (s *fseEncoder) Histogram() []uint32 {
return s.count[:]
func (s *fseEncoder) Histogram() *[256]uint32 {
return &s.count
}
// HistogramFinished can be called to indicate that the histogram has been populated.
@@ -77,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
s.clearCount = maxCount != 0
}
// prepare will prepare and allocate scratch tables used for both compression and decompression.
func (s *fseEncoder) prepare() (*fseEncoder, error) {
if s == nil {
s = &fseEncoder{}
}
s.useRLE = false
if s.clearCount && s.maxCount == 0 {
for i := range s.count {
s.count[i] = 0
}
s.clearCount = false
}
return s, nil
}
// allocCtable will allocate tables needed for compression.
// If existing tables a re big enough, they are simply re-used.
func (s *fseEncoder) allocCtable() {
@@ -229,7 +213,7 @@ func (s *fseEncoder) setRLE(val byte) {
deltaFindState: 0,
deltaNbBits: 0,
}
if debug {
if debugEncoder {
println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
}
s.rleVal = val
@@ -708,15 +692,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
im := int32((nbBitsOut << 16) - first.deltaNbBits)
lu := (im >> nbBitsOut) + int32(first.deltaFindState)
c.state = c.stateTable[lu]
return
}
// encode the output symbol provided and write it to the bitstream.
func (c *cState) encode(symbolTT symbolTransform) {
nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
c.bw.addBits16NC(c.state, uint8(nbBitsOut))
c.state = c.stateTable[dstState]
}
// flush will write the tablelog to the output and flush the remaining full bytes.

View File

@@ -59,7 +59,7 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) {
}
for i, bit := range bits {
if base > math.MaxInt32 {
panic(fmt.Sprintf("invalid decoding table, base overflows int32"))
panic("invalid decoding table, base overflows int32")
}
dst[i] = baseOffset{

View File

@@ -13,65 +13,23 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463
)
// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes.
// l must be >=4 and <=8. Any other value will return hash for 4 bytes.
// h should always be <32.
// Preferably h and l should be a constant.
// FIXME: This does NOT get resolved, if 'mls' is constant,
// so this cannot be used.
func hashLen(u uint64, hashLog, mls uint8) uint32 {
// hashLen returns a hash of the lowest mls bytes of with length output bits.
// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
// length should always be < 32.
// Preferably length and mls should be a constant for inlining.
func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - length)
case 5:
return hash5(u, hashLog)
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return hash6(u, hashLog)
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return hash7(u, hashLog)
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return hash8(u, hashLog)
return uint32((u * prime8bytes) >> (64 - length))
default:
return hash4x64(u, hashLog)
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash3(u uint32, h uint8) uint32 {
return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
}
// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> ((32 - h) & 31)
}
// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}
// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
}
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}
// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
return uint32((u * prime8bytes) >> ((64 - h) & 63))
}

View File

@@ -10,20 +10,31 @@ import (
// history contains the information transferred between blocks.
type history struct {
b []byte
huffTree *huff0.Scratch
recentOffsets [3]int
// Literal decompression
huffTree *huff0.Scratch
// Sequence decompression
decoders sequenceDecs
windowSize int
maxSize int
error bool
dict *dict
recentOffsets [3]int
// History buffer...
b []byte
// ignoreBuffer is meant to ignore a number of bytes
// when checking for matches in history
ignoreBuffer int
windowSize int
allocFrameBuffer int // needed?
error bool
dict *dict
}
// reset will reset the history to initial state of a frame.
// The history must already have been initialized to the desired size.
func (h *history) reset() {
h.b = h.b[:0]
h.ignoreBuffer = 0
h.error = false
h.recentOffsets = [3]int{1, 4, 8}
if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
@@ -35,7 +46,7 @@ func (h *history) reset() {
if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
}
h.decoders = sequenceDecs{}
h.decoders = sequenceDecs{br: h.decoders.br}
if h.huffTree != nil {
if h.dict == nil || h.dict.litEnc != h.huffTree {
huffDecoderPool.Put(h.huffTree)
@@ -54,6 +65,7 @@ func (h *history) setDict(dict *dict) {
h.decoders.litLengths = dict.llDec
h.decoders.offsets = dict.ofDec
h.decoders.matchLengths = dict.mlDec
h.decoders.dict = dict.content
h.recentOffsets = dict.offsets
h.huffTree = dict.litEnc
}
@@ -83,6 +95,24 @@ func (h *history) append(b []byte) {
copy(h.b[h.windowSize-len(b):], b)
}
// ensureBlock will ensure there is space for at least one block...
func (h *history) ensureBlock() {
if cap(h.b) < h.allocFrameBuffer {
h.b = make([]byte, 0, h.allocFrameBuffer)
return
}
avail := cap(h.b) - len(h.b)
if avail >= h.windowSize || avail > maxCompressedBlockSize {
return
}
// Move data down so we only have window size left.
// We know we have less than window size in b at this point.
discard := len(h.b) - h.windowSize
copy(h.b, h.b[discard:])
h.b = h.b[:h.windowSize]
}
// append bytes to history without ever discarding anything.
func (h *history) appendKeep(b []byte) {
h.b = append(h.b, b...)

View File

@@ -195,7 +195,6 @@ func (d *Digest) UnmarshalBinary(b []byte) error {
b, d.v4 = consumeUint64(b)
b, d.total = consumeUint64(b)
copy(d.mem[:], b)
b = b[len(d.mem):]
d.n = int(d.total % uint64(len(d.mem)))
return nil
}

View File

@@ -1,12 +1,13 @@
// +build !appengine
// +build gc
// +build !purego
// +build !noasm
#include "textflag.h"
// Register allocation:
// AX h
// CX pointer to advance through b
// SI pointer to advance through b
// DX n
// BX loop end
// R8 v1, k1
@@ -16,39 +17,39 @@
// R12 tmp
// R13 prime1v
// R14 prime2v
// R15 prime4v
// DI prime4v
// round reads from and advances the buffer pointer in CX.
// round reads from and advances the buffer pointer in SI.
// It assumes that R13 has prime1v and R14 has prime2v.
#define round(r) \
MOVQ (CX), R12 \
ADDQ $8, CX \
MOVQ (SI), R12 \
ADDQ $8, SI \
IMULQ R14, R12 \
ADDQ R12, r \
ROLQ $31, r \
IMULQ R13, r
// mergeRound applies a merge round on the two registers acc and val.
// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
#define mergeRound(acc, val) \
IMULQ R14, val \
ROLQ $31, val \
IMULQ R13, val \
XORQ val, acc \
IMULQ R13, acc \
ADDQ R15, acc
ADDQ DI, acc
// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOSPLIT, $0-32
// Load fixed primes.
MOVQ ·prime1v(SB), R13
MOVQ ·prime2v(SB), R14
MOVQ ·prime4v(SB), R15
MOVQ ·prime4v(SB), DI
// Load slice.
MOVQ b_base+0(FP), CX
MOVQ b_base+0(FP), SI
MOVQ b_len+8(FP), DX
LEAQ (CX)(DX*1), BX
LEAQ (SI)(DX*1), BX
// The first loop limit will be len(b)-32.
SUBQ $32, BX
@@ -65,14 +66,14 @@ TEXT ·Sum64(SB), NOSPLIT, $0-32
XORQ R11, R11
SUBQ R13, R11
// Loop until CX > BX.
// Loop until SI > BX.
blockLoop:
round(R8)
round(R9)
round(R10)
round(R11)
CMPQ CX, BX
CMPQ SI, BX
JLE blockLoop
MOVQ R8, AX
@@ -100,16 +101,16 @@ noBlocks:
afterBlocks:
ADDQ DX, AX
// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
ADDQ $24, BX
CMPQ CX, BX
CMPQ SI, BX
JG fourByte
wordLoop:
// Calculate k1.
MOVQ (CX), R8
ADDQ $8, CX
MOVQ (SI), R8
ADDQ $8, SI
IMULQ R14, R8
ROLQ $31, R8
IMULQ R13, R8
@@ -117,18 +118,18 @@ wordLoop:
XORQ R8, AX
ROLQ $27, AX
IMULQ R13, AX
ADDQ R15, AX
ADDQ DI, AX
CMPQ CX, BX
CMPQ SI, BX
JLE wordLoop
fourByte:
ADDQ $4, BX
CMPQ CX, BX
CMPQ SI, BX
JG singles
MOVL (CX), R8
ADDQ $4, CX
MOVL (SI), R8
ADDQ $4, SI
IMULQ R13, R8
XORQ R8, AX
@@ -138,19 +139,19 @@ fourByte:
singles:
ADDQ $4, BX
CMPQ CX, BX
CMPQ SI, BX
JGE finalize
singlesLoop:
MOVBQZX (CX), R12
ADDQ $1, CX
MOVBQZX (SI), R12
ADDQ $1, SI
IMULQ ·prime5v(SB), R12
XORQ R12, AX
ROLQ $11, AX
IMULQ R13, AX
CMPQ CX, BX
CMPQ SI, BX
JL singlesLoop
finalize:
@@ -179,13 +180,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40
MOVQ ·prime2v(SB), R14
// Load slice.
MOVQ arg1_base+8(FP), CX
MOVQ arg1_len+16(FP), DX
LEAQ (CX)(DX*1), BX
MOVQ b_base+8(FP), SI
MOVQ b_len+16(FP), DX
LEAQ (SI)(DX*1), BX
SUBQ $32, BX
// Load vN from d.
MOVQ arg+0(FP), AX
MOVQ d+0(FP), AX
MOVQ 0(AX), R8 // v1
MOVQ 8(AX), R9 // v2
MOVQ 16(AX), R10 // v3
@@ -199,7 +200,7 @@ blockLoop:
round(R10)
round(R11)
CMPQ CX, BX
CMPQ SI, BX
JLE blockLoop
// Copy vN back to d.
@@ -208,8 +209,8 @@ blockLoop:
MOVQ R10, 16(AX)
MOVQ R11, 24(AX)
// The number of bytes written is CX minus the old base pointer.
SUBQ arg1_base+8(FP), CX
MOVQ CX, ret+32(FP)
// The number of bytes written is SI minus the old base pointer.
SUBQ b_base+8(FP), SI
MOVQ SI, ret+32(FP)
RET

View File

@@ -0,0 +1,186 @@
// +build gc,!purego,!noasm
#include "textflag.h"
// Register allocation.
#define digest R1
#define h R2 // Return value.
#define p R3 // Input pointer.
#define len R4
#define nblocks R5 // len / 32.
#define prime1 R7
#define prime2 R8
#define prime3 R9
#define prime4 R10
#define prime5 R11
#define v1 R12
#define v2 R13
#define v3 R14
#define v4 R15
#define x1 R20
#define x2 R21
#define x3 R22
#define x4 R23
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \
// x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \
#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \
// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \
// The primes are repeated here to ensure that they're stored
// in a contiguous array, so we can load them with LDP.
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
GLOBL primes<>(SB), NOPTR+RODATA, $40
// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
LDP b_base+0(FP), (p, len)
LDP primes<> +0(SB), (prime1, prime2)
LDP primes<>+16(SB), (prime3, prime4)
MOVD primes<>+32(SB), prime5
CMP $32, len
CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
BLO afterLoop
ADD prime1, prime2, v1
MOVD prime2, v2
MOVD $0, v3
NEG prime1, v4
blocksLoop()
ROR $64-1, v1, x1
ROR $64-7, v2, x2
ADD x1, x2
ROR $64-12, v3, x3
ROR $64-18, v4, x4
ADD x3, x4
ADD x2, x4, h
mergeRound(v1)
mergeRound(v2)
mergeRound(v3)
mergeRound(v4)
afterLoop:
ADD len, h
TBZ $4, len, try8
LDP.P 16(p), (x1, x2)
round0(x1)
ROR $64-27, h
EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
round0(x2)
ROR $64-27, h
EOR x2 @> 64-27, h
MADD h, prime4, prime1, h
try8:
TBZ $3, len, try4
MOVD.P 8(p), x1
round0(x1)
ROR $64-27, h
EOR x1 @> 64-27, h
MADD h, prime4, prime1, h
try4:
TBZ $2, len, try2
MOVWU.P 4(p), x2
MUL prime1, x2
ROR $64-23, h
EOR x2 @> 64-23, h
MADD h, prime3, prime2, h
try2:
TBZ $1, len, try1
MOVHU.P 2(p), x3
AND $255, x3, x1
LSR $8, x3, x2
MUL prime5, x1
ROR $64-11, h
EOR x1 @> 64-11, h
MUL prime1, h
MUL prime5, x2
ROR $64-11, h
EOR x2 @> 64-11, h
MUL prime1, h
try1:
TBZ $0, len, end
MOVBU (p), x4
MUL prime5, x4
ROR $64-11, h
EOR x4 @> 64-11, h
MUL prime1, h
end:
EOR h >> 33, h
MUL prime2, h
EOR h >> 29, h
MUL prime3, h
EOR h >> 32, h
MOVD h, ret+24(FP)
RET
// func writeBlocks(d *Digest, b []byte) int
//
// Assumes len(b) >= 32.
TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
LDP primes<>(SB), (prime1, prime2)
// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)
LDP b_base+8(FP), (p, len)
blocksLoop()
// Store updated state.
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)
BIC $31, len
MOVD len, ret+32(FP)
RET

View File

@@ -1,6 +1,9 @@
//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm
// +build amd64 arm64
// +build !appengine
// +build gc
// +build !purego
// +build !noasm
package xxhash
@@ -10,4 +13,4 @@ package xxhash
func Sum64(b []byte) uint64
//go:noescape
func writeBlocks(*Digest, []byte) int
func writeBlocks(d *Digest, b []byte) int

View File

@@ -1,4 +1,5 @@
// +build !amd64 appengine !gc purego
//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm
// +build !amd64,!arm64 appengine !gc purego noasm
package xxhash

View File

@@ -20,6 +20,10 @@ type seq struct {
llCode, mlCode, ofCode uint8
}
type seqVals struct {
ll, ml, mo int
}
func (s seq) String() string {
if s.offset <= 3 {
if s.offset == 0 {
@@ -61,16 +65,19 @@ type sequenceDecs struct {
offsets sequenceDec
matchLengths sequenceDec
prevOffset [3]int
hist []byte
dict []byte
literals []byte
out []byte
nSeqs int
br *bitReader
seqSize int
windowSize int
maxBits uint8
maxSyncLen uint64
}
// initialize all 3 decoders from the stream input.
func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []byte) error {
func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error {
if err := s.litLengths.init(br); err != nil {
return errors.New("litLengths:" + err.Error())
}
@@ -80,8 +87,7 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
if err := s.matchLengths.init(br); err != nil {
return errors.New("matchLengths:" + err.Error())
}
s.literals = literals
s.hist = hist.b
s.br = br
s.prevOffset = hist.recentOffsets
s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
s.windowSize = hist.windowSize
@@ -93,12 +99,127 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
return nil
}
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
if len(s.dict) == 0 {
return s.executeSimple(seqs, hist)
}
// Ensure we have enough output size...
if len(s.out)+s.seqSize > cap(s.out) {
addBytes := s.seqSize + len(s.out)
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
}
if debugDecoder {
printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize)
}
var t = len(s.out)
out := s.out[:t+s.seqSize]
for _, seq := range seqs {
// Add literals
copy(out[t:], s.literals[:seq.ll])
t += seq.ll
s.literals = s.literals[seq.ll:]
// Copy from dictionary...
if seq.mo > t+len(hist) || seq.mo > s.windowSize {
if len(s.dict) == 0 {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
}
// we may be in dictionary.
dictO := len(s.dict) - (seq.mo - (t + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict))
}
end := dictO + seq.ml
if end > len(s.dict) {
n := len(s.dict) - dictO
copy(out[t:], s.dict[dictO:])
t += n
seq.ml -= n
} else {
copy(out[t:], s.dict[dictO:end])
t += end - dictO
continue
}
}
// Copy from history.
if v := seq.mo - t; v > 0 {
// v is the start position in history from end.
start := len(hist) - v
if seq.ml > v {
// Some goes into current block.
// Copy remainder of history
copy(out[t:], hist[start:])
t += v
seq.ml -= v
} else {
copy(out[t:], hist[start:start+seq.ml])
t += seq.ml
continue
}
}
// We must be in current buffer now
if seq.ml > 0 {
start := t - seq.mo
if seq.ml <= t-start {
// No overlap
copy(out[t:], out[start:start+seq.ml])
t += seq.ml
continue
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
src := out[start : start+seq.ml]
dst := out[t:]
dst = dst[:len(src)]
t += len(src)
// Destination is the space we just added.
for i := range src {
dst[i] = src[i]
}
}
}
}
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
t += len(s.literals)
if t != len(out) {
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
}
}
s.out = out
return nil
}
// decode sequences from the stream with the provided history.
func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
func (s *sequenceDecs) decodeSync(hist []byte) error {
supported, err := s.decodeSyncSimple(hist)
if supported {
return err
}
br := s.br
seqs := s.nSeqs
startSize := len(s.out)
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
out := s.out
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
for i := seqs - 1; i >= 0; i-- {
if br.overread() {
@@ -151,7 +272,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("temp was 0")
println("WARNING: temp was 0")
temp = 1
}
@@ -176,51 +297,49 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
if ll > len(s.literals) {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
}
size := ll + ml + len(s.out)
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size", size)
return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
}
if size > cap(s.out) {
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
// but could be if destination slice is too small for sync operations.
// over-allocating here can create a large amount of GC pressure so we try to keep
// it as contained as possible
used := len(s.out) - startSize
used := len(out) - startSize
addBytes := 256 + ll + ml + used>>2
// Clamp to max block size.
if used+addBytes > maxBlockSize {
addBytes = maxBlockSize - used
}
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
out = append(out, make([]byte, addBytes)...)
out = out[:len(out)-addBytes]
}
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
// Add literals
s.out = append(s.out, s.literals[:ll]...)
out = append(out, s.literals[:ll]...)
s.literals = s.literals[ll:]
out := s.out
if mo == 0 && ml > 0 {
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}
if mo > len(s.out)+len(hist) || mo > s.windowSize {
if mo > len(out)+len(hist) || mo > s.windowSize {
if len(s.dict) == 0 {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
}
// we may be in dictionary.
dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
}
end := dictO + ml
if end > len(s.dict) {
out = append(out, s.dict[dictO:]...)
mo -= len(s.dict) - dictO
ml -= len(s.dict) - dictO
} else {
out = append(out, s.dict[dictO:end]...)
@@ -231,26 +350,25 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
// Copy from history.
// TODO: Blocks without history could be made to ignore this completely.
if v := mo - len(s.out); v > 0 {
if v := mo - len(out); v > 0 {
// v is the start position in history from end.
start := len(s.hist) - v
start := len(hist) - v
if ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, s.hist[start:]...)
mo -= v
out = append(out, hist[start:]...)
ml -= v
} else {
out = append(out, s.hist[start:start+ml]...)
out = append(out, hist[start:start+ml]...)
ml = 0
}
}
// We must be in current buffer now
if ml > 0 {
start := len(s.out) - mo
if ml <= len(s.out)-start {
start := len(out) - mo
if ml <= len(out)-start {
// No overlap
out = append(out, s.out[start:start+ml]...)
out = append(out, out[start:start+ml]...)
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
@@ -264,7 +382,6 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
}
}
}
s.out = out
if i == 0 {
// This is the last sequence, so we shouldn't update state.
break
@@ -278,7 +395,8 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
@@ -291,19 +409,14 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
}
}
// Add final literals
s.out = append(s.out, s.literals...)
return nil
}
// Check if space for literals
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
}
// update states, at least 27 bits must be available.
func (s *sequenceDecs) update(br *bitReader) {
// Max 8 bits
s.litLengths.state.next(br)
// Max 9 bits
s.matchLengths.state.next(br)
// Max 8 bits
s.offsets.state.next(br)
// Add final literals
s.out = append(out, s.literals...)
return br.close()
}
var bitMask [16]uint16
@@ -314,87 +427,6 @@ func init() {
}
}
// update states, at least 27 bits must be available.
func (s *sequenceDecs) updateAlt(br *bitReader) {
// Update all 3 states at once. Approx 20% faster.
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
nBits := a.nbBits() + b.nbBits() + c.nbBits()
if nBits == 0 {
s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
s.offsets.state.state = s.offsets.state.dt[c.newState()]
return
}
bits := br.getBitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
lowBits = uint16(bits >> (c.nbBits() & 31))
lowBits &= bitMask[b.nbBits()&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits()&15]
s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
}
// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()
ml, mlB := mlState.final()
mo, moB := ofState.final()
// extra bits are stored in reverse order.
br.fillFast()
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
return
}
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}
if mo == 0 {
mo = s.prevOffset[0]
return
}
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("temp was 0")
temp = 1
}
if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
return
}
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()
@@ -457,36 +489,3 @@ func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
s.prevOffset[0] = temp
return temp
}
// mergeHistory will merge history.
func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) {
for i := uint(0); i < 3; i++ {
var sNew, sHist *sequenceDec
switch i {
default:
// same as "case 0":
sNew = &s.litLengths
sHist = &hist.litLengths
case 1:
sNew = &s.offsets
sHist = &hist.offsets
case 2:
sNew = &s.matchLengths
sHist = &hist.matchLengths
}
if sNew.repeat {
if sHist.fse == nil {
return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i)
}
continue
}
if sNew.fse == nil {
return nil, fmt.Errorf("sequence stream %d, no fse found", i)
}
if sHist.fse != nil && !sHist.fse.preDefined {
fseDecoderPool.Put(sHist.fse)
}
sHist.fse = sNew.fse
}
return hist, nil
}

View File

@@ -0,0 +1,368 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
package zstd
import (
"fmt"
"github.com/klauspost/compress/internal/cpuinfo"
)
type decodeSyncAsmContext struct {
llTable []decSymbol
mlTable []decSymbol
ofTable []decSymbol
llState uint64
mlState uint64
ofState uint64
iteration int
litRemain int
out []byte
outPosition int
literals []byte
litPosition int
history []byte
windowSize int
ll int // set on error (not for all errors, please refer to _generate/gen.go)
ml int // set on error (not for all errors, please refer to _generate/gen.go)
mo int // set on error (not for all errors, please refer to _generate/gen.go)
}
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// decode sequences from the stream with the provided history but without a dictionary.
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if len(s.dict) > 0 {
return false, nil
}
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
return false, nil
}
// FIXME: Using unsafe memory copies leads to rare, random crashes
// with fuzz testing. It is therefore disabled for now.
const useSafe = true
/*
useSafe := false
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
useSafe = true
}
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
useSafe = true
}
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
useSafe = true
}
*/
br := s.br
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
ctx := decodeSyncAsmContext{
llTable: s.litLengths.fse.dt[:maxTablesize],
mlTable: s.matchLengths.fse.dt[:maxTablesize],
ofTable: s.offsets.fse.dt[:maxTablesize],
llState: uint64(s.litLengths.state.state),
mlState: uint64(s.matchLengths.state.state),
ofState: uint64(s.offsets.state.state),
iteration: s.nSeqs - 1,
litRemain: len(s.literals),
out: s.out,
outPosition: len(s.out),
literals: s.literals,
windowSize: s.windowSize,
history: hist,
}
s.seqSize = 0
startSize := len(s.out)
var errCode int
if cpuinfo.HasBMI2() {
if useSafe {
errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
} else {
errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
}
} else {
if useSafe {
errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
} else {
errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
}
}
switch errCode {
case noError:
break
case errorMatchLenOfsMismatch:
return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
case errorMatchLenTooBig:
return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
case errorMatchOffTooBig:
return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
ctx.mo, ctx.outPosition+len(hist)-startSize)
case errorNotEnoughLiterals:
return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
ctx.ll, ctx.litRemain+ctx.ll)
case errorNotEnoughSpace:
size := ctx.outPosition + ctx.ll + ctx.ml
if debugDecoder {
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
}
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
default:
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
}
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
return true, err
}
s.literals = s.literals[ctx.litPosition:]
t := ctx.outPosition
s.out = s.out[:t]
// Add final literals
s.out = append(s.out, s.literals...)
if debugDecoder {
t += len(s.literals)
if t != len(s.out) {
panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
}
}
return true, nil
}
// --------------------------------------------------------------------------------
type decodeAsmContext struct {
llTable []decSymbol
mlTable []decSymbol
ofTable []decSymbol
llState uint64
mlState uint64
ofState uint64
iteration int
seqs []seqVals
litRemain int
}
const noError = 0
// error reported when mo == 0 && ml > 0
const errorMatchLenOfsMismatch = 1
// error reported when ml > maxMatchLen
const errorMatchLenTooBig = 2
// error reported when mo > available history or mo > s.windowSize
const errorMatchOffTooBig = 3
// error reported when the sum of literal lengths exeeceds the literal buffer size
const errorNotEnoughLiterals = 4
// error reported when capacity of `out` is too small
const errorNotEnoughSpace = 5
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// decode sequences from the stream without the provided history.
func (s *sequenceDecs) decode(seqs []seqVals) error {
br := s.br
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
ctx := decodeAsmContext{
llTable: s.litLengths.fse.dt[:maxTablesize],
mlTable: s.matchLengths.fse.dt[:maxTablesize],
ofTable: s.offsets.fse.dt[:maxTablesize],
llState: uint64(s.litLengths.state.state),
mlState: uint64(s.matchLengths.state.state),
ofState: uint64(s.offsets.state.state),
seqs: seqs,
iteration: len(seqs) - 1,
litRemain: len(s.literals),
}
s.seqSize = 0
lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
var errCode int
if cpuinfo.HasBMI2() {
if lte56bits {
errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
} else {
errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
}
} else {
if lte56bits {
errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
} else {
errCode = sequenceDecs_decode_amd64(s, br, &ctx)
}
}
if errCode != 0 {
i := len(seqs) - ctx.iteration - 1
switch errCode {
case errorMatchLenOfsMismatch:
ml := ctx.seqs[i].ml
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
case errorMatchLenTooBig:
ml := ctx.seqs[i].ml
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
case errorNotEnoughLiterals:
ll := ctx.seqs[i].ll
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
}
return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
}
if ctx.litRemain < 0 {
return fmt.Errorf("literal count is too big: total available %d, total requested %d",
len(s.literals), len(s.literals)-ctx.litRemain)
}
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
}
return err
}
// --------------------------------------------------------------------------------
type executeAsmContext struct {
seqs []seqVals
seqIndex int
out []byte
history []byte
literals []byte
outPosition int
litPosition int
windowSize int
}
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
//
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Same as above, but with safe memcopies
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
// executeSimple handles cases when dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
}
if debugDecoder {
printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
}
var t = len(s.out)
out := s.out[:t+s.seqSize]
ctx := executeAsmContext{
seqs: seqs,
seqIndex: 0,
out: out,
history: hist,
outPosition: t,
litPosition: 0,
literals: s.literals,
windowSize: s.windowSize,
}
var ok bool
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
} else {
ok = sequenceDecs_executeSimple_amd64(&ctx)
}
if !ok {
return fmt.Errorf("match offset (%d) bigger than current history (%d)",
seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
}
s.literals = s.literals[ctx.litPosition:]
t = ctx.outPosition
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
t += len(s.literals)
if t != len(out) {
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
}
}
s.out = out
return nil
}

4100
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,237 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
package zstd
import (
"fmt"
"io"
)
// decode sequences from the stream with the provided history but without dictionary.
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
return false, nil
}
// decode sequences from the stream without the provided history.
func (s *sequenceDecs) decode(seqs []seqVals) error {
br := s.br
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
s.seqSize = 0
litRemain := len(s.literals)
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
// Final will not read from stream.
var llB, mlB, moB uint8
ll, llB = llState.final()
ml, mlB = mlState.final()
mo, moB = ofState.final()
// extra bits are stored in reverse order.
br.fillFast()
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
} else {
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}
if mo == 0 {
mo = s.prevOffset[0]
} else {
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("WARNING: temp was 0")
temp = 1
}
if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
}
}
br.fillFast()
} else {
if br.overread() {
if debugDecoder {
printf("reading sequence %d, exceeded available data\n", i)
}
return io.ErrUnexpectedEOF
}
ll, mo, ml = s.next(br, llState, mlState, ofState)
br.fill()
}
if debugSequences {
println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
}
// Evaluate.
// We might be doing this async, so do it early.
if mo == 0 && ml > 0 {
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
s.seqSize += ll + ml
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
litRemain -= ll
if litRemain < 0 {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
}
seqs[i] = seqVals{
ll: ll,
ml: ml,
mo: mo,
}
if i == len(seqs)-1 {
// This is the last sequence, so we shouldn't update state.
break
}
// Manually inlined, ~ 5-20% faster
// Update all 3 states at once. Approx 20% faster.
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
if nBits == 0 {
llState = llTable[llState.newState()&maxTableMask]
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
}
}
s.seqSize += litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
}
return err
}
// executeSimple handles cases when a dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
if len(s.out)+s.seqSize > cap(s.out) {
addBytes := s.seqSize + len(s.out)
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
}
if debugDecoder {
printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
}
var t = len(s.out)
out := s.out[:t+s.seqSize]
for _, seq := range seqs {
// Add literals
copy(out[t:], s.literals[:seq.ll])
t += seq.ll
s.literals = s.literals[seq.ll:]
// Malformed input
if seq.mo > t+len(hist) || seq.mo > s.windowSize {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
}
// Copy from history.
if v := seq.mo - t; v > 0 {
// v is the start position in history from end.
start := len(hist) - v
if seq.ml > v {
// Some goes into the current block.
// Copy remainder of history
copy(out[t:], hist[start:])
t += v
seq.ml -= v
} else {
copy(out[t:], hist[start:start+seq.ml])
t += seq.ml
continue
}
}
// We must be in the current buffer now
if seq.ml > 0 {
start := t - seq.mo
if seq.ml <= t-start {
// No overlap
copy(out[t:], out[start:start+seq.ml])
t += seq.ml
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
src := out[start : start+seq.ml]
dst := out[t:]
dst = dst[:len(src)]
t += len(src)
// Destination is the space we just added.
for i := range src {
dst[i] = src[i]
}
}
}
}
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
t += len(s.literals)
if t != len(out) {
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
}
}
s.out = out
return nil
}

View File

@@ -35,7 +35,6 @@ func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) {
// Ensure we cannot reuse by accident
prevEnc := *prev
prevEnc.symbolLen = 0
return
}
compareSwap(ll, &s.llEnc, &s.llPrev)
compareSwap(ml, &s.mlEnc, &s.mlPrev)

View File

@@ -11,7 +11,7 @@ import (
"io"
"github.com/klauspost/compress/huff0"
"github.com/klauspost/compress/snappy"
snappy "github.com/klauspost/compress/internal/snapref"
)
const (
@@ -185,7 +185,6 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
r.block.reset(nil)
r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen])
if err != nil {
println("snappy.Decode:", err)
return written, err
}
err = r.block.encodeLits(r.block.literals, false)
@@ -204,7 +203,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
written += int64(n)
continue
case chunkTypeUncompressedData:
if debug {
if debugEncoder {
println("Uncompressed, chunklen", chunkLen)
}
// Section 4.3. Uncompressed data (chunk type 0x01).
@@ -247,7 +246,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
continue
case chunkTypeStreamIdentifier:
if debug {
if debugEncoder {
println("stream id", chunkLen, len(snappyMagicBody))
}
// Section 4.1. Stream identifier (chunk type 0xff).

141
vendor/github.com/klauspost/compress/zstd/zip.go generated vendored Normal file
View File

@@ -0,0 +1,141 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"errors"
"io"
"sync"
)
// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip.
// See https://www.winzip.com/win/en/comp_info.html
const ZipMethodWinZip = 93
// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression.
// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression.
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
const ZipMethodPKWare = 20
// zipReaderPool is the default reader pool.
var zipReaderPool = sync.Pool{New: func() interface{} {
z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1))
if err != nil {
panic(err)
}
return z
}}
// newZipReader creates a pooled zip decompressor.
func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser {
pool := &zipReaderPool
if len(opts) > 0 {
opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...)
// Force concurrency 1
opts = append(opts, WithDecoderConcurrency(1))
// Create our own pool
pool = &sync.Pool{}
}
return func(r io.Reader) io.ReadCloser {
dec, ok := pool.Get().(*Decoder)
if ok {
dec.Reset(r)
} else {
d, err := NewReader(r, opts...)
if err != nil {
panic(err)
}
dec = d
}
return &pooledZipReader{dec: dec, pool: pool}
}
}
type pooledZipReader struct {
mu sync.Mutex // guards Close and Read
pool *sync.Pool
dec *Decoder
}
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
r.mu.Lock()
defer r.mu.Unlock()
if r.dec == nil {
return 0, errors.New("read after close or EOF")
}
dec, err := r.dec.Read(p)
if err == io.EOF {
r.dec.Reset(nil)
r.pool.Put(r.dec)
r.dec = nil
}
return dec, err
}
func (r *pooledZipReader) Close() error {
r.mu.Lock()
defer r.mu.Unlock()
var err error
if r.dec != nil {
err = r.dec.Reset(nil)
r.pool.Put(r.dec)
r.dec = nil
}
return err
}
type pooledZipWriter struct {
mu sync.Mutex // guards Close and Read
enc *Encoder
pool *sync.Pool
}
func (w *pooledZipWriter) Write(p []byte) (n int, err error) {
w.mu.Lock()
defer w.mu.Unlock()
if w.enc == nil {
return 0, errors.New("Write after Close")
}
return w.enc.Write(p)
}
func (w *pooledZipWriter) Close() error {
w.mu.Lock()
defer w.mu.Unlock()
var err error
if w.enc != nil {
err = w.enc.Close()
w.pool.Put(w.enc)
w.enc = nil
}
return err
}
// ZipCompressor returns a compressor that can be registered with zip libraries.
// The provided encoder options will be used on all encodes.
func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
var pool sync.Pool
return func(w io.Writer) (io.WriteCloser, error) {
enc, ok := pool.Get().(*Encoder)
if ok {
enc.Reset(w)
} else {
var err error
enc, err = NewWriter(w, opts...)
if err != nil {
return nil, err
}
}
return &pooledZipWriter{enc: enc, pool: &pool}, nil
}
}
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
// See ZipCompressor for example.
// Options can be specified. WithDecoderConcurrency(1) is forced,
// and by default a 128MB maximum decompression window is specified.
// The window size can be overridden if required.
func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser {
return newZipReader(opts...)
}

View File

@@ -5,6 +5,7 @@ package zstd
import (
"bytes"
"encoding/binary"
"errors"
"log"
"math"
@@ -14,6 +15,12 @@ import (
// enable debug printing
const debug = false
// enable encoding debug printing
const debugEncoder = debug
// enable decoding debug printing
const debugDecoder = debug
// Enable extra assertions.
const debugAsserts = debug || false
@@ -32,6 +39,9 @@ const zstdMinMatch = 3
// Reset the buffer offset when reaching this.
const bufferReset = math.MaxInt32 - MaxWindowSize
// fcsUnknown is used for unknown frame content size.
const fcsUnknown = math.MaxUint64
var (
// ErrReservedBlockType is returned when a reserved block type is found.
// Typically this indicates wrong or corrupted input.
@@ -45,6 +55,10 @@ var (
// Typically returned on invalid input.
ErrBlockTooSmall = errors.New("block too small")
// ErrUnexpectedBlockSize is returned when a block has unexpected size.
// Typically returned on invalid input.
ErrUnexpectedBlockSize = errors.New("unexpected block size")
// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
// Typically this indicates wrong or corrupted input.
ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
@@ -68,6 +82,10 @@ var (
// This is only returned if SingleSegment is specified on the frame.
ErrFrameSizeExceeded = errors.New("frame size exceeded")
// ErrFrameSizeMismatch is returned if the stated frame size does not match the expected size.
// This is only returned if SingleSegment is specified on the frame.
ErrFrameSizeMismatch = errors.New("frame size does not match size on stream")
// ErrCRCMismatch is returned if CRC mismatches.
ErrCRCMismatch = errors.New("CRC check failed")
@@ -81,28 +99,17 @@ var (
)
func println(a ...interface{}) {
if debug {
if debug || debugDecoder || debugEncoder {
log.Println(a...)
}
}
func printf(format string, a ...interface{}) {
if debug {
if debug || debugDecoder || debugEncoder {
log.Printf(format, a...)
}
}
// matchLenFast does matching, but will not match the last up to 7 bytes.
func matchLenFast(a, b []byte) int {
endI := len(a) & (math.MaxInt32 - 7)
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
return i + bits.TrailingZeros64(diff)>>3
}
}
return endI
}
// matchLen returns the maximum length.
// a must be the shortest of the two.
// The function also returns whether all bytes matched.
@@ -126,26 +133,15 @@ func matchLen(a, b []byte) int {
}
func load3232(b []byte, i int32) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
return binary.LittleEndian.Uint64(b[i:])
}
func load64(b []byte, i int) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
return binary.LittleEndian.Uint64(b[i:])
}
type byter interface {