From 21a175860de0532ed55153a19f9c6d8e6c3fd57b Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sat, 3 Apr 2021 00:54:03 +0200 Subject: [PATCH] go.mod github.com/klauspost/compress v1.11.13 full diff: https://github.com/klauspost/compress/compare/v1.11.3...v1.11.13 adds arm64 decompression support, various performance improvements Signed-off-by: Sebastiaan van Stijn --- go.mod | 2 +- go.sum | 3 +- integration/client/go.sum | 7 +- .../klauspost/compress/fse/compress.go | 10 +- .../klauspost/compress/fse/decompress.go | 4 +- .../klauspost/compress/huff0/README.md | 4 +- .../klauspost/compress/huff0/compress.go | 4 +- .../klauspost/compress/snappy/snappy.go | 2 +- .../klauspost/compress/zstd/README.md | 25 +- .../klauspost/compress/zstd/blockdec.go | 2 +- .../klauspost/compress/zstd/blockenc.go | 43 +- .../klauspost/compress/zstd/decodeheader.go | 202 ++++++ .../klauspost/compress/zstd/decoder.go | 27 +- .../klauspost/compress/zstd/enc_base.go | 40 +- .../klauspost/compress/zstd/enc_best.go | 487 +++++++++++++++ .../klauspost/compress/zstd/enc_better.go | 591 +++++++++++++++++- .../klauspost/compress/zstd/enc_dfast.go | 414 +++++++++++- .../klauspost/compress/zstd/enc_fast.go | 373 ++++++++++- .../klauspost/compress/zstd/encoder.go | 10 +- .../compress/zstd/encoder_options.go | 54 +- .../klauspost/compress/zstd/fse_encoder.go | 12 +- .../klauspost/compress/zstd/seqdec.go | 15 +- .../klauspost/compress/zstd/snappy.go | 2 +- .../klauspost/compress/zstd/zstd.go | 12 + vendor/modules.txt | 2 +- 25 files changed, 2248 insertions(+), 99 deletions(-) create mode 100644 vendor/github.com/klauspost/compress/zstd/decodeheader.go create mode 100644 vendor/github.com/klauspost/compress/zstd/enc_best.go diff --git a/go.mod b/go.mod index dd7dfbdef..0b1cd372e 100644 --- a/go.mod +++ b/go.mod @@ -34,7 +34,7 @@ require ( github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/hashicorp/go-multierror v1.0.0 github.com/imdario/mergo v0.3.11 - github.com/klauspost/compress v1.11.3 + github.com/klauspost/compress v1.11.13 github.com/moby/sys/mountinfo v0.4.1 github.com/moby/sys/symlink v0.1.0 github.com/opencontainers/go-digest v1.0.0 diff --git a/go.sum b/go.sum index f0ca6e183..dca85fe58 100644 --- a/go.sum +++ b/go.sum @@ -350,8 +350,9 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.11.3 h1:dB4Bn0tN3wdCzQxnS8r06kV74qN/TAfaIS0bVE8h3jc= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4= +github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/integration/client/go.sum b/integration/client/go.sum index a385a5815..aa4d0c18f 100644 --- a/integration/client/go.sum +++ b/integration/client/go.sum @@ -31,7 +31,6 @@ github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935 github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= @@ -236,8 +235,8 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.11.3 h1:dB4Bn0tN3wdCzQxnS8r06kV74qN/TAfaIS0bVE8h3jc= -github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4= +github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -298,6 +297,8 @@ github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.m github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= github.com/opencontainers/selinux v1.8.0 h1:+77ba4ar4jsCbL1GLbFL8fFM57w6suPfSS9PDLDY7KM= github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= +github.com/pelletier/go-toml v1.8.1 h1:1Nf83orprkJyknT6h7zbuEGUEjcyVlCxSUGTENmNCRM= +github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index b69237c9b..0d31f5ebc 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -301,7 +301,7 @@ func (s *Scratch) writeCount() error { out[outP+1] = byte(bitStream >> 8) outP += (bitCount + 7) / 8 - if uint16(charnum) > s.symbolLen { + if charnum > s.symbolLen { return errors.New("internal error: charnum > s.symbolLen") } s.Out = out[:outP] @@ -331,7 +331,7 @@ type cTable struct { func (s *Scratch) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < int(tableSize) { + if cap(s.ct.tableSymbol) < tableSize { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] @@ -565,8 +565,8 @@ func (s *Scratch) normalizeCount2() error { distributed uint32 total = uint32(s.br.remain()) tableLog = s.actualTableLog - lowThreshold = uint32(total >> tableLog) - lowOne = uint32((total * 3) >> (tableLog + 1)) + lowThreshold = total >> tableLog + lowOne = (total * 3) >> (tableLog + 1) ) for i, cnt := range s.count[:s.symbolLen] { if cnt == 0 { @@ -591,7 +591,7 @@ func (s *Scratch) normalizeCount2() error { if (total / toDistribute) > lowOne { // risk of rounding to zero - lowOne = uint32((total * 3) / (toDistribute * 2)) + lowOne = (total * 3) / (toDistribute * 2) for i, cnt := range s.count[:s.symbolLen] { if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { s.norm[i] = 1 diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index 413ec3b3c..926f5f153 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -172,7 +172,7 @@ type decSymbol struct { // allocDtable will allocate decoding tables if they are not big enough. func (s *Scratch) allocDtable() { tableSize := 1 << s.actualTableLog - if cap(s.decTable) < int(tableSize) { + if cap(s.decTable) < tableSize { s.decTable = make([]decSymbol, tableSize) } s.decTable = s.decTable[:tableSize] @@ -340,7 +340,7 @@ type decoder struct { func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) { d.dt = dt d.br = in - d.state = uint16(in.getBits(tableLog)) + d.state = in.getBits(tableLog) } // next returns the next symbol and sets the next state. diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md index e12da4db2..8b6e5c663 100644 --- a/vendor/github.com/klauspost/compress/huff0/README.md +++ b/vendor/github.com/klauspost/compress/huff0/README.md @@ -14,7 +14,9 @@ but it can be used as a secondary step to compressors (like Snappy) that does no ## News - * Mar 2018: First implementation released. Consider this beta software for now. +This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package. + +This ensures that most functionality is well tested. # Usage diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index f9ed5f830..dea115b33 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -403,7 +403,7 @@ func (s *Scratch) buildCTable() error { var startNode = int16(s.symbolLen) nonNullRank := s.symbolLen - 1 - nodeNb := int16(startNode) + nodeNb := startNode huffNode := s.nodes[1 : huffNodesLen+1] // This overlays the slice above, but allows "-1" index lookups. @@ -580,7 +580,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { // Get pos of last (smallest) symbol per rank { - currentNbBits := uint8(maxNbBits) + currentNbBits := maxNbBits for pos := int(n); pos >= 0; pos-- { if huffNode[pos].nbBits >= currentNbBits { continue diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/klauspost/compress/snappy/snappy.go index 74a36689e..ea58ced88 100644 --- a/vendor/github.com/klauspost/compress/snappy/snappy.go +++ b/vendor/github.com/klauspost/compress/snappy/snappy.go @@ -94,5 +94,5 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli) // https://github.com/google/snappy/blob/master/framing_format.txt func crc(b []byte) uint32 { c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 + return c>>15 | c<<17 + 0xa282ead8 } diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index 08e553f75..7680bfe1d 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -24,22 +24,21 @@ Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd ### Status: STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively -used by several projects. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), -kindly supplied by [fuzzit.dev](https://fuzzit.dev/). +used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates. There may still be specific combinations of data types/size/settings that could lead to edge cases, so as always, testing is recommended. For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. -The "Fastest" compression ratio is roughly equivalent to zstd level 1. -The "Default" compression ratio is roughly equivalent to zstd level 3 (default). +* The "Fastest" compression ratio is roughly equivalent to zstd level 1. +* The "Default" compression ratio is roughly equivalent to zstd level 3 (default). +* The "Better" compression ratio is roughly equivalent to zstd level 7. +* The "Best" compression ratio is roughly equivalent to zstd level 11. In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast. -Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2. - ### Usage @@ -140,7 +139,7 @@ I have collected some speed examples to compare speed and compression against ot * `file` is the input file. * `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library. -* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default". +* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default"; 3 is "better", 4 is "best". * `insize`/`outsize` is the input/output size. * `millis` is the number of milliseconds used for compression. * `mb/s` is megabytes (2^20 bytes) per second. @@ -154,11 +153,13 @@ file out level insize outsize millis mb/s silesia.tar zskp 1 211947520 73101992 643 313.87 silesia.tar zskp 2 211947520 67504318 969 208.38 silesia.tar zskp 3 211947520 65177448 1899 106.44 +silesia.tar zskp 4 211947520 61381950 8115 24.91 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 silesia.tar zstd 3 211947520 66793289 864 233.68 silesia.tar zstd 6 211947520 62916450 1913 105.66 +silesia.tar zstd 9 211947520 60212393 5063 39.92 gzip, stdlib/this package: silesia.tar gzstd 1 211947520 80007735 1654 122.21 @@ -171,9 +172,11 @@ file out level insize outsize millis mb/s gob-stream zskp 1 1911399616 235022249 3088 590.30 gob-stream zskp 2 1911399616 205669791 3786 481.34 gob-stream zskp 3 1911399616 185792019 9324 195.48 +gob-stream zskp 4 1911399616 171537212 32113 56.76 gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 +gob-stream zstd 9 1911399616 177620386 16175 112.70 gob-stream gzstd 1 1911399616 357382641 10251 177.82 gob-stream gzkp 1 1911399616 362156523 5695 320.08 @@ -185,9 +188,11 @@ file out level insize outsize millis mb/s enwik9 zskp 1 1000000000 343848582 3609 264.18 enwik9 zskp 2 1000000000 317276632 5746 165.97 enwik9 zskp 3 1000000000 294540704 11725 81.34 +enwik9 zskp 4 1000000000 276609671 44029 21.66 enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 +enwik9 zstd 9 1000000000 278348700 28549 33.40 enwik9 gzstd 1 1000000000 382578136 9604 99.30 enwik9 gzkp 1 1000000000 383825945 6544 145.73 @@ -198,9 +203,11 @@ file out level insize outsize millis mb/s github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 +github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18 github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 +github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16 github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79 github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03 @@ -211,9 +218,11 @@ file out level insize outsize millis mb/s rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 +rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79 rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 +rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91 rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40 rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49 @@ -224,9 +233,11 @@ file out level insize outsize millis mb/s nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 +nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49 nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 +nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12 nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83 nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53 ``` diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 4733ea876..b51d922bd 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -613,7 +613,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { // Decode treeless literal block. if litType == literalsBlockTreeless { // TODO: We could send the history early WITHOUT the stream history. - // This would allow decoding treeless literials before the byte history is available. + // This would allow decoding treeless literals before the byte history is available. // Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless. // So not much obvious gain here. diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 083fbb502..9647c64e5 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -22,28 +22,44 @@ type blockEnc struct { dictLitEnc *huff0.Scratch wr bitWriter - extraLits int - last bool - + extraLits int output []byte recentOffsets [3]uint32 prevRecentOffsets [3]uint32 + + last bool + lowMem bool } // init should be used once the block has been created. // If called more than once, the effect is the same as calling reset. func (b *blockEnc) init() { - if cap(b.literals) < maxCompressedLiteralSize { - b.literals = make([]byte, 0, maxCompressedLiteralSize) - } - const defSeqs = 200 - b.literals = b.literals[:0] - if cap(b.sequences) < defSeqs { - b.sequences = make([]seq, 0, defSeqs) - } - if cap(b.output) < maxCompressedBlockSize { - b.output = make([]byte, 0, maxCompressedBlockSize) + if b.lowMem { + // 1K literals + if cap(b.literals) < 1<<10 { + b.literals = make([]byte, 0, 1<<10) + } + const defSeqs = 20 + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + // 1K + if cap(b.output) < 1<<10 { + b.output = make([]byte, 0, 1<<10) + } + } else { + if cap(b.literals) < maxCompressedBlockSize { + b.literals = make([]byte, 0, maxCompressedBlockSize) + } + const defSeqs = 200 + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + if cap(b.output) < maxCompressedBlockSize { + b.output = make([]byte, 0, maxCompressedBlockSize) + } } + if b.coders.mlEnc == nil { b.coders.mlEnc = &fseEncoder{} b.coders.mlPrev = &fseEncoder{} @@ -76,6 +92,7 @@ func (b *blockEnc) reset(prev *blockEnc) { if prev != nil { b.recentOffsets = prev.prevRecentOffsets } + b.dictLitEnc = nil } // reset will reset the block for a new encode, but in the same stream, diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go new file mode 100644 index 000000000..87896c5ea --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -0,0 +1,202 @@ +// Copyright 2020+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "bytes" + "errors" + "io" +) + +// HeaderMaxSize is the maximum size of a Frame and Block Header. +// If less is sent to Header.Decode it *may* still contain enough information. +const HeaderMaxSize = 14 + 3 + +// Header contains information about the first frame and block within that. +type Header struct { + // Window Size the window of data to keep while decoding. + // Will only be set if HasFCS is false. + WindowSize uint64 + + // Frame content size. + // Expected size of the entire frame. + FrameContentSize uint64 + + // Dictionary ID. + // If 0, no dictionary. + DictionaryID uint32 + + // First block information. + FirstBlock struct { + // OK will be set if first block could be decoded. + OK bool + + // Is this the last block of a frame? + Last bool + + // Is the data compressed? + // If true CompressedSize will be populated. + // Unfortunately DecompressedSize cannot be determined + // without decoding the blocks. + Compressed bool + + // DecompressedSize is the expected decompressed size of the block. + // Will be 0 if it cannot be determined. + DecompressedSize int + + // CompressedSize of the data in the block. + // Does not include the block header. + // Will be equal to DecompressedSize if not Compressed. + CompressedSize int + } + + // Skippable will be true if the frame is meant to be skipped. + // No other information will be populated. + Skippable bool + + // If set there is a checksum present for the block content. + HasCheckSum bool + + // If this is true FrameContentSize will have a valid value + HasFCS bool + + SingleSegment bool +} + +// Decode the header from the beginning of the stream. +// This will decode the frame header and the first block header if enough bytes are provided. +// It is recommended to provide at least HeaderMaxSize bytes. +// If the frame header cannot be read an error will be returned. +// If there isn't enough input, io.ErrUnexpectedEOF is returned. +// The FirstBlock.OK will indicate if enough information was available to decode the first block header. +func (h *Header) Decode(in []byte) error { + if len(in) < 4 { + return io.ErrUnexpectedEOF + } + b, in := in[:4], in[4:] + if !bytes.Equal(b, frameMagic) { + if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { + return ErrMagicMismatch + } + *h = Header{Skippable: true} + return nil + } + if len(in) < 1 { + return io.ErrUnexpectedEOF + } + + // Clear output + *h = Header{} + fhd, in := in[0], in[1:] + h.SingleSegment = fhd&(1<<5) != 0 + h.HasCheckSum = fhd&(1<<2) != 0 + + if fhd&(1<<3) != 0 { + return errors.New("Reserved bit set on frame header") + } + + // Read Window_Descriptor + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor + if !h.SingleSegment { + if len(in) < 1 { + return io.ErrUnexpectedEOF + } + var wd byte + wd, in = in[0], in[1:] + windowLog := 10 + (wd >> 3) + windowBase := uint64(1) << windowLog + windowAdd := (windowBase / 8) * uint64(wd&0x7) + h.WindowSize = windowBase + windowAdd + } + + // Read Dictionary_ID + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id + if size := fhd & 3; size != 0 { + if size == 3 { + size = 4 + } + if len(in) < int(size) { + return io.ErrUnexpectedEOF + } + b, in = in[:size], in[size:] + if b == nil { + return io.ErrUnexpectedEOF + } + switch size { + case 1: + h.DictionaryID = uint32(b[0]) + case 2: + h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) + case 4: + h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + } + } + + // Read Frame_Content_Size + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size + var fcsSize int + v := fhd >> 6 + switch v { + case 0: + if h.SingleSegment { + fcsSize = 1 + } + default: + fcsSize = 1 << v + } + + if fcsSize > 0 { + h.HasFCS = true + if len(in) < fcsSize { + return io.ErrUnexpectedEOF + } + b, in = in[:fcsSize], in[fcsSize:] + if b == nil { + return io.ErrUnexpectedEOF + } + switch fcsSize { + case 1: + h.FrameContentSize = uint64(b[0]) + case 2: + // When FCS_Field_Size is 2, the offset of 256 is added. + h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256 + case 4: + h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24) + case 8: + d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) + h.FrameContentSize = uint64(d1) | (uint64(d2) << 32) + } + } + + // Frame Header done, we will not fail from now on. + if len(in) < 3 { + return nil + } + tmp, in := in[:3], in[3:] + bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) + h.FirstBlock.Last = bh&1 != 0 + blockType := blockType((bh >> 1) & 3) + // find size. + cSize := int(bh >> 3) + switch blockType { + case blockTypeReserved: + return nil + case blockTypeRLE: + h.FirstBlock.Compressed = true + h.FirstBlock.DecompressedSize = cSize + h.FirstBlock.CompressedSize = 1 + case blockTypeCompressed: + h.FirstBlock.Compressed = true + h.FirstBlock.CompressedSize = cSize + case blockTypeRaw: + h.FirstBlock.DecompressedSize = cSize + h.FirstBlock.CompressedSize = cSize + default: + panic("Invalid block type") + } + + h.FirstBlock.OK = true + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index cdda0de58..1d41c25d2 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -5,7 +5,6 @@ package zstd import ( - "bytes" "errors" "io" "sync" @@ -85,6 +84,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { d.current.output = make(chan decodeOutput, d.o.concurrent) d.current.flushed = true + if r == nil { + d.current.err = ErrDecoderNilInput + } + // Transfer option dicts. d.dicts = make(map[uint32]dict, len(d.o.dicts)) for _, dc := range d.o.dicts { @@ -111,7 +114,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { // When the stream is done, io.EOF will be returned. func (d *Decoder) Read(p []byte) (int, error) { if d.stream == nil { - return 0, errors.New("no input has been initialized") + return 0, ErrDecoderNilInput } var n int for { @@ -152,12 +155,20 @@ func (d *Decoder) Read(p []byte) (int, error) { // Reset will reset the decoder the supplied stream after the current has finished processing. // Note that this functionality cannot be used after Close has been called. +// Reset can be called with a nil reader to release references to the previous reader. +// After being called with a nil reader, no other operations than Reset or DecodeAll or Close +// should be used. func (d *Decoder) Reset(r io.Reader) error { if d.current.err == ErrDecoderClosed { return d.current.err } + + d.drainOutput() + if r == nil { - return errors.New("nil Reader sent as input") + d.current.err = ErrDecoderNilInput + d.current.flushed = true + return nil } if d.stream == nil { @@ -166,14 +177,14 @@ func (d *Decoder) Reset(r io.Reader) error { go d.startStreamDecoder(d.stream) } - d.drainOutput() - // If bytes buffer and < 1MB, do sync decoding anyway. - if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 { + if bb, ok := r.(byter); ok && bb.Len() < 1<<20 { + var bb2 byter + bb2 = bb if debug { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } - b := bb.Bytes() + b := bb2.Bytes() var dst []byte if cap(d.current.b) > 0 { dst = d.current.b @@ -249,7 +260,7 @@ func (d *Decoder) drainOutput() { // Any error encountered during the write is also returned. func (d *Decoder) WriteTo(w io.Writer) (int64, error) { if d.stream == nil { - return 0, errors.New("no input has been initialized") + return 0, ErrDecoderNilInput } var n int64 for { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index b1b7c6e6a..2d4d893eb 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -7,6 +7,10 @@ import ( "github.com/klauspost/compress/zstd/internal/xxhash" ) +const ( + dictShardBits = 6 +) + type fastBase struct { // cur is the offset at the start of hist cur int32 @@ -17,6 +21,7 @@ type fastBase struct { tmp [8]byte blk *blockEnc lastDictID uint32 + lowMem bool } // CRC returns the underlying CRC writer. @@ -57,15 +62,10 @@ func (e *fastBase) addBlock(src []byte) int32 { // check if we have space already if len(e.hist)+len(src) > cap(e.hist) { if cap(e.hist) == 0 { - l := e.maxMatchOff * 2 - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 - } - e.hist = make([]byte, 0, l) + e.ensureHist(len(src)) } else { - if cap(e.hist) < int(e.maxMatchOff*2) { - panic("unexpected buffer size") + if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) { + panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff)) } // Move down offset := int32(len(e.hist)) - e.maxMatchOff @@ -79,6 +79,28 @@ func (e *fastBase) addBlock(src []byte) int32 { return s } +// ensureHist will ensure that history can keep at least this many bytes. +func (e *fastBase) ensureHist(n int) { + if cap(e.hist) >= n { + return + } + l := e.maxMatchOff + if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize { + l += maxCompressedBlockSize + } else { + l += e.maxMatchOff + } + // Make it at least 1MB. + if l < 1<<20 && !e.lowMem { + l = 1 << 20 + } + // Make it at least the requested size. + if l < int32(n) { + l = int32(n) + } + e.hist = make([]byte, 0, l) +} + // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. func (e *fastBase) UseBlock(enc *blockEnc) { @@ -117,7 +139,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 { // Reset the encoding table. func (e *fastBase) resetBase(d *dict, singleBlock bool) { if e.blk == nil { - e.blk = &blockEnc{} + e.blk = &blockEnc{lowMem: e.lowMem} e.blk.init() } else { e.blk.reset(nil) diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go new file mode 100644 index 000000000..fe3625c5f --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -0,0 +1,487 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "fmt" + "math/bits" +) + +const ( + bestLongTableBits = 20 // Bits used in the long match table + bestLongTableSize = 1 << bestLongTableBits // Size of the table + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + bestShortTableBits = 16 // Bits used in the short match table + bestShortTableSize = 1 << bestShortTableBits // Size of the table +) + +// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type bestFastEncoder struct { + fastBase + table [bestShortTableSize]prevEntry + longTable [bestLongTableSize]prevEntry + dictTable []prevEntry + dictLongTable []prevEntry +} + +// Encode improves compression... +func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 4 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = prevEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + v2 := e.table[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.table[i] = prevEntry{ + offset: v, + prev: v2, + } + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + const kSearchStrength = 10 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + offset3 := int32(blk.recentOffsets[2]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + _ = addLiterals + + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + type match struct { + offset int32 + s int32 + length int32 + rep int32 + } + matchAt := func(offset int32, s int32, first uint32, rep int32) match { + if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + return match{offset: offset, s: s} + } + return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + } + + bestOf := func(a, b match) match { + aScore := b.s - a.s + a.length + bScore := a.s - b.s + b.length + if a.rep < 0 { + aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8 + } + if b.rep < 0 { + bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8 + } + if aScore >= bScore { + return a + } + return b + } + const goodEnough = 100 + + nextHashL := hash8(cv, bestLongTableBits) + nextHashS := hash4x64(cv, bestShortTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) + if canRepeat && best.length < goodEnough { + best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1)) + best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2)) + best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3)) + if best.length > 0 { + best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1)) + best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2)) + best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3)) + } + } + // Load next and check... + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} + e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset} + + // Look far ahead, unless we have a really long match already... + if best.length < goodEnough { + // No match found, move forward on input, no need to check forward... + if best.length < 4 { + s += 1 + (s-nextEmit)>>(kSearchStrength-1) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + continue + } + + s++ + candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)] + cv = load6432(src, s) + cv2 := load6432(src, s+1) + candidateL = e.longTable[hash8(cv, bestLongTableBits)] + candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)] + + best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) + best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) + best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + } + + // We have a match, we can store the forward value + if best.rep > 0 { + s = best.s + var seq seq + seq.matchLen = uint32(best.length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := best.s + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + repIndex := best.offset + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = uint32(best.rep) + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + s = best.s + best.length + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, best.length) + + } + break encodeLoop + } + // Index skipped... + off := index0 + e.cur + for index0 < s-1 { + cv0 := load6432(src, index0) + h0 := hash8(cv0, bestLongTableBits) + h1 := hash4x64(cv0, bestShortTableBits) + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} + off++ + index0++ + } + switch best.rep { + case 2: + offset1, offset2 = offset2, offset1 + case 3: + offset1, offset2, offset3 = offset3, offset1, offset2 + } + cv = load6432(src, s) + continue + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + s = best.s + t := best.offset + offset1, offset2, offset3 = s-t, offset1, offset2 + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := best.length + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + // every entry + for index0 < s-1 { + cv0 := load6432(src, index0) + h0 := hash8(cv0, bestLongTableBits) + h1 := hash4x64(cv0, bestShortTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} + index0++ + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash4x64(cv, bestShortTableBits) + nextHashL := hash8(cv, bestLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + blk.recentOffsets[2] = uint32(offset3) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.ensureHist(len(src)) + e.Encode(blk, src) +} + +// ResetDict will reset and set a dictionary if not nil +func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d == nil { + return + } + // Init or copy dict table + if len(e.dictTable) != len(e.table) || d.id != e.lastDictID { + if len(e.dictTable) != len(e.table) { + e.dictTable = make([]prevEntry, len(e.table)) + } + end := int32(len(d.content)) - 8 + e.maxMatchOff + for i := e.maxMatchOff; i < end; i += 4 { + const hashLog = bestShortTableBits + + cv := load6432(d.content, i-e.maxMatchOff) + nextHash := hash4x64(cv, hashLog) // 0 -> 4 + nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5 + nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6 + nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7 + e.dictTable[nextHash] = prevEntry{ + prev: e.dictTable[nextHash].offset, + offset: i, + } + e.dictTable[nextHash1] = prevEntry{ + prev: e.dictTable[nextHash1].offset, + offset: i + 1, + } + e.dictTable[nextHash2] = prevEntry{ + prev: e.dictTable[nextHash2].offset, + offset: i + 2, + } + e.dictTable[nextHash3] = prevEntry{ + prev: e.dictTable[nextHash3].offset, + offset: i + 3, + } + } + e.lastDictID = d.id + } + + // Init or copy dict table + if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID { + if len(e.dictLongTable) != len(e.longTable) { + e.dictLongTable = make([]prevEntry, len(e.longTable)) + } + if len(d.content) >= 8 { + cv := load6432(d.content, 0) + h := hash8(cv, bestLongTableBits) + e.dictLongTable[h] = prevEntry{ + offset: e.maxMatchOff, + prev: e.dictLongTable[h].offset, + } + + end := int32(len(d.content)) - 8 + e.maxMatchOff + off := 8 // First to read + for i := e.maxMatchOff + 1; i < end; i++ { + cv = cv>>8 | (uint64(d.content[off]) << 56) + h := hash8(cv, bestLongTableBits) + e.dictLongTable[h] = prevEntry{ + offset: i, + prev: e.dictLongTable[h].offset, + } + off++ + } + } + e.lastDictID = d.id + } + // Reset table to initial state + copy(e.longTable[:], e.dictLongTable) + + e.cur = e.maxMatchOff + // Reset table to initial state + copy(e.table[:], e.dictTable) +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index 94a5343d0..c2ce4a2ba 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -16,6 +16,12 @@ const ( // This greatly depends on the type of input. betterShortTableBits = 13 // Bits used in the short match table betterShortTableSize = 1 << betterShortTableBits // Size of the table + + betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table + betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard + + betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table + betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard ) type prevEntry struct { @@ -31,10 +37,17 @@ type prevEntry struct { // and that it is longer (lazy matching). type betterFastEncoder struct { fastBase - table [betterShortTableSize]tableEntry - longTable [betterLongTableSize]prevEntry - dictTable []tableEntry - dictLongTable []prevEntry + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +type betterFastEncoderDict struct { + betterFastEncoder + dictTable []tableEntry + dictLongTable []prevEntry + shortTableShardDirty [betterShortTableShardCnt]bool + longTableShardDirty [betterLongTableShardCnt]bool + allDirty bool } // Encode improves compression... @@ -516,11 +529,511 @@ encodeLoop: // Most notable difference is that src will not be copied for history and // we do not need to check for max match length. func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.ensureHist(len(src)) e.Encode(blk, src) } +// Encode improves compression... +func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + e.allDirty = true + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.allDirty = true + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + e.markShortShardDirty(nextHashS) + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.markLongShardDirty(h0) + h1 := hash5(cv1, betterShortTableBits) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.markLongShardDirty(h0) + h1 := hash5(cv1, betterShortTableBits) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + e.markLongShardDirty(nextHashL) + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.markLongShardDirty(h0) + h1 := hash5(cv1, betterShortTableBits) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShortShardDirty(nextHashS) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + // ResetDict will reset and set a dictionary if not nil func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d != nil { + panic("betterFastEncoder: Reset with dict") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { e.resetBase(d, singleBlock) if d == nil { return @@ -557,6 +1070,7 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } // Init or copy dict table @@ -585,11 +1099,72 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } - // Reset table to initial state - copy(e.longTable[:], e.dictLongTable) - e.cur = e.maxMatchOff // Reset table to initial state - copy(e.table[:], e.dictTable) + { + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.shortTableShardDirty { + if e.shortTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + const shardCnt = betterShortTableShardCnt + const shardSize = betterShortTableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.table[:], e.dictTable) + for i := range e.shortTableShardDirty { + e.shortTableShardDirty[i] = false + } + } else { + for i := range e.shortTableShardDirty { + if !e.shortTableShardDirty[i] { + continue + } + + copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + e.shortTableShardDirty[i] = false + } + } + } + { + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.shortTableShardDirty { + if e.shortTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + const shardCnt = betterLongTableShardCnt + const shardSize = betterLongTableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.longTable[:], e.dictLongTable) + for i := range e.longTableShardDirty { + e.longTableShardDirty[i] = false + } + } else { + for i := range e.longTableShardDirty { + if !e.longTableShardDirty[i] { + continue + } + + copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize]) + e.longTableShardDirty[i] = false + } + } + } + e.cur = e.maxMatchOff + e.allDirty = false +} + +func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) { + e.longTableShardDirty[entryNum/betterLongTableShardSize] = true +} + +func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) { + e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 19eebf66e..8629d43d8 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -11,6 +11,9 @@ const ( dFastLongTableSize = 1 << dFastLongTableBits // Size of the table dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table + dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard + dFastShortTableBits = tableBits // Bits used in the short match table dFastShortTableSize = 1 << dFastShortTableBits // Size of the table dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. @@ -18,8 +21,14 @@ const ( type doubleFastEncoder struct { fastEncoder - longTable [dFastLongTableSize]tableEntry - dictLongTable []tableEntry + longTable [dFastLongTableSize]tableEntry +} + +type doubleFastEncoderDict struct { + fastEncoderDict + longTable [dFastLongTableSize]tableEntry + dictLongTable []tableEntry + longTableShardDirty [dLongTableShardCnt]bool } // Encode mimmics functionality in zstd_dfast.c @@ -678,9 +687,379 @@ encodeLoop: } } +// Encode will encode the content, with a dictionary if initialized for it. +func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = tableEntry{} + } + e.markAllShardsDirty() + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.longTable[i].offset = v + } + e.markAllShardsDirty() + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = entry + e.markShardDirty(nextHashS) + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += lenght + repOff + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + } + // Find the offsets of our two matches. + coffsetL := s - (candidateL.offset - e.cur) + coffsetS := s - (candidateS.offset - e.cur) + + // Check if we have a long match. + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + // Check if we have a short match. + if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, dFastLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = s - (candidateL.offset - e.cur) + checkAt + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} + e.markLongShardDirty(nextHashL) + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + s += checkAt + if debugMatches { + println("long match (after short)") + } + break + } + + t = candidateS.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + l := e.matchlen(s+4, t+4, src) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) and start+2 (short) + index0 := s - l + 1 + // Index match end-2 (long) and end-1 (short) + index1 := s - 2 + + cv0 := load6432(src, index0) + cv1 := load6432(src, index1) + te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} + te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} + longHash1 := hash8(cv0, dFastLongTableBits) + longHash2 := hash8(cv0, dFastLongTableBits) + e.longTable[longHash1] = te0 + e.longTable[longHash2] = te1 + e.markLongShardDirty(longHash1) + e.markLongShardDirty(longHash2) + cv0 >>= 8 + cv1 >>= 8 + te0.offset++ + te1.offset++ + te0.val = uint32(cv0) + te1.val = uint32(cv1) + hashVal1 := hash5(cv0, dFastShortTableBits) + hashVal2 := hash5(cv1, dFastShortTableBits) + e.table[hashVal1] = te0 + e.markShardDirty(hashVal1) + e.table[hashVal2] = te1 + e.markShardDirty(hashVal2) + + cv = load6432(src, s) + + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = entry + e.markShardDirty(nextHashS) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } + // If we encoded more than 64K mark all dirty. + if len(src) > 64<<10 { + e.markAllShardsDirty() + } +} + // ResetDict will reset and set a dictionary if not nil func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) { e.fastEncoder.Reset(d, singleBlock) + if d != nil { + panic("doubleFastEncoder: Reset with dict not supported") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { + allDirty := e.allDirty + e.fastEncoderDict.Reset(d, singleBlock) if d == nil { return } @@ -706,8 +1085,37 @@ func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } // Reset table to initial state e.cur = e.maxMatchOff - copy(e.longTable[:], e.dictLongTable) + + dirtyShardCnt := 0 + if !allDirty { + for i := range e.longTableShardDirty { + if e.longTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + + if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { + copy(e.longTable[:], e.dictLongTable) + for i := range e.longTableShardDirty { + e.longTableShardDirty[i] = false + } + return + } + for i := range e.longTableShardDirty { + if !e.longTableShardDirty[i] { + continue + } + + copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + e.longTableShardDirty[i] = false + } +} + +func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) { + e.longTableShardDirty[entryNum/dLongTableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 0b301df43..ba4a17e10 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -11,9 +11,11 @@ import ( ) const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table + tableShardSize = tableSize / tableShardCnt // Size of an individual shard + tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. maxMatchLength = 131074 ) @@ -24,8 +26,14 @@ type tableEntry struct { type fastEncoder struct { fastBase - table [tableSize]tableEntry - dictTable []tableEntry + table [tableSize]tableEntry +} + +type fastEncoderDict struct { + fastEncoder + dictTable []tableEntry + tableShardDirty [tableShardCnt]bool + allDirty bool } // Encode mimmics functionality in zstd_fast.c @@ -78,7 +86,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { // TEMPLATE const hashLog = tableBits // seems global, but would be nice to tweak. - const kSearchStrength = 8 + const kSearchStrength = 7 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -617,8 +625,322 @@ encodeLoop: } } +// Encode will encode the content, with a dictionary if initialized for it. +func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + inputMargin = 8 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if e.allDirty || len(src) > 32<<10 { + e.fastEncoder.Encode(blk, src) + e.allDirty = true + return + } + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 2. + const stepSize = 2 + + // TEMPLATE + const hashLog = tableBits + // seems global, but would be nice to tweak. + const kSearchStrength = 7 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // t will contain the match offset when we find one. + // When existing the search loop, we have already checked 4 bytes. + var t int32 + + // We will not use repeat offsets across blocks. + // By not using them for the first 3 matches + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHash := hash6(cv, hashLog) + nextHash2 := hash6(cv>>8, hashLog) + candidate := e.table[nextHash] + candidate2 := e.table[nextHash2] + repIndex := s - offset1 + 2 + + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShardDirty(nextHash) + e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} + e.markShardDirty(nextHash2) + + if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { + // Consider history as well. + var seq seq + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + 2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + sMin := s - e.maxMatchOff + if sMin < 0 { + sMin = 0 + } + for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + 2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + coffset0 := s - (candidate.offset - e.cur) + coffset1 := s - (candidate2.offset - e.cur) + 1 + if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { + // found a regular match + t = candidate.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + break + } + + if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { + // found a regular match + t = candidate2.offset - e.cur + s++ + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + break + } + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // A 4-byte match has been found. We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence. + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + // Don't use repeat offsets + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + + // Check offset 2 + if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Store this, since we have it. + nextHash := hash6(cv, hashLog) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShardDirty(nextHash) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + break encodeLoop + } + // Prepare next loop. + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + // ResetDict will reset and set a dictionary if not nil func (e *fastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d != nil { + panic("fastEncoder: Reset with dict") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { e.resetBase(d, singleBlock) if d == nil { return @@ -653,9 +975,44 @@ func (e *fastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } e.cur = e.maxMatchOff - // Reset table to initial state - copy(e.table[:], e.dictTable) + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.tableShardDirty { + if e.tableShardDirty[i] { + dirtyShardCnt++ + } + } + } + + const shardCnt = tableShardCnt + const shardSize = tableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.table[:], e.dictTable) + for i := range e.tableShardDirty { + e.tableShardDirty[i] = false + } + e.allDirty = false + return + } + for i := range e.tableShardDirty { + if !e.tableShardDirty[i] { + continue + } + + copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + e.tableShardDirty[i] = false + } + e.allDirty = false +} + +func (e *fastEncoderDict) markAllShardsDirty() { + e.allDirty = true +} + +func (e *fastEncoderDict) markShardDirty(entryNum uint32) { + e.tableShardDirty[entryNum/tableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index f5759211d..6f0265099 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -106,7 +106,7 @@ func (e *Encoder) Reset(w io.Writer) { s.encoder = e.o.encoder() } if s.writing == nil { - s.writing = &blockEnc{} + s.writing = &blockEnc{lowMem: e.o.lowMem} s.writing.init() } s.writing.initNewEncode() @@ -176,6 +176,12 @@ func (e *Encoder) nextBlock(final bool) error { } if !s.headerWritten { // If we have a single block encode, do a sync compression. + if final && len(s.filling) == 0 && !e.o.fullZero { + s.headerWritten = true + s.fullFrameWritten = true + s.eofWritten = true + return nil + } if final && len(s.filling) > 0 { s.current = e.EncodeAll(s.filling, s.current[:0]) var n2 int @@ -471,7 +477,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } // If less than 1MB, allocate a buffer up front. - if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 { + if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { dst = make([]byte, 0, len(src)) } dst, err := fh.appendTo(dst) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 1209915bc..18a47eb03 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -24,12 +24,12 @@ type encoderOptions struct { allLitEntropy bool customWindow bool customALEntropy bool + lowMem bool dict *dict } func (o *encoderOptions) setDefault() { *o = encoderOptions{ - // use less ram: true for now, but may change. concurrent: runtime.GOMAXPROCS(0), crc: true, single: nil, @@ -37,18 +37,31 @@ func (o *encoderOptions) setDefault() { windowSize: 8 << 20, level: SpeedDefault, allLitEntropy: true, + lowMem: false, } } // encoder returns an encoder with the selected options. func (o encoderOptions) encoder() encoder { switch o.level { - case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} - case SpeedBetterCompression: - return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} case SpeedFastest: - return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} + if o.dict != nil { + return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + } + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + + case SpeedDefault: + if o.dict != nil { + return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}} + } + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + case SpeedBetterCompression: + if o.dict != nil { + return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + } + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + case SpeedBestCompression: + return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} } panic("unknown compression level") } @@ -143,20 +156,20 @@ const ( // By using this, notice that CPU usage may go up in the future. SpeedBetterCompression + // SpeedBestCompression will choose the best available compression option. + // This will offer the best compression no matter the CPU cost. + SpeedBestCompression + // speedLast should be kept as the last actual compression option. // The is not for external usage, but is used to keep track of the valid options. speedLast - - // SpeedBestCompression will choose the best available compression option. - // For now this is not implemented. - SpeedBestCompression = SpeedBetterCompression ) // EncoderLevelFromString will convert a string representation of an encoding level back // to a compression level. The compare is not case sensitive. // If the string wasn't recognized, (false, SpeedDefault) will be returned. func EncoderLevelFromString(s string) (bool, EncoderLevel) { - for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ { + for l := speedNotSet + 1; l < speedLast; l++ { if strings.EqualFold(s, l.String()) { return true, l } @@ -173,7 +186,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel { return SpeedFastest case level >= 3 && level < 6: return SpeedDefault - case level > 5: + case level >= 6 && level < 10: + return SpeedBetterCompression + case level >= 10: return SpeedBetterCompression } return SpeedDefault @@ -188,6 +203,8 @@ func (e EncoderLevel) String() string { return "default" case SpeedBetterCompression: return "better" + case SpeedBestCompression: + return "best" default: return "invalid" } @@ -209,6 +226,8 @@ func WithEncoderLevel(l EncoderLevel) EOption { o.windowSize = 8 << 20 case SpeedBetterCompression: o.windowSize = 16 << 20 + case SpeedBestCompression: + o.windowSize = 32 << 20 } } if !o.customALEntropy { @@ -268,6 +287,17 @@ func WithSingleSegment(b bool) EOption { } } +// WithLowerEncoderMem will trade in some memory cases trade less memory usage for +// slower encoding speed. +// This will not change the window size which is the primary function for reducing +// memory usage. See WithWindowSize. +func WithLowerEncoderMem(b bool) EOption { + return func(o *encoderOptions) error { + o.lowMem = b + return nil + } +} + // WithEncoderDict allows to register a dictionary that will be used for the encode. // The encoder *may* choose to use no dictionary instead for certain payloads. func WithEncoderDict(dict []byte) EOption { diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index aa9eba88b..b80709d5e 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -97,7 +97,7 @@ func (s *fseEncoder) prepare() (*fseEncoder, error) { func (s *fseEncoder) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < int(tableSize) { + if cap(s.ct.tableSymbol) < tableSize { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] @@ -202,13 +202,13 @@ func (s *fseEncoder) buildCTable() error { case 0: case -1, 1: symbolTT[i].deltaNbBits = tl - symbolTT[i].deltaFindState = int16(total - 1) + symbolTT[i].deltaFindState = total - 1 total++ default: maxBitsOut := uint32(tableLog) - highBit(uint32(v-1)) minStatePlus := uint32(v) << maxBitsOut symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus - symbolTT[i].deltaFindState = int16(total - v) + symbolTT[i].deltaFindState = total - v total += v } } @@ -353,8 +353,8 @@ func (s *fseEncoder) normalizeCount2(length int) error { distributed uint32 total = uint32(length) tableLog = s.actualTableLog - lowThreshold = uint32(total >> tableLog) - lowOne = uint32((total * 3) >> (tableLog + 1)) + lowThreshold = total >> tableLog + lowOne = (total * 3) >> (tableLog + 1) ) for i, cnt := range s.count[:s.symbolLen] { if cnt == 0 { @@ -379,7 +379,7 @@ func (s *fseEncoder) normalizeCount2(length int) error { if (total / toDistribute) > lowOne { // risk of rounding to zero - lowOne = uint32((total * 3) / (toDistribute * 2)) + lowOne = (total * 3) / (toDistribute * 2) for i, cnt := range s.count[:s.symbolLen] { if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { s.norm[i] = 1 diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index b5c8ef133..1dd39e63b 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -181,11 +181,18 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { return fmt.Errorf("output (%d) bigger than max block size", size) } if size > cap(s.out) { - // Not enough size, will be extremely rarely triggered, + // Not enough size, which can happen under high volume block streaming conditions // but could be if destination slice is too small for sync operations. - // We add maxBlockSize to the capacity. - s.out = append(s.out, make([]byte, maxBlockSize)...) - s.out = s.out[:len(s.out)-maxBlockSize] + // over-allocating here can create a large amount of GC pressure so we try to keep + // it as contained as possible + used := len(s.out) - startSize + addBytes := 256 + ll + ml + used>>2 + // Clamp to max block size. + if used+addBytes > maxBlockSize { + addBytes = maxBlockSize - used + } + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] } if ml > maxMatchLen { return fmt.Errorf("match len (%d) bigger than max allowed length", ml) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 841fd95ac..c95fe5111 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -417,7 +417,7 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli) // https://github.com/google/snappy/blob/master/framing_format.txt func snappyCRC(b []byte) uint32 { c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 + return c>>15 | c<<17 + 0xa282ead8 } // snappyDecodedLen returns the length of the decoded block and the number of bytes diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 0807719c8..9056beef2 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -4,6 +4,7 @@ package zstd import ( + "bytes" "errors" "log" "math" @@ -73,6 +74,10 @@ var ( // ErrDecoderClosed will be returned if the Decoder was used after // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + + // ErrDecoderNilInput is returned when a nil Reader was provided + // and an operation other than Reset/DecodeAll/Close was attempted. + ErrDecoderNilInput = errors.New("nil input provided as reader") ) func println(a ...interface{}) { @@ -142,3 +147,10 @@ func load64(b []byte, i int) uint64 { return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 } + +type byter interface { + Bytes() []byte + Len() int +} + +var _ byter = &bytes.Buffer{} diff --git a/vendor/modules.txt b/vendor/modules.txt index 76a26f93b..bd01f33c8 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -231,7 +231,7 @@ github.com/hashicorp/go-multierror github.com/imdario/mergo # github.com/json-iterator/go v1.1.10 github.com/json-iterator/go -# github.com/klauspost/compress v1.11.3 +# github.com/klauspost/compress v1.11.13 ## explicit github.com/klauspost/compress/fse github.com/klauspost/compress/huff0