[security] Vulnerability in golang.org/x/text/encoding/unicode v0.3.2

Hello gophers,

Version v0.3.3 of golang.org/x/text fixes a vulnerability in the golang.org/x/text/encoding/unicode package which could lead to the UTF-16 decoder entering an infinite loop, causing the program to crash or run out of memory.

An attacker could provide a single byte to a UTF-16 decoder instantiated with UseBOM or ExpectBOM to trigger an infinite loop if the String function on the Decoder is called, or the Decoder is passed to golang.org/x/text/transform.String.

transform.String has also been hardened not to enter an infinite loop if a Transformer keeps returning ErrShortSrc even if atEOF is true.

This issue was first filed as Issue 39491 by GitHub user abacabadabacaba and reported to the security team by Anton Gyllenberg. It is tracked as CVE-2020-14040.

Cheers,
Katie for the Go team
2020-06-17 07:57:05 -04:00
parent 537a602195
commit 3cef97e8b5
42 changed files with 11138 additions and 31 deletions
--- a/vendor/golang.org/x/text/encoding/unicode/unicode.go
+++ b/vendor/golang.org/x/text/encoding/unicode/unicode.go
@@ -6,6 +6,7 @@
 package unicode // import "golang.org/x/text/encoding/unicode"

 import (
+	"bytes"
 	"errors"
 	"unicode/utf16"
 	"unicode/utf8"
@@ -25,15 +26,95 @@ import (
 // the introduction of some kind of error type for conveying the erroneous code
 // point.

-// UTF8 is the UTF-8 encoding.
+// UTF8 is the UTF-8 encoding. It neither removes nor adds byte order marks.
 var UTF8 encoding.Encoding = utf8enc

+// UTF8BOM is a UTF-8 encoding where the decoder strips a leading byte order
+// mark while the encoder adds one.
+//
+// Some editors add a byte order mark as a signature to UTF-8 files. Although
+// the byte order mark is not useful for detecting byte order in UTF-8, it is
+// sometimes used as a convention to mark UTF-8-encoded files. This relies on
+// the observation that the UTF-8 byte order mark is either an illegal or at
+// least very unlikely sequence in any other character encoding.
+var UTF8BOM encoding.Encoding = utf8bomEncoding{}
+
+type utf8bomEncoding struct{} // field-less encoding.Encoding value assigned to UTF8BOM
+
+func (utf8bomEncoding) String() string { // String returns the fixed display name of this encoding.
+	return "UTF-8-BOM"
+}
+
+func (utf8bomEncoding) ID() (identifier.MIB, string) { // ID returns identifier.Unofficial together with the name "x-utf8bom".
+	return identifier.Unofficial, "x-utf8bom"
+}
+
+func (utf8bomEncoding) NewEncoder() *encoding.Encoder { // NewEncoder returns an encoder backed by a new utf8bomEncoder.
+	return &encoding.Encoder{
+		Transformer: &utf8bomEncoder{t: runes.ReplaceIllFormed()}, // written starts false, so the first Transform call emits the BOM
+	}
+}
+
+func (utf8bomEncoding) NewDecoder() *encoding.Decoder { // NewDecoder returns a decoder that has not yet looked for a leading BOM.
+	return &encoding.Decoder{Transformer: &utf8bomDecoder{}}
+}
+
 var utf8enc = &internal.Encoding{
 	&internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()},
 	"UTF-8",
 	identifier.UTF8,
 }

+type utf8bomDecoder struct { // utf8bomDecoder skips one leading BOM and hands everything else to utf8Decoder.
+	checked bool // true once the start of the input has been inspected for a BOM
+}
+
+func (t *utf8bomDecoder) Reset() { // Reset makes the next Transform call look for a leading BOM again.
+	t.checked = false
+}
+
+func (t *utf8bomDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { // Transform skips a BOM at the start of the input, then decodes the remainder via utf8Decoder.
+	if !t.checked {
+		if !atEOF && len(src) < len(utf8BOM) { // too little input so far to tell whether a BOM is present
+			if len(src) == 0 {
+				return 0, 0, nil // empty, non-final input: nothing to do and not an error
+			}
+			return 0, 0, transform.ErrShortSrc // consume nothing and ask for more bytes before deciding
+		}
+		if bytes.HasPrefix(src, []byte(utf8BOM)) {
+			nSrc += len(utf8BOM) // the BOM is reported as consumed but yields no output
+			src = src[len(utf8BOM):]
+		}
+		t.checked = true // later calls (until Reset) bypass the BOM check
+	}
+	nDst, n, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF) // nDst is still 0 here, so the whole of dst is offered
+	nSrc += n // add the delegate's consumption to any skipped BOM bytes
+	return nDst, nSrc, err
+}
+
+type utf8bomEncoder struct { // utf8bomEncoder writes the BOM once, ahead of the transformed input.
+	written bool // true once the BOM has been copied into the output
+	t       transform.Transformer // set to runes.ReplaceIllFormed() by NewEncoder; in the code shown here only Reset uses it
+}
+
+func (t *utf8bomEncoder) Reset() { // Reset makes the next Transform call write the BOM again and resets the wrapped transformer.
+	t.written = false
+	t.t.Reset()
+}
+
+func (t *utf8bomEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { // Transform writes the BOM before the first output, then transforms src into the space after it.
+	if !t.written {
+		if len(dst) < len(utf8BOM) {
+			return nDst, 0, transform.ErrShortDst // nDst is 0 here: nothing written, nothing consumed
+		}
+		nDst = copy(dst, utf8BOM)
+		t.written = true
+	}
+	n, nSrc, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF) // NOTE(review): this calls utf8Decoder rather than t.t — confirm that is intended
+	nDst += n // n counts only the bytes written after any BOM emitted above
+	return nDst, nSrc, err
+}
+
 type utf8Decoder struct{ transform.NopResetter }

 func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -287,16 +368,13 @@ func (u *utf16Decoder) Reset() {
 }

 func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	if len(src) < 2 && atEOF && u.current.bomPolicy&requireBOM != 0 {
+		return 0, 0, ErrMissingBOM
+	}
 	if len(src) == 0 {
-		if atEOF && u.current.bomPolicy&requireBOM != 0 {
-			return 0, 0, ErrMissingBOM
-		}
 		return 0, 0, nil
 	}
-	if u.current.bomPolicy&acceptBOM != 0 {
-		if len(src) < 2 {
-			return 0, 0, transform.ErrShortSrc
-		}
+	if len(src) >= 2 && u.current.bomPolicy&acceptBOM != 0 {
 		switch {
 		case src[0] == 0xfe && src[1] == 0xff:
 			u.current.endianness = BigEndian