Merge pull request #123988 from skitt/use-cases-titler
code-generator: use cases.Title instead of strings.Title
This commit is contained in:
		| @@ -21,6 +21,8 @@ import ( | ||||
| 	"path" | ||||
| 	"strings" | ||||
|  | ||||
| 	"golang.org/x/text/cases" | ||||
| 	"golang.org/x/text/language" | ||||
| 	"k8s.io/gengo/v2/generator" | ||||
| 	"k8s.io/gengo/v2/namer" | ||||
| 	"k8s.io/gengo/v2/types" | ||||
| @@ -43,6 +45,8 @@ type genFakeForType struct { | ||||
|  | ||||
| var _ generator.Generator = &genFakeForType{} | ||||
|  | ||||
| var titler = cases.Title(language.Und) | ||||
|  | ||||
| // Filter ignores all but one type because we're making a single file per type. | ||||
| func (g *genFakeForType) Filter(c *generator.Context, t *types.Type) bool { return t == g.typeToMatch } | ||||
|  | ||||
| @@ -299,9 +303,7 @@ func (g *genFakeForType) GenerateType(c *generator.Context, t *types.Type, w io. | ||||
| // TODO: Make the verbs in templates parametrized so the strings.Replace() is | ||||
| // not needed. | ||||
| func adjustTemplate(name, verbType, template string) string { | ||||
| 	//nolint:staticcheck | ||||
| 	// TODO: convert this to use golang.org/x/text/cases | ||||
| 	return strings.ReplaceAll(template, " "+strings.Title(verbType), " "+name) | ||||
| 	return strings.ReplaceAll(template, " "+titler.String(verbType), " "+name) | ||||
| } | ||||
|  | ||||
| // template for the struct that implements the type's interface | ||||
|   | ||||
| @@ -21,6 +21,8 @@ import ( | ||||
| 	"path" | ||||
| 	"strings" | ||||
|  | ||||
| 	"golang.org/x/text/cases" | ||||
| 	"golang.org/x/text/language" | ||||
| 	"k8s.io/gengo/v2/generator" | ||||
| 	"k8s.io/gengo/v2/namer" | ||||
| 	"k8s.io/gengo/v2/types" | ||||
| @@ -44,6 +46,8 @@ type genClientForType struct { | ||||
|  | ||||
| var _ generator.Generator = &genClientForType{} | ||||
|  | ||||
| var titler = cases.Title(language.Und) | ||||
|  | ||||
| // Filter ignores all but one type because we're making a single file per type. | ||||
| func (g *genClientForType) Filter(c *generator.Context, t *types.Type) bool { | ||||
| 	return t == g.typeToMatch | ||||
| @@ -119,13 +123,9 @@ func (g *genClientForType) GenerateType(c *generator.Context, t *types.Type, w i | ||||
| 		} | ||||
| 		var updatedVerbtemplate string | ||||
| 		if _, exists := subresourceDefaultVerbTemplates[e.VerbType]; e.IsSubresource() && exists { | ||||
| 			//nolint:staticcheck | ||||
| 			// TODO: convert this to use golang.org/x/text/cases | ||||
| 			updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(subresourceDefaultVerbTemplates[e.VerbType], strings.Title(e.VerbType)+"(") | ||||
| 			updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(subresourceDefaultVerbTemplates[e.VerbType], titler.String(e.VerbType)+"(") | ||||
| 		} else { | ||||
| 			//nolint:staticcheck | ||||
| 			// TODO: convert this to use golang.org/x/text/cases | ||||
| 			updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(defaultVerbTemplates[e.VerbType], strings.Title(e.VerbType)+"(") | ||||
| 			updatedVerbtemplate = e.VerbName + "(" + strings.TrimPrefix(defaultVerbTemplates[e.VerbType], titler.String(e.VerbType)+"(") | ||||
| 		} | ||||
| 		extendedMethod := extendedInterfaceMethod{ | ||||
| 			template: updatedVerbtemplate, | ||||
| @@ -348,9 +348,7 @@ func (g *genClientForType) GenerateType(c *generator.Context, t *types.Type, w i | ||||
| // TODO: Make the verbs in templates parametrized so the strings.Replace() is | ||||
| // not needed. | ||||
| func adjustTemplate(name, verbType, template string) string { | ||||
| 	//nolint:staticcheck | ||||
| 	// TODO: convert this to use golang.org/x/text/cases | ||||
| 	return strings.ReplaceAll(template, " "+strings.Title(verbType), " "+name) | ||||
| 	return strings.ReplaceAll(template, " "+titler.String(verbType), " "+name) | ||||
| } | ||||
|  | ||||
| func generateInterface(defaultVerbTemplates map[string]string, tags util.Tags) string { | ||||
|   | ||||
| @@ -11,6 +11,7 @@ require ( | ||||
| 	github.com/google/go-cmp v0.6.0 | ||||
| 	github.com/google/gofuzz v1.2.0 | ||||
| 	github.com/spf13/pflag v1.0.5 | ||||
| 	golang.org/x/text v0.14.0 | ||||
| 	gopkg.in/yaml.v2 v2.4.0 | ||||
| 	k8s.io/apimachinery v0.0.0 | ||||
| 	k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 | ||||
|   | ||||
							
								
								
									
										162
									
								
								vendor/golang.org/x/text/cases/cases.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								vendor/golang.org/x/text/cases/cases.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,162 @@ | ||||
| // Copyright 2014 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| //go:generate go run gen.go gen_trieval.go | ||||
|  | ||||
| // Package cases provides general and language-specific case mappers. | ||||
| package cases // import "golang.org/x/text/cases" | ||||
|  | ||||
| import ( | ||||
| 	"golang.org/x/text/language" | ||||
| 	"golang.org/x/text/transform" | ||||
| ) | ||||
|  | ||||
| // References: | ||||
| // - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18. | ||||
| // - https://www.unicode.org/reports/tr29/ | ||||
| // - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt | ||||
| // - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt | ||||
| // - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt | ||||
| // - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt | ||||
| // - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt | ||||
| // - http://userguide.icu-project.org/transforms/casemappings | ||||
|  | ||||
| // TODO: | ||||
| // - Case folding | ||||
| // - Wide and Narrow? | ||||
| // - Segmenter option for title casing. | ||||
| // - ASCII fast paths | ||||
| // - Encode Soft-Dotted property within trie somehow. | ||||
|  | ||||
| // A Caser transforms given input to a certain case. It implements | ||||
| // transform.Transformer. | ||||
| // | ||||
| // A Caser may be stateful and should therefore not be shared between | ||||
| // goroutines. | ||||
| type Caser struct { | ||||
| 	t transform.SpanningTransformer | ||||
| } | ||||
|  | ||||
| // Bytes returns a new byte slice with the result of converting b to the case | ||||
| // form implemented by c. | ||||
| func (c Caser) Bytes(b []byte) []byte { | ||||
| 	b, _, _ = transform.Bytes(c.t, b) | ||||
| 	return b | ||||
| } | ||||
|  | ||||
| // String returns a string with the result of transforming s to the case form | ||||
| // implemented by c. | ||||
| func (c Caser) String(s string) string { | ||||
| 	s, _, _ = transform.String(c.t, s) | ||||
| 	return s | ||||
| } | ||||
|  | ||||
| // Reset resets the Caser to be reused for new input after a previous call to | ||||
| // Transform. | ||||
| func (c Caser) Reset() { c.t.Reset() } | ||||
|  | ||||
| // Transform implements the transform.Transformer interface and transforms the | ||||
| // given input to the case form implemented by c. | ||||
| func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	return c.t.Transform(dst, src, atEOF) | ||||
| } | ||||
|  | ||||
| // Span implements the transform.SpanningTransformer interface. | ||||
| func (c Caser) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	return c.t.Span(src, atEOF) | ||||
| } | ||||
|  | ||||
| // Upper returns a Caser for language-specific uppercasing. | ||||
| func Upper(t language.Tag, opts ...Option) Caser { | ||||
| 	return Caser{makeUpper(t, getOpts(opts...))} | ||||
| } | ||||
|  | ||||
| // Lower returns a Caser for language-specific lowercasing. | ||||
| func Lower(t language.Tag, opts ...Option) Caser { | ||||
| 	return Caser{makeLower(t, getOpts(opts...))} | ||||
| } | ||||
|  | ||||
| // Title returns a Caser for language-specific title casing. It uses an | ||||
| // approximation of the default Unicode Word Break algorithm. | ||||
| func Title(t language.Tag, opts ...Option) Caser { | ||||
| 	return Caser{makeTitle(t, getOpts(opts...))} | ||||
| } | ||||
|  | ||||
| // Fold returns a Caser that implements Unicode case folding. The returned Caser | ||||
| // is stateless and safe to use concurrently by multiple goroutines. | ||||
| // | ||||
| // Case folding does not normalize the input and may not preserve a normal form. | ||||
| // Use the collate or search package for more convenient and linguistically | ||||
| // sound comparisons. Use golang.org/x/text/secure/precis for string comparisons | ||||
| // where security aspects are a concern. | ||||
| func Fold(opts ...Option) Caser { | ||||
| 	return Caser{makeFold(getOpts(opts...))} | ||||
| } | ||||
|  | ||||
| // An Option is used to modify the behavior of a Caser. | ||||
| type Option func(o options) options | ||||
|  | ||||
| // TODO: consider these options to take a boolean as well, like FinalSigma. | ||||
| // The advantage of using this approach is that other providers of a lower-case | ||||
| // algorithm could set different defaults by prefixing a user-provided slice | ||||
| // of options with their own. This is handy, for instance, for the precis | ||||
| // package which would override the default to not handle the Greek final sigma. | ||||
|  | ||||
| var ( | ||||
| 	// NoLower disables the lowercasing of non-leading letters for a title | ||||
| 	// caser. | ||||
| 	NoLower Option = noLower | ||||
|  | ||||
| 	// Compact omits mappings in case folding for characters that would grow the | ||||
| 	// input. (Unimplemented.) | ||||
| 	Compact Option = compact | ||||
| ) | ||||
|  | ||||
| // TODO: option to preserve a normal form, if applicable? | ||||
|  | ||||
| type options struct { | ||||
| 	noLower bool | ||||
| 	simple  bool | ||||
|  | ||||
| 	// TODO: segmenter, max ignorable, alternative versions, etc. | ||||
|  | ||||
| 	ignoreFinalSigma bool | ||||
| } | ||||
|  | ||||
| func getOpts(o ...Option) (res options) { | ||||
| 	for _, f := range o { | ||||
| 		res = f(res) | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
| func noLower(o options) options { | ||||
| 	o.noLower = true | ||||
| 	return o | ||||
| } | ||||
|  | ||||
| func compact(o options) options { | ||||
| 	o.simple = true | ||||
| 	return o | ||||
| } | ||||
|  | ||||
| // HandleFinalSigma specifies whether the special handling of Greek final sigma | ||||
| // should be enabled. Unicode prescribes handling the Greek final sigma for all | ||||
| // locales, but standards like IDNA and PRECIS override this default. | ||||
| func HandleFinalSigma(enable bool) Option { | ||||
| 	if enable { | ||||
| 		return handleFinalSigma | ||||
| 	} | ||||
| 	return ignoreFinalSigma | ||||
| } | ||||
|  | ||||
| func ignoreFinalSigma(o options) options { | ||||
| 	o.ignoreFinalSigma = true | ||||
| 	return o | ||||
| } | ||||
|  | ||||
| func handleFinalSigma(o options) options { | ||||
| 	o.ignoreFinalSigma = false | ||||
| 	return o | ||||
| } | ||||
							
								
								
									
										376
									
								
								vendor/golang.org/x/text/cases/context.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										376
									
								
								vendor/golang.org/x/text/cases/context.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,376 @@ | ||||
| // Copyright 2014 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package cases | ||||
|  | ||||
| import "golang.org/x/text/transform" | ||||
|  | ||||
| // A context is used for iterating over source bytes, fetching case info and | ||||
| // writing to a destination buffer. | ||||
| // | ||||
| // Casing operations may need more than one rune of context to decide how a rune | ||||
| // should be cased. Casing implementations should call checkpoint on context | ||||
| // whenever it is known to be safe to return the runes processed so far. | ||||
| // | ||||
| // It is recommended for implementations to not allow for more than 30 case | ||||
| // ignorables as lookahead (analogous to the limit in norm) and to use state if | ||||
| // unbounded lookahead is needed for cased runes. | ||||
| type context struct { | ||||
| 	dst, src []byte | ||||
| 	atEOF    bool | ||||
|  | ||||
| 	pDst int // pDst points past the last written rune in dst. | ||||
| 	pSrc int // pSrc points to the start of the currently scanned rune. | ||||
|  | ||||
| 	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc. | ||||
| 	nDst, nSrc int | ||||
| 	err        error | ||||
|  | ||||
| 	sz   int  // size of current rune | ||||
| 	info info // case information of currently scanned rune | ||||
|  | ||||
| 	// State preserved across calls to Transform. | ||||
| 	isMidWord bool // false if next cased letter needs to be title-cased. | ||||
| } | ||||
|  | ||||
| func (c *context) Reset() { | ||||
| 	c.isMidWord = false | ||||
| } | ||||
|  | ||||
| // ret returns the return values for the Transform method. It checks whether | ||||
| // there were insufficient bytes in src to complete and introduces an error | ||||
| // accordingly, if necessary. | ||||
| func (c *context) ret() (nDst, nSrc int, err error) { | ||||
| 	if c.err != nil || c.nSrc == len(c.src) { | ||||
| 		return c.nDst, c.nSrc, c.err | ||||
| 	} | ||||
| 	// This point is only reached by mappers if there was no short destination | ||||
| 	// buffer. This means that the source buffer was exhausted and that c.sz was | ||||
| 	// set to 0 by next. | ||||
| 	if c.atEOF && c.pSrc == len(c.src) { | ||||
| 		return c.pDst, c.pSrc, nil | ||||
| 	} | ||||
| 	return c.nDst, c.nSrc, transform.ErrShortSrc | ||||
| } | ||||
|  | ||||
| // retSpan returns the return values for the Span method. It checks whether | ||||
| // there were insufficient bytes in src to complete and introduces an error | ||||
| // accordingly, if necessary. | ||||
| func (c *context) retSpan() (n int, err error) { | ||||
| 	_, nSrc, err := c.ret() | ||||
| 	return nSrc, err | ||||
| } | ||||
|  | ||||
| // checkpoint sets the return value buffer points for Transform to the current | ||||
| // positions. | ||||
| func (c *context) checkpoint() { | ||||
| 	if c.err == nil { | ||||
| 		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // unreadRune causes the last rune read by next to be reread on the next | ||||
| // invocation of next. Only one unreadRune may be called after a call to next. | ||||
| func (c *context) unreadRune() { | ||||
| 	c.sz = 0 | ||||
| } | ||||
|  | ||||
| func (c *context) next() bool { | ||||
| 	c.pSrc += c.sz | ||||
| 	if c.pSrc == len(c.src) || c.err != nil { | ||||
| 		c.info, c.sz = 0, 0 | ||||
| 		return false | ||||
| 	} | ||||
| 	v, sz := trie.lookup(c.src[c.pSrc:]) | ||||
| 	c.info, c.sz = info(v), sz | ||||
| 	if c.sz == 0 { | ||||
| 		if c.atEOF { | ||||
| 			// A zero size means we have an incomplete rune. If we are atEOF, | ||||
| 			// this means it is an illegal rune, which we will consume one | ||||
| 			// byte at a time. | ||||
| 			c.sz = 1 | ||||
| 		} else { | ||||
| 			c.err = transform.ErrShortSrc | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // writeBytes adds bytes to dst. | ||||
| func (c *context) writeBytes(b []byte) bool { | ||||
| 	if len(c.dst)-c.pDst < len(b) { | ||||
| 		c.err = transform.ErrShortDst | ||||
| 		return false | ||||
| 	} | ||||
| 	// This loop is faster than using copy. | ||||
| 	for _, ch := range b { | ||||
| 		c.dst[c.pDst] = ch | ||||
| 		c.pDst++ | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // writeString writes the given string to dst. | ||||
| func (c *context) writeString(s string) bool { | ||||
| 	if len(c.dst)-c.pDst < len(s) { | ||||
| 		c.err = transform.ErrShortDst | ||||
| 		return false | ||||
| 	} | ||||
| 	// This loop is faster than using copy. | ||||
| 	for i := 0; i < len(s); i++ { | ||||
| 		c.dst[c.pDst] = s[i] | ||||
| 		c.pDst++ | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // copy writes the current rune to dst. | ||||
| func (c *context) copy() bool { | ||||
| 	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz]) | ||||
| } | ||||
|  | ||||
| // copyXOR copies the current rune to dst and modifies it by applying the XOR | ||||
| // pattern of the case info. It is the responsibility of the caller to ensure | ||||
| // that this is a rune with a XOR pattern defined. | ||||
| func (c *context) copyXOR() bool { | ||||
| 	if !c.copy() { | ||||
| 		return false | ||||
| 	} | ||||
| 	if c.info&xorIndexBit == 0 { | ||||
| 		// Fast path for 6-bit XOR pattern, which covers most cases. | ||||
| 		c.dst[c.pDst-1] ^= byte(c.info >> xorShift) | ||||
| 	} else { | ||||
| 		// Interpret XOR bits as an index. | ||||
| 		// TODO: test performance for unrolling this loop. Verify that we have | ||||
| 		// at least two bytes and at most three. | ||||
| 		idx := c.info >> xorShift | ||||
| 		for p := c.pDst - 1; ; p-- { | ||||
| 			c.dst[p] ^= xorData[idx] | ||||
| 			idx-- | ||||
| 			if xorData[idx] == 0 { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // hasPrefix returns true if src[pSrc:] starts with the given string. | ||||
| func (c *context) hasPrefix(s string) bool { | ||||
| 	b := c.src[c.pSrc:] | ||||
| 	if len(b) < len(s) { | ||||
| 		return false | ||||
| 	} | ||||
| 	for i, c := range b[:len(s)] { | ||||
| 		if c != s[i] { | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // caseType returns an info with only the case bits, normalized to either | ||||
| // cLower, cUpper, cTitle or cUncased. | ||||
| func (c *context) caseType() info { | ||||
| 	cm := c.info & 0x7 | ||||
| 	if cm < 4 { | ||||
| 		return cm | ||||
| 	} | ||||
| 	if cm >= cXORCase { | ||||
| 		// xor the last bit of the rune with the case type bits. | ||||
| 		b := c.src[c.pSrc+c.sz-1] | ||||
| 		return info(b&1) ^ cm&0x3 | ||||
| 	} | ||||
| 	if cm == cIgnorableCased { | ||||
| 		return cLower | ||||
| 	} | ||||
| 	return cUncased | ||||
| } | ||||
|  | ||||
| // lower writes the lowercase version of the current rune to dst. | ||||
| func lower(c *context) bool { | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&hasMappingMask == 0 || ct == cLower { | ||||
| 		return c.copy() | ||||
| 	} | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		return c.copyXOR() | ||||
| 	} | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	offset := 2 + e[0]&lengthMask // size of header + fold string | ||||
| 	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { | ||||
| 		return c.writeString(e[offset : offset+nLower]) | ||||
| 	} | ||||
| 	return c.copy() | ||||
| } | ||||
|  | ||||
| func isLower(c *context) bool { | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&hasMappingMask == 0 || ct == cLower { | ||||
| 		return true | ||||
| 	} | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		c.err = transform.ErrEndOfSpan | ||||
| 		return false | ||||
| 	} | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { | ||||
| 		c.err = transform.ErrEndOfSpan | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // upper writes the uppercase version of the current rune to dst. | ||||
| func upper(c *context) bool { | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&hasMappingMask == 0 || ct == cUpper { | ||||
| 		return c.copy() | ||||
| 	} | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		return c.copyXOR() | ||||
| 	} | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	offset := 2 + e[0]&lengthMask // size of header + fold string | ||||
| 	// Get length of first special case mapping. | ||||
| 	n := (e[1] >> lengthBits) & lengthMask | ||||
| 	if ct == cTitle { | ||||
| 		// The first special case mapping is for lower. Set n to the second. | ||||
| 		if n == noChange { | ||||
| 			n = 0 | ||||
| 		} | ||||
| 		n, e = e[1]&lengthMask, e[n:] | ||||
| 	} | ||||
| 	if n != noChange { | ||||
| 		return c.writeString(e[offset : offset+n]) | ||||
| 	} | ||||
| 	return c.copy() | ||||
| } | ||||
|  | ||||
| // isUpper reports whether the current rune is in upper case. | ||||
| func isUpper(c *context) bool { | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&hasMappingMask == 0 || ct == cUpper { | ||||
| 		return true | ||||
| 	} | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		c.err = transform.ErrEndOfSpan | ||||
| 		return false | ||||
| 	} | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	// Get length of first special case mapping. | ||||
| 	n := (e[1] >> lengthBits) & lengthMask | ||||
| 	if ct == cTitle { | ||||
| 		n = e[1] & lengthMask | ||||
| 	} | ||||
| 	if n != noChange { | ||||
| 		c.err = transform.ErrEndOfSpan | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // title writes the title case version of the current rune to dst. | ||||
| func title(c *context) bool { | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&hasMappingMask == 0 || ct == cTitle { | ||||
| 		return c.copy() | ||||
| 	} | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		if ct == cLower { | ||||
| 			return c.copyXOR() | ||||
| 		} | ||||
| 		return c.copy() | ||||
| 	} | ||||
| 	// Get the exception data. | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	offset := 2 + e[0]&lengthMask // size of header + fold string | ||||
|  | ||||
| 	nFirst := (e[1] >> lengthBits) & lengthMask | ||||
| 	if nTitle := e[1] & lengthMask; nTitle != noChange { | ||||
| 		if nFirst != noChange { | ||||
| 			e = e[nFirst:] | ||||
| 		} | ||||
| 		return c.writeString(e[offset : offset+nTitle]) | ||||
| 	} | ||||
| 	if ct == cLower && nFirst != noChange { | ||||
| 		// Use the uppercase version instead. | ||||
| 		return c.writeString(e[offset : offset+nFirst]) | ||||
| 	} | ||||
| 	// Already in correct case. | ||||
| 	return c.copy() | ||||
| } | ||||
|  | ||||
| // isTitle reports whether the current rune is in title case. | ||||
| func isTitle(c *context) bool { | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&hasMappingMask == 0 || ct == cTitle { | ||||
| 		return true | ||||
| 	} | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		if ct == cLower { | ||||
| 			c.err = transform.ErrEndOfSpan | ||||
| 			return false | ||||
| 		} | ||||
| 		return true | ||||
| 	} | ||||
| 	// Get the exception data. | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	if nTitle := e[1] & lengthMask; nTitle != noChange { | ||||
| 		c.err = transform.ErrEndOfSpan | ||||
| 		return false | ||||
| 	} | ||||
| 	nFirst := (e[1] >> lengthBits) & lengthMask | ||||
| 	if ct == cLower && nFirst != noChange { | ||||
| 		c.err = transform.ErrEndOfSpan | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // foldFull writes the full case folding of the current rune to dst. | ||||
| func foldFull(c *context) bool { | ||||
| 	if c.info&hasMappingMask == 0 { | ||||
| 		return c.copy() | ||||
| 	} | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		if ct != cLower || c.info&inverseFoldBit != 0 { | ||||
| 			return c.copyXOR() | ||||
| 		} | ||||
| 		return c.copy() | ||||
| 	} | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	n := e[0] & lengthMask | ||||
| 	if n == 0 { | ||||
| 		if ct == cLower { | ||||
| 			return c.copy() | ||||
| 		} | ||||
| 		n = (e[1] >> lengthBits) & lengthMask | ||||
| 	} | ||||
| 	return c.writeString(e[2 : 2+n]) | ||||
| } | ||||
|  | ||||
| // isFoldFull reports whether the current rune is already in its full | ||||
| // case-folded form. | ||||
| func isFoldFull(c *context) bool { | ||||
| 	if c.info&hasMappingMask == 0 { | ||||
| 		return true | ||||
| 	} | ||||
| 	ct := c.caseType() | ||||
| 	if c.info&exceptionBit == 0 { | ||||
| 		if ct != cLower || c.info&inverseFoldBit != 0 { | ||||
| 			c.err = transform.ErrEndOfSpan | ||||
| 			return false | ||||
| 		} | ||||
| 		return true | ||||
| 	} | ||||
| 	e := exceptions[c.info>>exceptionShift:] | ||||
| 	n := e[0] & lengthMask | ||||
| 	if n == 0 && ct == cLower { | ||||
| 		return true | ||||
| 	} | ||||
| 	c.err = transform.ErrEndOfSpan | ||||
| 	return false | ||||
| } | ||||
							
								
								
									
										34
									
								
								vendor/golang.org/x/text/cases/fold.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								vendor/golang.org/x/text/cases/fold.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package cases | ||||
|  | ||||
| import "golang.org/x/text/transform" | ||||
|  | ||||
| type caseFolder struct{ transform.NopResetter } | ||||
|  | ||||
| // Transform implements the transform.Transformer interface for case folding. | ||||
| func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	c := context{dst: dst, src: src, atEOF: atEOF} | ||||
| 	for c.next() { | ||||
| 		foldFull(&c) | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| 	return c.ret() | ||||
| } | ||||
|  | ||||
| func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	c := context{src: src, atEOF: atEOF} | ||||
| 	for c.next() && isFoldFull(&c) { | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| 	return c.retSpan() | ||||
| } | ||||
|  | ||||
| func makeFold(o options) transform.SpanningTransformer { | ||||
| 	// TODO: Special case folding, through option Language, Special/Turkic, or | ||||
| 	// both. | ||||
| 	// TODO: Implement Compact options. | ||||
| 	return &caseFolder{} | ||||
| } | ||||
							
								
								
									
										61
									
								
								vendor/golang.org/x/text/cases/icu.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								vendor/golang.org/x/text/cases/icu.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| //go:build icu | ||||
|  | ||||
| package cases | ||||
|  | ||||
| // Ideally these functions would be defined in a test file, but go test doesn't | ||||
| // allow CGO in tests. The build tag should ensure either way that these | ||||
| // functions will not end up in the package. | ||||
|  | ||||
| // TODO: Ensure that the correct ICU version is set. | ||||
|  | ||||
| /* | ||||
| #cgo LDFLAGS: -licui18n.57 -licuuc.57 | ||||
| #include <stdlib.h> | ||||
| #include <unicode/ustring.h> | ||||
| #include <unicode/utypes.h> | ||||
| #include <unicode/localpointer.h> | ||||
| #include <unicode/ucasemap.h> | ||||
| */ | ||||
| import "C" | ||||
|  | ||||
| import "unsafe" | ||||
|  | ||||
| // doICU case-maps input with ICU's ucasemap API, using the locale named by | ||||
| // tag. caser selects the operation: "fold", "lower", "upper" or "title"; any | ||||
| // other value leaves the result empty. | ||||
| func doICU(tag, caser, input string) string { | ||||
| 	// An empty input maps to an empty output. Returning early also avoids | ||||
| 	// taking &buf[0] on a zero-length buffer, which would panic. | ||||
| 	if input == "" { | ||||
| 		return "" | ||||
| 	} | ||||
|  | ||||
| 	err := C.UErrorCode(0) | ||||
| 	loc := C.CString(tag) | ||||
| 	// C.CString allocates with malloc; the caller must free it. | ||||
| 	defer C.free(unsafe.Pointer(loc)) | ||||
| 	cm := C.ucasemap_open(loc, C.uint32_t(0), &err) | ||||
| 	// Release the case map once the conversion is done. | ||||
| 	defer C.ucasemap_close(cm) | ||||
|  | ||||
| 	// Output buffer sized at four times the input length. | ||||
| 	buf := make([]byte, len(input)*4) | ||||
| 	dst := (*C.char)(unsafe.Pointer(&buf[0])) | ||||
| 	src := C.CString(input) | ||||
| 	defer C.free(unsafe.Pointer(src)) | ||||
|  | ||||
| 	cn := C.int32_t(0) | ||||
|  | ||||
| 	switch caser { | ||||
| 	case "fold": | ||||
| 		cn = C.ucasemap_utf8FoldCase(cm, | ||||
| 			dst, C.int32_t(len(buf)), | ||||
| 			src, C.int32_t(len(input)), | ||||
| 			&err) | ||||
| 	case "lower": | ||||
| 		cn = C.ucasemap_utf8ToLower(cm, | ||||
| 			dst, C.int32_t(len(buf)), | ||||
| 			src, C.int32_t(len(input)), | ||||
| 			&err) | ||||
| 	case "upper": | ||||
| 		cn = C.ucasemap_utf8ToUpper(cm, | ||||
| 			dst, C.int32_t(len(buf)), | ||||
| 			src, C.int32_t(len(input)), | ||||
| 			&err) | ||||
| 	case "title": | ||||
| 		cn = C.ucasemap_utf8ToTitle(cm, | ||||
| 			dst, C.int32_t(len(buf)), | ||||
| 			src, C.int32_t(len(input)), | ||||
| 			&err) | ||||
| 	} | ||||
| 	return string(buf[:cn]) | ||||
| } | ||||
							
								
								
									
										82
									
								
								vendor/golang.org/x/text/cases/info.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								vendor/golang.org/x/text/cases/info.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| // Copyright 2015 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package cases | ||||
|  | ||||
| func (c info) cccVal() info { | ||||
| 	if c&exceptionBit != 0 { | ||||
| 		return info(exceptions[c>>exceptionShift]) & cccMask | ||||
| 	} | ||||
| 	return c & cccMask | ||||
| } | ||||
|  | ||||
| func (c info) cccType() info { | ||||
| 	ccc := c.cccVal() | ||||
| 	if ccc <= cccZero { | ||||
| 		return cccZero | ||||
| 	} | ||||
| 	return ccc | ||||
| } | ||||
|  | ||||
| // TODO: Implement full Unicode breaking algorithm: | ||||
| // 1) Implement breaking in separate package. | ||||
| // 2) Use the breaker here. | ||||
| // 3) Compare table size and performance of using the more generic breaker. | ||||
| // | ||||
| // Note that we can extend the current algorithm to be much more accurate. This | ||||
| // only makes sense, though, if the performance and/or space penalty of using | ||||
| // the generic breaker is big. Extra data will only be needed for non-cased | ||||
| // runes, which means there are sufficient bits left in the caseType. | ||||
| // ICU prohibits breaking in such cases as well. | ||||
|  | ||||
| // For the purpose of title casing we use an approximation of the Unicode Word | ||||
| // Breaking algorithm defined in Annex #29: | ||||
| // https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table. | ||||
| // | ||||
| // For our approximation, we group the Word Break types into the following | ||||
| // categories, with associated rules: | ||||
| // | ||||
| // 1) Letter: | ||||
| //    ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ. | ||||
| //    Rule: Never break between consecutive runes of this category. | ||||
| // | ||||
| // 2) Mid: | ||||
| //    MidLetter, MidNumLet, Single_Quote. | ||||
| //    (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn, | ||||
| //    Me, Cf, Lm or Sk). | ||||
| //    Rule: Don't break between Letter and Mid, but break between two Mids. | ||||
| // | ||||
| // 3) Break: | ||||
| //    Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and | ||||
| //    Other. | ||||
| //    These categories should always result in a break between two cased letters. | ||||
| //    Rule: Always break. | ||||
| // | ||||
| // Note 1: the Katakana and MidNum categories can, in esoteric cases, result in | ||||
| // preventing a break between two cased letters. For now we will ignore this | ||||
| // (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and | ||||
| // [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].) | ||||
| // | ||||
| // Note 2: the rule for Mid is very approximate, but works in most cases. To | ||||
| // improve, we could store the categories in the trie value and use a FA to | ||||
| // manage breaks. See TODO comment above. | ||||
| // | ||||
| // Note 3: according to the spec, it is possible for the Extend category to | ||||
| // introduce breaks between other categories grouped in Letter. However, this | ||||
| // is undesirable for our purposes. ICU prevents breaks in such cases as well. | ||||
|  | ||||
| // isBreak returns whether this rune should introduce a break, that is, | ||||
| // whether its ccc value equals cccBreak. | ||||
| func (c info) isBreak() bool { | ||||
| 	return c.cccVal() == cccBreak | ||||
| } | ||||
|  | ||||
| // isLetter returns whether the rune is of break type ALetter, Hebrew_Letter, | ||||
| // Numeric, ExtendNumLet, or Extend. A rune with ccc value cccZero counts as a | ||||
| // letter only if it is not case-ignorable; a rune with cccBreak never does. | ||||
| func (c info) isLetter() bool { | ||||
| 	switch c.cccVal() { | ||||
| 	case cccZero: | ||||
| 		return !c.isCaseIgnorable() | ||||
| 	case cccBreak: | ||||
| 		return false | ||||
| 	default: | ||||
| 		return true | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										816
									
								
								vendor/golang.org/x/text/cases/map.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										816
									
								
								vendor/golang.org/x/text/cases/map.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,816 @@ | ||||
| // Copyright 2014 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package cases | ||||
|  | ||||
| // This file contains the definitions of case mappings for all supported | ||||
| // languages. The rules for the language-specific tailorings were taken and | ||||
| // modified from the CLDR transform definitions in common/transforms. | ||||
|  | ||||
| import ( | ||||
| 	"strings" | ||||
| 	"unicode" | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"golang.org/x/text/internal" | ||||
| 	"golang.org/x/text/language" | ||||
| 	"golang.org/x/text/transform" | ||||
| 	"golang.org/x/text/unicode/norm" | ||||
| ) | ||||
|  | ||||
| // A mapFunc takes a context set to the current rune and writes the mapped | ||||
| // version to the same context. It may advance the context to the next rune. It | ||||
| // returns whether a checkpoint is possible: whether the pDst bytes written to | ||||
| // dst so far won't need changing as we see more source bytes. | ||||
| type mapFunc func(*context) bool | ||||
|  | ||||
| // A spanFunc takes a context set to the current rune and returns whether this | ||||
| // rune would be altered when written to the output. It may advance the context | ||||
| // to the next rune. It returns whether a checkpoint is possible. | ||||
| type spanFunc func(*context) bool | ||||
|  | ||||
| // maxIgnorable defines the maximum number of ignorables to consider for | ||||
| // lookahead operations. | ||||
| const maxIgnorable = 30 | ||||
|  | ||||
| // supported lists the language tags for which we have tailorings. The | ||||
| // per-language tables in this file (upperFunc, lowerFunc and titleInfos) list | ||||
| // their entries in this same order. | ||||
| const supported = "und af az el lt nl tr" | ||||
|  | ||||
| // init parses the tags listed in supported and uses them to build the | ||||
| // package-level matcher and the exported Supported coverage. | ||||
| func init() { | ||||
| 	names := strings.Split(supported, " ") | ||||
| 	tags := make([]language.Tag, len(names)) | ||||
| 	for i, name := range names { | ||||
| 		tags[i] = language.MustParse(name) | ||||
| 	} | ||||
| 	matcher = internal.NewInheritanceMatcher(tags) | ||||
| 	Supported = language.NewCoverage(tags) | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	// matcher maps a language tag to an index into the per-language tables | ||||
| 	// below. It is set in init. | ||||
| 	matcher *internal.InheritanceMatcher | ||||
|  | ||||
| 	// Supported is the coverage of the tags listed in supported. It is set in | ||||
| 	// init. | ||||
| 	Supported language.Coverage | ||||
|  | ||||
| 	// We keep the following lists separate, instead of having a single per- | ||||
| 	// language struct, to give the compiler a chance to remove unused code. | ||||
|  | ||||
| 	// Some uppercase mappers are stateless, so we can precompute the | ||||
| 	// Transformers and save a bit on runtime allocations. | ||||
| 	// | ||||
| 	// A nil upper entry makes makeUpper fall back to undUpper. | ||||
| 	upperFunc = []struct { | ||||
| 		upper mapFunc | ||||
| 		span  spanFunc | ||||
| 	}{ | ||||
| 		{nil, nil},                  // und | ||||
| 		{nil, nil},                  // af | ||||
| 		{aztrUpper(upper), isUpper}, // az | ||||
| 		{elUpper, noSpan},           // el | ||||
| 		{ltUpper(upper), noSpan},    // lt | ||||
| 		{nil, nil},                  // nl | ||||
| 		{aztrUpper(upper), isUpper}, // tr | ||||
| 	} | ||||
|  | ||||
| 	// Precomputed transformers for the root locale (und). | ||||
| 	undUpper            transform.SpanningTransformer = &undUpperCaser{} | ||||
| 	undLower            transform.SpanningTransformer = &undLowerCaser{} | ||||
| 	undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{} | ||||
|  | ||||
| 	// lowerFunc holds the language-specific lower-case mappers. A nil entry | ||||
| 	// makes makeLower fall back to undLower or undLowerIgnoreSigma. | ||||
| 	lowerFunc = []mapFunc{ | ||||
| 		nil,       // und | ||||
| 		nil,       // af | ||||
| 		aztrLower, // az | ||||
| 		nil,       // el | ||||
| 		ltLower,   // lt | ||||
| 		nil,       // nl | ||||
| 		aztrLower, // tr | ||||
| 	} | ||||
|  | ||||
| 	// titleInfos holds, per language, the mappers and the optional rewrite | ||||
| 	// hook that makeTitle copies into a titleCaser. | ||||
| 	titleInfos = []struct { | ||||
| 		title     mapFunc | ||||
| 		lower     mapFunc | ||||
| 		titleSpan spanFunc | ||||
| 		rewrite   func(*context) | ||||
| 	}{ | ||||
| 		{title, lower, isTitle, nil},                // und | ||||
| 		{title, lower, isTitle, afnlRewrite},        // af | ||||
| 		{aztrUpper(title), aztrLower, isTitle, nil}, // az | ||||
| 		{title, lower, isTitle, nil},                // el | ||||
| 		{ltUpper(title), ltLower, noSpan, nil},      // lt | ||||
| 		{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl | ||||
| 		{aztrUpper(title), aztrLower, isTitle, nil}, // tr | ||||
| 	} | ||||
| ) | ||||
|  | ||||
| // makeUpper returns the upper-casing transformer for t: a simpleCaser built | ||||
| // from the matching upperFunc entry, or the shared undUpper when that entry | ||||
| // has no language-specific mapper. | ||||
| func makeUpper(t language.Tag, o options) transform.SpanningTransformer { | ||||
| 	_, i, _ := matcher.Match(t) | ||||
| 	if entry := upperFunc[i]; entry.upper != nil { | ||||
| 		return &simpleCaser{f: entry.upper, span: entry.span} | ||||
| 	} | ||||
| 	return undUpper | ||||
| } | ||||
|  | ||||
| // makeLower returns the lower-casing transformer for t. Languages without a | ||||
| // tailored mapper share the precomputed und transformers; tailored languages | ||||
| // get a simpleCaser when final sigma handling is disabled and a lowerCaser | ||||
| // with final sigma handling otherwise. | ||||
| func makeLower(t language.Tag, o options) transform.SpanningTransformer { | ||||
| 	_, i, _ := matcher.Match(t) | ||||
| 	f := lowerFunc[i] | ||||
| 	switch { | ||||
| 	case f == nil && o.ignoreFinalSigma: | ||||
| 		return undLowerIgnoreSigma | ||||
| 	case f == nil: | ||||
| 		return undLower | ||||
| 	case o.ignoreFinalSigma: | ||||
| 		return &simpleCaser{f: f, span: isLower} | ||||
| 	default: | ||||
| 		return &lowerCaser{ | ||||
| 			first:   f, | ||||
| 			midWord: finalSigma(f), | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // makeTitle returns a titleCaser for t built from the matching titleInfos | ||||
| // entry. With o.noLower the non-initial runes are copied unchanged; otherwise | ||||
| // they are lower-cased, with final sigma handling unless o.ignoreFinalSigma | ||||
| // is set. | ||||
| func makeTitle(t language.Tag, o options) transform.SpanningTransformer { | ||||
| 	_, i, _ := matcher.Match(t) | ||||
| 	x := &titleInfos[i] | ||||
| 	tc := &titleCaser{ | ||||
| 		title:     x.title, | ||||
| 		lower:     x.lower, | ||||
| 		titleSpan: x.titleSpan, | ||||
| 		rewrite:   x.rewrite, | ||||
| 	} | ||||
| 	switch { | ||||
| 	case o.noLower: | ||||
| 		tc.lower = (*context).copy | ||||
| 	case !o.ignoreFinalSigma: | ||||
| 		tc.lower = finalSigma(tc.lower) | ||||
| 	} | ||||
| 	return tc | ||||
| } | ||||
|  | ||||
| // noSpan is the spanFunc used for mappers that have no span implementation. | ||||
| // It sets c.err to transform.ErrEndOfSpan and reports that no checkpoint is | ||||
| // possible. | ||||
| func noSpan(c *context) bool { | ||||
| 	c.err = transform.ErrEndOfSpan | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| // TODO: consider a similar special case for the fast majority lower case. This | ||||
| // is a bit more involved so will require some more precise benchmarking to | ||||
| // justify it. | ||||
|  | ||||
| // undUpperCaser is the upper-casing transformer for the root locale (und). | ||||
| type undUpperCaser struct{ transform.NopResetter } | ||||
|  | ||||
| // Transform implements the Transformer interface for doing an upper case | ||||
| // mapping for the root locale (und). It eliminates the need for an allocation | ||||
| // as it prevents escaping by not using function pointers. | ||||
| func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	c := context{dst: dst, src: src, atEOF: atEOF} | ||||
| 	for c.next() { | ||||
| 		// The result of upper is not consulted: a checkpoint is taken after | ||||
| 		// every rune. | ||||
| 		upper(&c) | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| 	return c.ret() | ||||
| } | ||||
|  | ||||
| // Span advances over src for as long as isUpper accepts each rune, taking a | ||||
| // checkpoint after every accepted rune. | ||||
| func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	c := context{src: src, atEOF: atEOF} | ||||
| 	for { | ||||
| 		if !c.next() || !isUpper(&c) { | ||||
| 			return c.retSpan() | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // undLowerIgnoreSigmaCaser implements the Transformer interface for doing | ||||
| // a lower case mapping for the root locale (und) ignoring final sigma | ||||
| // handling. This casing algorithm is used in some performance-critical packages | ||||
| // like secure/precis and x/net/http/idna, which warrants its special-casing. | ||||
| type undLowerIgnoreSigmaCaser struct{ transform.NopResetter } | ||||
|  | ||||
| // Transform lower-cases src into dst one rune at a time, taking a checkpoint | ||||
| // after every rune for which lower reports that a checkpoint is possible. | ||||
| func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	c := context{dst: dst, src: src, atEOF: atEOF} | ||||
| 	for { | ||||
| 		if !c.next() || !lower(&c) { | ||||
| 			return c.ret() | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Span implements a generic lower-casing. This is possible as isLower works | ||||
| // for all lowercasing variants. All lowercase variants only vary in how they | ||||
| // transform a non-lowercase letter. They will never change an already lowercase | ||||
| // letter. In addition, there is no state. | ||||
| func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	c := context{src: src, atEOF: atEOF} | ||||
| 	for { | ||||
| 		if !c.next() || !isLower(&c) { | ||||
| 			return c.retSpan() | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // simpleCaser implements the Transformer interface for doing a case operation | ||||
| // on a rune-by-rune basis. f maps each rune in Transform; span checks each | ||||
| // rune in Span. | ||||
| type simpleCaser struct { | ||||
| 	context | ||||
| 	f    mapFunc | ||||
| 	span spanFunc | ||||
| } | ||||
|  | ||||
| // Transform applies t.f to every rune of src, taking a checkpoint after each | ||||
| // rune for which t.f reports that a checkpoint is possible. It works on a | ||||
| // local context rather than on the embedded one. | ||||
| func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	c := context{dst: dst, src: src, atEOF: atEOF} | ||||
| 	for c.next() && t.f(&c) { | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| 	return c.ret() | ||||
| } | ||||
|  | ||||
| // Span advances over src for as long as t.span accepts each rune, taking a | ||||
| // checkpoint after every accepted rune. | ||||
| func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	c := context{src: src, atEOF: atEOF} | ||||
| 	for { | ||||
| 		if !c.next() || !t.span(&c) { | ||||
| 			return c.retSpan() | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // undLowerCaser implements the Transformer interface for doing a lower case | ||||
| // mapping for the root locale (und), including final sigma handling. It is the | ||||
| // counterpart of undLowerIgnoreSigmaCaser, which skips that handling. | ||||
| type undLowerCaser struct{ transform.NopResetter } | ||||
|  | ||||
| // Transform lower-cases src into dst. It tracks whether the current position | ||||
| // is between words so that a Σ inside a word can be routed through | ||||
| // finalSigmaBody, while every other rune is lower-cased or copied directly. | ||||
| func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	c := context{dst: dst, src: src, atEOF: atEOF} | ||||
|  | ||||
| 	// isInterWord is true until a cased rune starts a word, and becomes true | ||||
| 	// again after a rune that is neither cased nor case-ignorable. | ||||
| 	for isInterWord := true; c.next(); { | ||||
| 		if isInterWord { | ||||
| 			if c.info.isCased() { | ||||
| 				if !lower(&c) { | ||||
| 					break | ||||
| 				} | ||||
| 				isInterWord = false | ||||
| 			} else if !c.copy() { | ||||
| 				break | ||||
| 			} | ||||
| 		} else { | ||||
| 			if c.info.isNotCasedAndNotCaseIgnorable() { | ||||
| 				if !c.copy() { | ||||
| 					break | ||||
| 				} | ||||
| 				isInterWord = true | ||||
| 			} else if !c.hasPrefix("Σ") { | ||||
| 				if !lower(&c) { | ||||
| 					break | ||||
| 				} | ||||
| 			} else if !finalSigmaBody(&c) { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| 	return c.ret() | ||||
| } | ||||
|  | ||||
| // Span advances over src for as long as isLower accepts each rune, taking a | ||||
| // checkpoint after every accepted rune. | ||||
| func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	c := context{src: src, atEOF: atEOF} | ||||
| 	for { | ||||
| 		if !c.next() || !isLower(&c) { | ||||
| 			return c.retSpan() | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // lowerCaser implements the Transformer interface. The default Unicode lower | ||||
| // casing requires different treatment for the first and subsequent characters | ||||
| // of a word, most notably to handle the Greek final Sigma. | ||||
| // | ||||
| // first maps the cased rune that starts a word and midWord maps the runes | ||||
| // that follow it. The embedded undLowerIgnoreSigmaCaser supplies Span. | ||||
| type lowerCaser struct { | ||||
| 	undLowerIgnoreSigmaCaser | ||||
|  | ||||
| 	context | ||||
|  | ||||
| 	first, midWord mapFunc | ||||
| } | ||||
|  | ||||
| // Transform lower-cases src into dst, applying t.first to the cased rune that | ||||
| // starts a word and t.midWord to the runes after it. Runes between words are | ||||
| // copied unchanged. | ||||
| func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	t.context = context{dst: dst, src: src, atEOF: atEOF} | ||||
| 	c := &t.context | ||||
|  | ||||
| 	// isInterWord is true until a cased rune starts a word, and becomes true | ||||
| 	// again after a rune that is neither cased nor case-ignorable. | ||||
| 	for isInterWord := true; c.next(); { | ||||
| 		if isInterWord { | ||||
| 			if c.info.isCased() { | ||||
| 				if !t.first(c) { | ||||
| 					break | ||||
| 				} | ||||
| 				isInterWord = false | ||||
| 			} else if !c.copy() { | ||||
| 				break | ||||
| 			} | ||||
| 		} else { | ||||
| 			if c.info.isNotCasedAndNotCaseIgnorable() { | ||||
| 				if !c.copy() { | ||||
| 					break | ||||
| 				} | ||||
| 				isInterWord = true | ||||
| 			} else if !t.midWord(c) { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		c.checkpoint() | ||||
| 	} | ||||
| 	return c.ret() | ||||
| } | ||||
|  | ||||
| // titleCaser implements the Transformer interface. Title casing algorithms | ||||
| // distinguish between the first letter of a word and subsequent letters of the | ||||
| // same word. It uses state to avoid requiring a potentially infinite lookahead. | ||||
| // | ||||
| // The state lives in the embedded context: Transform and Span carry its | ||||
| // isMidWord value over from one call to the next. | ||||
| type titleCaser struct { | ||||
| 	context | ||||
|  | ||||
| 	// rune mappings used by the actual casing algorithms. | ||||
| 	title     mapFunc | ||||
| 	lower     mapFunc | ||||
| 	titleSpan spanFunc | ||||
|  | ||||
| 	// rewrite, if non-nil, is called for every rune before it is mapped. | ||||
| 	rewrite func(*context) | ||||
| } | ||||
|  | ||||
| // Transform implements the standard Unicode title case algorithm as defined in | ||||
| // Chapter 3 of The Unicode Standard: | ||||
| // toTitlecase(X): Find the word boundaries in X according to Unicode Standard | ||||
| // Annex #29, "Unicode Text Segmentation." For each word boundary, find the | ||||
| // first cased character F following the word boundary. If F exists, map F to | ||||
| // Titlecase_Mapping(F); then map all characters C between F and the following | ||||
| // word boundary to Lowercase_Mapping(C). | ||||
| func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	// Start from a fresh context but keep the isMidWord state of the | ||||
| 	// previous call. | ||||
| 	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord} | ||||
| 	c := &t.context | ||||
|  | ||||
| 	if !c.next() { | ||||
| 		return c.ret() | ||||
| 	} | ||||
|  | ||||
| 	for { | ||||
| 		// Capture the info of the current rune before the rewrite hook and | ||||
| 		// the map functions get a chance to advance the context. | ||||
| 		p := c.info | ||||
| 		if t.rewrite != nil { | ||||
| 			t.rewrite(c) | ||||
| 		} | ||||
|  | ||||
| 		wasMid := p.isMid() | ||||
| 		// Break out of this loop on failure to ensure we do not modify the | ||||
| 		// state incorrectly. | ||||
| 		if p.isCased() { | ||||
| 			if !c.isMidWord { | ||||
| 				if !t.title(c) { | ||||
| 					break | ||||
| 				} | ||||
| 				c.isMidWord = true | ||||
| 			} else if !t.lower(c) { | ||||
| 				break | ||||
| 			} | ||||
| 		} else if !c.copy() { | ||||
| 			break | ||||
| 		} else if p.isBreak() { | ||||
| 			c.isMidWord = false | ||||
| 		} | ||||
|  | ||||
| 		// As we save the state of the transformer, it is safe to call | ||||
| 		// checkpoint after any successful write. | ||||
| 		if !(c.isMidWord && wasMid) { | ||||
| 			c.checkpoint() | ||||
| 		} | ||||
|  | ||||
| 		if !c.next() { | ||||
| 			break | ||||
| 		} | ||||
| 		// Two consecutive Mid runes end the word. | ||||
| 		if wasMid && c.info.isMid() { | ||||
| 			c.isMidWord = false | ||||
| 		} | ||||
| 	} | ||||
| 	return c.ret() | ||||
| } | ||||
|  | ||||
| // Span follows the same word-tracking loop as Transform but writes nothing: | ||||
| // the first cased rune of a word is checked with t.titleSpan and the cased | ||||
| // runes after it with isLower. | ||||
| func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) { | ||||
| 	// Start from a fresh context but keep the isMidWord state of the | ||||
| 	// previous call. | ||||
| 	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord} | ||||
| 	c := &t.context | ||||
|  | ||||
| 	if !c.next() { | ||||
| 		return c.retSpan() | ||||
| 	} | ||||
|  | ||||
| 	for { | ||||
| 		// Capture the info of the current rune before the rewrite hook and | ||||
| 		// the span functions get a chance to advance the context. | ||||
| 		p := c.info | ||||
| 		if t.rewrite != nil { | ||||
| 			t.rewrite(c) | ||||
| 		} | ||||
|  | ||||
| 		wasMid := p.isMid() | ||||
| 		// Break out of this loop on failure to ensure we do not modify the | ||||
| 		// state incorrectly. | ||||
| 		if p.isCased() { | ||||
| 			if !c.isMidWord { | ||||
| 				if !t.titleSpan(c) { | ||||
| 					break | ||||
| 				} | ||||
| 				c.isMidWord = true | ||||
| 			} else if !isLower(c) { | ||||
| 				break | ||||
| 			} | ||||
| 		} else if p.isBreak() { | ||||
| 			c.isMidWord = false | ||||
| 		} | ||||
| 		// As we save the state of the transformer, it is safe to call | ||||
| 		// checkpoint after any rune that passed the checks above. | ||||
| 		if !(c.isMidWord && wasMid) { | ||||
| 			c.checkpoint() | ||||
| 		} | ||||
|  | ||||
| 		if !c.next() { | ||||
| 			break | ||||
| 		} | ||||
| 		// Two consecutive Mid runes end the word. | ||||
| 		if wasMid && c.info.isMid() { | ||||
| 			c.isMidWord = false | ||||
| 		} | ||||
| 	} | ||||
| 	return c.retSpan() | ||||
| } | ||||
|  | ||||
| // finalSigma adds Greek final Sigma handling to another casing function. It | ||||
| // determines whether a lowercased sigma should be σ or ς, by looking ahead for | ||||
| // case-ignorables and cased letters. Runes other than Σ are passed to f. | ||||
| func finalSigma(f mapFunc) mapFunc { | ||||
| 	return func(c *context) bool { | ||||
| 		if c.hasPrefix("Σ") { | ||||
| 			return finalSigmaBody(c) | ||||
| 		} | ||||
| 		return f(c) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // finalSigmaBody lower-cases the Σ at the current position. It first writes | ||||
| // the final form ς and then looks ahead over at most maxIgnorable | ||||
| // case-ignorable runes, copying them to the output. If the first rune that is | ||||
| // not case-ignorable is cased, the ς already written is turned into σ. It | ||||
| // returns false if the input runs out before that decision can be made. | ||||
| func finalSigmaBody(c *context) bool { | ||||
| 	// Current rune must be Σ. | ||||
|  | ||||
| 	// ::NFD(); | ||||
| 	// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA | ||||
| 	// Σ } [:case-ignorable:]* [:cased:] → σ; | ||||
| 	// [:cased:] [:case-ignorable:]* { Σ → ς; | ||||
| 	// ::Any-Lower; | ||||
| 	// ::NFC(); | ||||
|  | ||||
| 	// Remember where ς is written so that it can be patched to σ later. | ||||
| 	p := c.pDst | ||||
| 	c.writeString("ς") | ||||
|  | ||||
| 	// TODO: we should do this here, but right now this will never have an | ||||
| 	// effect as this is called when the prefix is Sigma, whereas Dutch and | ||||
| 	// Afrikaans only test for an apostrophe. | ||||
| 	// | ||||
| 	// if t.rewrite != nil { | ||||
| 	// 	t.rewrite(c) | ||||
| 	// } | ||||
|  | ||||
| 	// We need to do one more iteration after maxIgnorable, as a cased | ||||
| 	// letter is not an ignorable and may modify the result. | ||||
| 	wasMid := false | ||||
| 	for i := 0; i < maxIgnorable+1; i++ { | ||||
| 		if !c.next() { | ||||
| 			return false | ||||
| 		} | ||||
| 		if !c.info.isCaseIgnorable() { | ||||
| 			// All Midword runes are also case ignorable, so we are | ||||
| 			// guaranteed to have a letter or word break here. As we are | ||||
| 			// unreading the rune, there is no need to unset c.isMidWord; | ||||
| 			// the title caser will handle this. | ||||
| 			if c.info.isCased() { | ||||
| 				// p+1 is guaranteed to be in bounds: if writing ς was | ||||
| 				// successful, p+1 will contain the second byte of ς. If not, | ||||
| 				// this function will have returned after c.next returned false. | ||||
| 				c.dst[p+1]++ // ς → σ | ||||
| 			} | ||||
| 			c.unreadRune() | ||||
| 			return true | ||||
| 		} | ||||
| 		// A case ignorable may also introduce a word break, so we may need | ||||
| 		// to continue searching even after detecting a break. | ||||
| 		isMid := c.info.isMid() | ||||
| 		if (wasMid && isMid) || c.info.isBreak() { | ||||
| 			c.isMidWord = false | ||||
| 		} | ||||
| 		wasMid = isMid | ||||
| 		c.copy() | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // finalSigmaSpan would be the same as isLower. | ||||
|  | ||||
| // elUpper implements Greek upper casing, which entails removing a predefined | ||||
| // set of non-blocked modifiers. Note that these accents should not be removed | ||||
| // for title casing! | ||||
| // Example: "Οδός" -> "ΟΔΟΣ". | ||||
| func elUpper(c *context) bool { | ||||
| 	// From CLDR: | ||||
| 	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ; | ||||
| 	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ; | ||||
|  | ||||
| 	// Decode the source rune before upper-casing so that its script can be | ||||
| 	// tested afterwards; non-Greek runes need no further work. | ||||
| 	r, _ := utf8.DecodeRune(c.src[c.pSrc:]) | ||||
| 	oldPDst := c.pDst | ||||
| 	if !upper(c) { | ||||
| 		return false | ||||
| 	} | ||||
| 	if !unicode.Is(unicode.Greek, r) { | ||||
| 		return true | ||||
| 	} | ||||
| 	i := 0 | ||||
| 	// Take the properties of the uppercased rune that is already written to the | ||||
| 	// destination. This saves us the trouble of having to uppercase the | ||||
| 	// decomposed rune again. | ||||
| 	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil { | ||||
| 		// Restore the destination position and process the decomposed rune. | ||||
| 		r, sz := utf8.DecodeRune(b) | ||||
| 		if r <= 0xFF { // See A.6.1 | ||||
| 			return true | ||||
| 		} | ||||
| 		c.pDst = oldPDst | ||||
| 		// Insert the first rune and ignore the modifiers. See A.6.2. | ||||
| 		c.writeBytes(b[:sz]) | ||||
| 		i = len(b[sz:]) / 2 // Greek modifiers are always of length 2. | ||||
| 	} | ||||
|  | ||||
| 	// Scan the modifiers that follow, dropping the ones listed below and | ||||
| 	// copying the others. | ||||
| 	for ; i < maxIgnorable && c.next(); i++ { | ||||
| 		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r { | ||||
| 		// Above and Iota Subscript | ||||
| 		case 0x0300, // U+0300 COMBINING GRAVE ACCENT | ||||
| 			0x0301, // U+0301 COMBINING ACUTE ACCENT | ||||
| 			0x0304, // U+0304 COMBINING MACRON | ||||
| 			0x0306, // U+0306 COMBINING BREVE | ||||
| 			0x0308, // U+0308 COMBINING DIAERESIS | ||||
| 			0x0313, // U+0313 COMBINING COMMA ABOVE | ||||
| 			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE | ||||
| 			0x0342, // U+0342 COMBINING GREEK PERISPOMENI | ||||
| 			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI | ||||
| 			// No-op. Gobble the modifier. | ||||
|  | ||||
| 		default: | ||||
| 			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() { | ||||
| 			case cccZero: | ||||
| 				c.unreadRune() | ||||
| 				return true | ||||
|  | ||||
| 			// We don't need to test for IotaSubscript as the only rune that | ||||
| 			// qualifies (U+0345) was already excluded in the switch statement | ||||
| 			// above. See A.4. | ||||
|  | ||||
| 			case cccAbove: | ||||
| 				return c.copy() | ||||
| 			default: | ||||
| 				// Some other modifier. We're still allowed to gobble Greek | ||||
| 				// modifiers after this. | ||||
| 				c.copy() | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	// True only when the loop ended because the maxIgnorable limit was | ||||
| 	// reached, not because c.next returned false. | ||||
| 	return i == maxIgnorable | ||||
| } | ||||
|  | ||||
| // TODO: implement elUpperSpan (low-priority: complex and infrequent). | ||||
|  | ||||
| // ltLower implements the Lithuanian lower-casing rules quoted from CLDR | ||||
| // below: an explicit U+0307 (combining dot above) is introduced when | ||||
| // lowercasing a capital I or J that carries further accents above. | ||||
| func ltLower(c *context) bool { | ||||
| 	// From CLDR: | ||||
| 	// # Introduce an explicit dot above when lowercasing capital I's and J's | ||||
| 	// # whenever there are more accents above. | ||||
| 	// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) | ||||
| 	// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I | ||||
| 	// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J | ||||
| 	// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK | ||||
| 	// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE | ||||
| 	// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE | ||||
| 	// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE | ||||
| 	// ::NFD(); | ||||
| 	// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307; | ||||
| 	// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307; | ||||
| 	// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307; | ||||
| 	// I \u0300 (Ì) → i \u0307 \u0300; | ||||
| 	// I \u0301 (Í) → i \u0307 \u0301; | ||||
| 	// I \u0303 (Ĩ) → i \u0307 \u0303; | ||||
| 	// ::Any-Lower(); | ||||
| 	// ::NFC(); | ||||
|  | ||||
| 	// i counts the modifiers seen so far and bounds the lookahead loop below. | ||||
| 	i := 0 | ||||
| 	if r := c.src[c.pSrc]; r < utf8.RuneSelf { | ||||
| 		// ASCII: only 'I' and 'J' need the lookahead below. | ||||
| 		lower(c) | ||||
| 		if r != 'I' && r != 'J' { | ||||
| 			return true | ||||
| 		} | ||||
| 	} else { | ||||
| 		p := norm.NFD.Properties(c.src[c.pSrc:]) | ||||
| 		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') { | ||||
| 			// UTF-8 optimization: the decomposition will only have an above | ||||
| 			// modifier if the last rune of the decomposition is in [U+300-U+311]. | ||||
| 			// In all other cases, a decomposition starting with I is always | ||||
| 			// an I followed by modifiers that are not cased themselves. See A.2. | ||||
| 			if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4. | ||||
| 				if !c.writeBytes(d[:1]) { | ||||
| 					return false | ||||
| 				} | ||||
| 				c.dst[c.pDst-1] += 'a' - 'A' // lower | ||||
|  | ||||
| 				// Assumption: modifier never changes on lowercase. See A.1. | ||||
| 				// Assumption: all modifiers added have CCC = Above. See A.2.3. | ||||
| 				return c.writeString("\u0307") && c.writeBytes(d[1:]) | ||||
| 			} | ||||
| 			// In all other cases the additional modifiers will have a CCC | ||||
| 			// that is less than 230 (Above). We will insert the U+0307, if | ||||
| 			// needed, after these modifiers so that a string in FCD form | ||||
| 			// will remain so. See A.2.2. | ||||
| 			lower(c) | ||||
| 			i = 1 | ||||
| 		} else { | ||||
| 			return lower(c) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Look ahead for a modifier with ccc Above; U+0307 is inserted in front | ||||
| 	// of the first one found. | ||||
| 	for ; i < maxIgnorable && c.next(); i++ { | ||||
| 		switch c.info.cccType() { | ||||
| 		case cccZero: | ||||
| 			c.unreadRune() | ||||
| 			return true | ||||
| 		case cccAbove: | ||||
| 			return c.writeString("\u0307") && c.copy() // See A.1. | ||||
| 		default: | ||||
| 			c.copy() // See A.1. | ||||
| 		} | ||||
| 	} | ||||
| 	// True only when the loop ended because the maxIgnorable limit was | ||||
| 	// reached, not because c.next returned false. | ||||
| 	return i == maxIgnorable | ||||
| } | ||||
|  | ||||
| // ltLowerSpan would be the same as isLower. | ||||
|  | ||||
| // ltUpper wraps the mapping function f with the Lithuanian rule quoted below: | ||||
| // a U+0307 (combining dot above) that follows a soft-dotted rune is removed. | ||||
| func ltUpper(f mapFunc) mapFunc { | ||||
| 	return func(c *context) bool { | ||||
| 		// Unicode: | ||||
| 		// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE | ||||
| 		// | ||||
| 		// From CLDR: | ||||
| 		// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible | ||||
| 		// # intervening non-230 marks. | ||||
| 		// ::NFD(); | ||||
| 		// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ; | ||||
| 		// ::Any-Upper(); | ||||
| 		// ::NFC(); | ||||
|  | ||||
| 		// TODO: See A.5. A soft-dotted rune never has an exception. This would | ||||
| 		// allow us to overload the exception bit and encode this property in | ||||
| 		// info. Need to measure performance impact of this. | ||||
| 		r, _ := utf8.DecodeRune(c.src[c.pSrc:]) | ||||
| 		oldPDst := c.pDst | ||||
| 		if !f(c) { | ||||
| 			return false | ||||
| 		} | ||||
| 		if !unicode.Is(unicode.Soft_Dotted, r) { | ||||
| 			return true | ||||
| 		} | ||||
|  | ||||
| 		// We don't need to do an NFD normalization, as a soft-dotted rune never | ||||
| 		// contains U+0307. See A.3. | ||||
|  | ||||
| 		i := 0 | ||||
| 		for ; i < maxIgnorable && c.next(); i++ { | ||||
| 			switch c.info.cccType() { | ||||
| 			case cccZero: | ||||
| 				c.unreadRune() | ||||
| 				return true | ||||
| 			case cccAbove: | ||||
| 				if c.hasPrefix("\u0307") { | ||||
| 					// We don't do a full NFC, but rather combine runes for | ||||
| 					// some of the common cases. (Returning NFC or | ||||
| 					// preserving normal form is neither a requirement nor | ||||
| 					// a possibility anyway). | ||||
| 					// | ||||
| 					// Advancing without copying drops the U+0307. | ||||
| 					if !c.next() { | ||||
| 						return false | ||||
| 					} | ||||
| 					// If the output so far is a lone 'I' and the next rune | ||||
| 					// starts with byte 0xcc, try to emit a precomposed | ||||
| 					// capital I instead. | ||||
| 					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc { | ||||
| 						s := "" | ||||
| 						switch c.src[c.pSrc+1] { | ||||
| 						case 0x80: // U+0300 COMBINING GRAVE ACCENT | ||||
| 							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE | ||||
| 						case 0x81: // U+0301 COMBINING ACUTE ACCENT | ||||
| 							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE | ||||
| 						case 0x83: // U+0303 COMBINING TILDE | ||||
| 							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE | ||||
| 						case 0x88: // U+0308 COMBINING DIAERESIS | ||||
| 							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS | ||||
| 						default: | ||||
| 						} | ||||
| 						if s != "" { | ||||
| 							c.pDst = oldPDst | ||||
| 							return c.writeString(s) | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
| 				return c.copy() | ||||
| 			default: | ||||
| 				c.copy() | ||||
| 			} | ||||
| 		} | ||||
| 		// True only when the loop ended because the maxIgnorable limit was | ||||
| 		// reached, not because c.next returned false. | ||||
| 		return i == maxIgnorable | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TODO: implement ltUpperSpan (low priority: complex and infrequent). | ||||
|  | ||||
| // aztrUpper wraps the mapping function f for Azeri and Turkish: an ASCII 'i' | ||||
| // is written as İ and every other rune is handed to f. | ||||
| func aztrUpper(f mapFunc) mapFunc { | ||||
| 	return func(c *context) bool { | ||||
| 		// i→İ; | ||||
| 		if c.src[c.pSrc] != 'i' { | ||||
| 			return f(c) | ||||
| 		} | ||||
| 		return c.writeString("İ") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // aztrLower implements the Azeri and Turkish lower-casing rules quoted from | ||||
| // CLDR below: İ becomes i, I followed (possibly after other modifiers) by | ||||
| // U+0307 becomes i with the U+0307 dropped, and any other I becomes the | ||||
| // dotless ı. All other runes are passed to lower. | ||||
| func aztrLower(c *context) (done bool) { | ||||
| 	// From CLDR: | ||||
| 	// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri | ||||
| 	// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE | ||||
| 	// İ→i; | ||||
| 	// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. | ||||
| 	// # This matches the behavior of the canonically equivalent I-dot_above | ||||
| 	// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE | ||||
| 	// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. | ||||
| 	// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I | ||||
| 	// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; | ||||
| 	// I→ı ; | ||||
| 	// ::Any-Lower(); | ||||
| 	if c.hasPrefix("\u0130") { // İ | ||||
| 		return c.writeString("i") | ||||
| 	} | ||||
| 	if c.src[c.pSrc] != 'I' { | ||||
| 		return lower(c) | ||||
| 	} | ||||
|  | ||||
| 	// We ignore the lower-case I for now, but insert it later when we know | ||||
| 	// which form we need. | ||||
| 	start := c.pSrc + c.sz | ||||
|  | ||||
| 	i := 0 | ||||
| Loop: | ||||
| 	// We check for up to n ignorables before \u0307. As \u0307 is an | ||||
| 	// ignorable as well, n is maxIgnorable-1. | ||||
| 	for ; i < maxIgnorable && c.next(); i++ { | ||||
| 		switch c.info.cccType() { | ||||
| 		case cccAbove: | ||||
| 			if c.hasPrefix("\u0307") { | ||||
| 				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307 | ||||
| 			} | ||||
| 			done = true | ||||
| 			break Loop | ||||
| 		case cccZero: | ||||
| 			c.unreadRune() | ||||
| 			done = true | ||||
| 			break Loop | ||||
| 		default: | ||||
| 			// We'll write this rune after we know which starter to use. | ||||
| 		} | ||||
| 	} | ||||
| 	if i == maxIgnorable { | ||||
| 		done = true | ||||
| 	} | ||||
| 	// No U+0307 was found: write the dotless ı followed by the source bytes | ||||
| 	// that were skipped over while looking ahead. | ||||
| 	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done | ||||
| } | ||||
|  | ||||
| // aztrLowerSpan would be the same as isLower. | ||||
|  | ||||
| // nlTitle implements the Dutch title-casing rule for an initial "ij": both | ||||
| // letters are written in upper case as "IJ". Runes other than 'I' and 'i' are | ||||
| // passed to title. | ||||
| func nlTitle(c *context) bool { | ||||
| 	// From CLDR: | ||||
| 	// # Special titlecasing for Dutch initial "ij". | ||||
| 	// ::Any-Title(); | ||||
| 	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29) | ||||
| 	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; | ||||
| 	if first := c.src[c.pSrc]; first != 'I' && first != 'i' { | ||||
| 		return title(c) | ||||
| 	} | ||||
|  | ||||
| 	if ok := c.writeString("I") && c.next(); !ok { | ||||
| 		return false | ||||
| 	} | ||||
| 	if second := c.src[c.pSrc]; second != 'j' && second != 'J' { | ||||
| 		// Not part of an "ij" pair: leave the rune for the caller. | ||||
| 		c.unreadRune() | ||||
| 		return true | ||||
| 	} | ||||
| 	return c.writeString("J") | ||||
| } | ||||
|  | ||||
| // nlTitleSpan is the span counterpart of nlTitle. An 'I' followed by a lower | ||||
| // case 'j' is rejected, an 'I' followed by 'J' is accepted as a pair, and any | ||||
| // other rune after 'I' is left for the caller. Runes other than 'I' are passed | ||||
| // to isTitle. | ||||
| func nlTitleSpan(c *context) bool { | ||||
| 	// From CLDR: | ||||
| 	// # Special titlecasing for Dutch initial "ij". | ||||
| 	// ::Any-Title(); | ||||
| 	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29) | ||||
| 	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; | ||||
| 	if c.src[c.pSrc] != 'I' { | ||||
| 		return isTitle(c) | ||||
| 	} | ||||
| 	if !c.next() { | ||||
| 		return false | ||||
| 	} | ||||
| 	switch c.src[c.pSrc] { | ||||
| 	case 'j': | ||||
| 		return false | ||||
| 	case 'J': | ||||
| 		// Already "IJ"; keep the J consumed. | ||||
| 	default: | ||||
| 		c.unreadRune() | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // afnlRewrite sets c.isMidWord when the input at the current position starts | ||||
| // with an ASCII apostrophe (') or a right single quotation mark (’). The af/nl | ||||
| // prefix suggests it is used for Afrikaans and Dutch -- confirm at the call | ||||
| // site. | ||||
| // | ||||
| // Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078. | ||||
| func afnlRewrite(c *context) { | ||||
| 	if c.hasPrefix("'") || c.hasPrefix("’") { | ||||
| 		c.isMidWord = true | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										2255
									
								
								vendor/golang.org/x/text/cases/tables10.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2255
									
								
								vendor/golang.org/x/text/cases/tables10.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2316
									
								
								vendor/golang.org/x/text/cases/tables11.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2316
									
								
								vendor/golang.org/x/text/cases/tables11.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2359
									
								
								vendor/golang.org/x/text/cases/tables12.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2359
									
								
								vendor/golang.org/x/text/cases/tables12.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2399
									
								
								vendor/golang.org/x/text/cases/tables13.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2399
									
								
								vendor/golang.org/x/text/cases/tables13.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2527
									
								
								vendor/golang.org/x/text/cases/tables15.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2527
									
								
								vendor/golang.org/x/text/cases/tables15.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										2215
									
								
								vendor/golang.org/x/text/cases/tables9.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2215
									
								
								vendor/golang.org/x/text/cases/tables9.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										217
									
								
								vendor/golang.org/x/text/cases/trieval.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								vendor/golang.org/x/text/cases/trieval.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,217 @@ | ||||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | ||||
|  | ||||
| package cases | ||||
|  | ||||
| // This file contains definitions for interpreting the trie value of the case | ||||
| // trie generated by "go run gen*.go". It is shared by both the generator | ||||
| // program and the resultant package. Sharing is achieved by the generator | ||||
| // copying gen_trieval.go to trieval.go and changing what's above this comment. | ||||
|  | ||||
| // info holds case information for a single rune. It is the value returned | ||||
| // by a trie lookup. Most mapping information can be stored in a single 16-bit | ||||
| // value. If not, for example when a rune is mapped to multiple runes, the value | ||||
| // stores some basic case data and an index into an array with additional data. | ||||
| // | ||||
| // The per-rune values have the following format: | ||||
| // | ||||
| //	if (exception) { | ||||
| //	  15..4  unsigned exception index | ||||
| //	} else { | ||||
| //	  15..8  XOR pattern or index to XOR pattern for case mapping | ||||
| //	         Only 13..8 are used for XOR patterns. | ||||
| //	      7  inverseFold (fold to upper, not to lower) | ||||
| //	      6  index: interpret the XOR pattern as an index | ||||
| //	         or isMid if case mode is cIgnorableUncased. | ||||
| //	   5..4  CCC: zero (normal or break), above or other | ||||
| //	} | ||||
| //	   3  exception: interpret this value as an exception index | ||||
| //	      (TODO: is this bit necessary? Probably implied from case mode.) | ||||
| //	2..0  case mode | ||||
| // | ||||
| // For the non-exceptional cases, a rune must be either uncased, lowercase or | ||||
| // uppercase. If the rune is cased, the XOR pattern maps either a lowercase | ||||
| // rune to uppercase or an uppercase rune to lowercase (applied to the 10 | ||||
| // least-significant bits of the rune). | ||||
| // | ||||
| // See the definitions below for a more detailed description of the various | ||||
| // bits. | ||||
| type info uint16 | ||||
|  | ||||
| const ( | ||||
| 	casedMask      = 0x0003 | ||||
| 	fullCasedMask  = 0x0007 | ||||
| 	ignorableMask  = 0x0006 | ||||
| 	ignorableValue = 0x0004 | ||||
|  | ||||
| 	inverseFoldBit = 1 << 7 | ||||
| 	isMidBit       = 1 << 6 | ||||
|  | ||||
| 	exceptionBit     = 1 << 3 | ||||
| 	exceptionShift   = 4 | ||||
| 	numExceptionBits = 12 | ||||
|  | ||||
| 	xorIndexBit = 1 << 6 | ||||
| 	xorShift    = 8 | ||||
|  | ||||
| 	// There is no mapping if all xor bits and the exception bit are zero. | ||||
| 	hasMappingMask = 0xff80 | exceptionBit | ||||
| ) | ||||
|  | ||||
| // The case mode bits encode the case type of a rune. This includes uncased, | ||||
| // title, upper and lower case and case ignorable. (For a definition of these | ||||
| // terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare | ||||
| // cases, a rune can be both cased and case-ignorable. This is encoded by | ||||
| // cIgnorableCased. A rune of this type is always lower case. Some runes are | ||||
| // cased while not having a mapping. | ||||
| // | ||||
| // A common pattern for scripts in the Unicode standard is for upper and lower | ||||
| // case runes to alternate for increasing rune values (e.g. the accented Latin | ||||
| // ranges starting from U+0100 and U+1E00 among others and some Cyrillic | ||||
| // characters). We use this property by defining a cXORCase mode, where the case | ||||
| // mode (always upper or lower case) is derived from the rune value. As the XOR | ||||
| // pattern for case mappings is often identical for successive runes, using | ||||
| // cXORCase can result in large series of identical trie values. This, in turn, | ||||
| // allows us to better compress the trie blocks. | ||||
| const ( | ||||
| 	cUncased          info = iota // 000 | ||||
| 	cTitle                        // 001 | ||||
| 	cLower                        // 010 | ||||
| 	cUpper                        // 011 | ||||
| 	cIgnorableUncased             // 100 | ||||
| 	cIgnorableCased               // 101 // lower case if mappings exist | ||||
| 	cXORCase                      // 11x // case is cLower | ((rune&1) ^ x) | ||||
|  | ||||
| 	maxCaseMode = cUpper | ||||
| ) | ||||
|  | ||||
| // isCased reports whether either of the two low case-mode bits (casedMask) is | ||||
| // set. Per the case mode table this is false only for cUncased (000) and | ||||
| // cIgnorableUncased (100). | ||||
| func (c info) isCased() bool { | ||||
| 	return c&casedMask != 0 | ||||
| } | ||||
|  | ||||
| // isCaseIgnorable reports whether bits 2..1 of the case mode are "10", which | ||||
| // matches the two case-ignorable modes cIgnorableUncased (100) and | ||||
| // cIgnorableCased (101). | ||||
| func (c info) isCaseIgnorable() bool { | ||||
| 	return c&ignorableMask == ignorableValue | ||||
| } | ||||
|  | ||||
| // isNotCasedAndNotCaseIgnorable reports whether all three case-mode bits are | ||||
| // zero, i.e. the case mode is cUncased. | ||||
| func (c info) isNotCasedAndNotCaseIgnorable() bool { | ||||
| 	return c&fullCasedMask == 0 | ||||
| } | ||||
|  | ||||
| // isCaseIgnorableAndNotCased reports whether the three case-mode bits are | ||||
| // exactly cIgnorableUncased (100). | ||||
| func (c info) isCaseIgnorableAndNotCased() bool { | ||||
| 	return c&fullCasedMask == cIgnorableUncased | ||||
| } | ||||
|  | ||||
| // isMid reports whether the case mode is exactly cIgnorableUncased and the | ||||
| // isMidBit (bit 6) is set. Bit 6 is shared with xorIndexBit, so it only has | ||||
| // the isMid meaning in combination with this case mode. | ||||
| func (c info) isMid() bool { | ||||
| 	return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased | ||||
| } | ||||
|  | ||||
| // The case mapping implementation will need to know about various Canonical | ||||
| // Combining Class (CCC) values. We encode two of these in the trie value: | ||||
| // cccZero (0) and cccAbove (230). If the value is cccOther, it means that | ||||
| // CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that | ||||
| // the rune also has the break category Break (see below). | ||||
| const ( | ||||
| 	cccBreak info = iota << 4 | ||||
| 	cccZero | ||||
| 	cccAbove | ||||
| 	cccOther | ||||
|  | ||||
| 	cccMask = cccBreak | cccZero | cccAbove | cccOther | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	starter       = 0 | ||||
| 	above         = 230 | ||||
| 	iotaSubscript = 240 | ||||
| ) | ||||
|  | ||||
| // The exceptions slice holds data that does not fit in a normal info entry. | ||||
| // The entry is pointed to by the exception index in an entry. It has the | ||||
| // following format: | ||||
| // | ||||
| // Header: | ||||
| // | ||||
| //	byte 0: | ||||
| //	 7..6  unused | ||||
| //	 5..4  CCC type (same bits as entry) | ||||
| //	    3  unused | ||||
| //	 2..0  length of fold | ||||
| // | ||||
| //	byte 1: | ||||
| //	  7..6  unused | ||||
| //	  5..3  length of 1st mapping of case type | ||||
| //	  2..0  length of 2nd mapping of case type | ||||
| // | ||||
| //	  case     1st    2nd | ||||
| //	  lower -> upper, title | ||||
| //	  upper -> lower, title | ||||
| //	  title -> lower, upper | ||||
| // | ||||
| // Lengths with the value 0x7 indicate no value and imply no change. | ||||
| // A length of 0 indicates a mapping to zero-length string. | ||||
| // | ||||
| // Body bytes: | ||||
| // | ||||
| //	case folding bytes | ||||
| //	lowercase mapping bytes | ||||
| //	uppercase mapping bytes | ||||
| //	titlecase mapping bytes | ||||
| //	closure mapping bytes (for NFKC_Casefold). (TODO) | ||||
| // | ||||
| // Fallbacks: | ||||
| // | ||||
| //	missing fold  -> lower | ||||
| //	missing title -> upper | ||||
| //	all missing   -> original rune | ||||
| // | ||||
| // exceptions starts with a dummy byte to enforce that there is no zero index | ||||
| // value. | ||||
| const ( | ||||
| 	lengthMask = 0x07 | ||||
| 	lengthBits = 3 | ||||
| 	noChange   = 0 | ||||
| ) | ||||
|  | ||||
| // References to generated trie. | ||||
|  | ||||
| var trie = newCaseTrie(0) | ||||
|  | ||||
| var sparse = sparseBlocks{ | ||||
| 	values:  sparseValues[:], | ||||
| 	offsets: sparseOffsets[:], | ||||
| } | ||||
|  | ||||
| // Sparse block lookup code. | ||||
|  | ||||
| // valueRange is an entry in a sparse block. It maps every byte b with | ||||
| // lo <= b <= hi (both bounds inclusive) to value. | ||||
| type valueRange struct { | ||||
| 	value  uint16 | ||||
| 	lo, hi byte | ||||
| } | ||||
|  | ||||
| // sparseBlocks holds the range entries of all sparse blocks in one slice. | ||||
| // Block n occupies values[offsets[n]:offsets[n+1]]. | ||||
| type sparseBlocks struct { | ||||
| 	values  []valueRange | ||||
| 	offsets []uint16 | ||||
| } | ||||
|  | ||||
| // lookup binary-searches sparse block n, i.e. the entries | ||||
| // s.values[s.offsets[n]:s.offsets[n+1]], for the range that contains byte b | ||||
| // and returns that range's value. It returns 0 when no range in the block | ||||
| // contains b. The search relies on the ranges of a block being sorted by | ||||
| // their bounds (presumably guaranteed by the table generator). | ||||
| func (s *sparseBlocks) lookup(n uint32, b byte) uint16 { | ||||
| 	first, end := s.offsets[n], s.offsets[n+1] | ||||
| 	for first < end { | ||||
| 		mid := first + (end-first)/2 | ||||
| 		entry := s.values[mid] | ||||
| 		switch { | ||||
| 		case entry.lo <= b && b <= entry.hi: | ||||
| 			// b falls inside this range. | ||||
| 			return entry.value | ||||
| 		case b < entry.lo: | ||||
| 			// Continue in the lower half, excluding mid. | ||||
| 			end = mid | ||||
| 		default: | ||||
| 			// Continue in the upper half, excluding mid. | ||||
| 			first = mid + 1 | ||||
| 		} | ||||
| 	} | ||||
| 	return 0 | ||||
| } | ||||
|  | ||||
| // lastRuneForTesting is the last rune used for testing. Everything after this | ||||
| // is boring. | ||||
| const lastRuneForTesting = rune(0x1FFFF) | ||||
							
								
								
									
										1
									
								
								vendor/modules.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								vendor/modules.txt
									
									
									
									
										vendored
									
									
								
							| @@ -955,6 +955,7 @@ golang.org/x/sys/windows/svc/mgr | ||||
| golang.org/x/term | ||||
| # golang.org/x/text v0.14.0 | ||||
| ## explicit; go 1.18 | ||||
| golang.org/x/text/cases | ||||
| golang.org/x/text/encoding | ||||
| golang.org/x/text/encoding/charmap | ||||
| golang.org/x/text/encoding/htmlindex | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot