aboutsummaryrefslogtreecommitdiffstats
path: root/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go')
-rw-r--r--src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go371
1 files changed, 371 insertions, 0 deletions
diff --git a/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go b/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go
new file mode 100644
index 00000000..396cebda
--- /dev/null
+++ b/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go
@@ -0,0 +1,371 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+import (
+ "fmt"
+ "unicode"
+)
+
+// Level identifies the collation comparison level.
+// The primary level corresponds to the basic sorting of text.
+// The secondary level corresponds to accents and related linguistic elements.
+// The tertiary level corresponds to casing and related concepts.
+// The quaternary level is derived from the other levels by the
+// various algorithms for handling variable elements.
+type Level int
+
+const (
+ Primary Level = iota
+ Secondary
+ Tertiary
+ Quaternary
+ Identity
+
+ NumLevels
+)
+
+const (
+ defaultSecondary = 0x20
+ defaultTertiary = 0x2
+ maxTertiary = 0x1F
+ MaxQuaternary = 0x1FFFFF // 21 bits.
+)
+
+// Elem is a representation of a collation element. This API provides ways to encode
+// and decode Elems. Implementations of collation tables may use values greater
+// or equal to PrivateUse for their own purposes. However, these should never be
+// returned by AppendNext.
+type Elem uint32
+
+const (
+ maxCE Elem = 0xAFFFFFFF
+ PrivateUse = minContract
+ minContract = 0xC0000000
+ maxContract = 0xDFFFFFFF
+ minExpand = 0xE0000000
+ maxExpand = 0xEFFFFFFF
+ minDecomp = 0xF0000000
+)
+
+type ceType int
+
+const (
+ ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
+ ceContractionIndex // rune can be a start of a contraction
+ ceExpansionIndex // rune expands into a sequence of collation elements
+ ceDecompose // rune expands using NFKC decomposition
+)
+
+func (ce Elem) ctype() ceType {
+ if ce <= maxCE {
+ return ceNormal
+ }
+ if ce <= maxContract {
+ return ceContractionIndex
+ } else {
+ if ce <= maxExpand {
+ return ceExpansionIndex
+ }
+ return ceDecompose
+ }
+ panic("should not reach here")
+ return ceType(-1)
+}
+
+// For normal collation elements, we assume that a collation element either has
+// a primary or non-default secondary value, not both.
+// Collation elements with a primary value are of the form
+// 01pppppp pppppppp ppppppp0 ssssssss
+// - p* is primary collation value
+// - s* is the secondary collation value
+// 00pppppp pppppppp ppppppps sssttttt, where
+// - p* is primary collation value
+// - s* offset of secondary from default value.
+// - t* is the tertiary collation value
+// 100ttttt cccccccc pppppppp pppppppp
+// - t* is the tertiar collation value
+// - c* is the canonical combining class
+// - p* is the primary collation value
+// Collation elements with a secondary value are of the form
+// 1010cccc ccccssss ssssssss tttttttt, where
+// - c* is the canonical combining class
+// - s* is the secondary collation value
+// - t* is the tertiary collation value
+// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
+// - q* quaternary value
+const (
+ ceTypeMask = 0xC0000000
+ ceTypeMaskExt = 0xE0000000
+ ceIgnoreMask = 0xF00FFFFF
+ ceType1 = 0x40000000
+ ceType2 = 0x00000000
+ ceType3or4 = 0x80000000
+ ceType4 = 0xA0000000
+ ceTypeQ = 0xC0000000
+ Ignore = ceType4
+ firstNonPrimary = 0x80000000
+ lastSpecialPrimary = 0xA0000000
+ secondaryMask = 0x80000000
+ hasTertiaryMask = 0x40000000
+ primaryValueMask = 0x3FFFFE00
+ maxPrimaryBits = 21
+ compactPrimaryBits = 16
+ maxSecondaryBits = 12
+ maxTertiaryBits = 8
+ maxCCCBits = 8
+ maxSecondaryCompactBits = 8
+ maxSecondaryDiffBits = 4
+ maxTertiaryCompactBits = 5
+ primaryShift = 9
+ compactSecondaryShift = 5
+ minCompactSecondary = defaultSecondary - 4
+)
+
+func makeImplicitCE(primary int) Elem {
+ return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
+}
+
+// MakeElem returns an Elem for the given values. It will return an error
+// if the given combination of values is invalid.
+func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
+ if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
+ return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
+ }
+ if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
+ return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
+ }
+ if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
+ return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
+ }
+ ce := Elem(0)
+ if primary != 0 {
+ if ccc != 0 {
+ if primary >= 1<<compactPrimaryBits {
+ return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
+ }
+ if secondary != defaultSecondary {
+ return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
+ }
+ ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
+ ce |= Elem(ccc) << compactPrimaryBits
+ ce |= Elem(primary)
+ ce |= ceType3or4
+ } else if tertiary == defaultTertiary {
+ if secondary >= 1<<maxSecondaryCompactBits {
+ return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
+ }
+ ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
+ ce |= ceType1
+ } else {
+ d := secondary - defaultSecondary + maxSecondaryDiffBits
+ if d >= 1<<maxSecondaryDiffBits || d < 0 {
+ return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
+ }
+ if tertiary >= 1<<maxTertiaryCompactBits {
+ return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
+ }
+ ce = Elem(primary<<maxSecondaryDiffBits + d)
+ ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
+ }
+ } else {
+ ce = Elem(secondary<<maxTertiaryBits + tertiary)
+ ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
+ ce |= ceType4
+ }
+ return ce, nil
+}
+
+// MakeQuaternary returns an Elem with the given quaternary value.
+func MakeQuaternary(v int) Elem {
+ return ceTypeQ | Elem(v<<primaryShift)
+}
+
+// Mask sets weights for any level smaller than l to 0.
+// The resulting Elem can be used to test for equality with
+// other Elems to which the same mask has been applied.
+func (ce Elem) Mask(l Level) uint32 {
+ return 0
+}
+
+// CCC returns the canonical combining class associated with the underlying character,
+// if applicable, or 0 otherwise.
+func (ce Elem) CCC() uint8 {
+ if ce&ceType3or4 != 0 {
+ if ce&ceType4 == ceType3or4 {
+ return uint8(ce >> 16)
+ }
+ return uint8(ce >> 20)
+ }
+ return 0
+}
+
+// Primary returns the primary collation weight for ce.
+func (ce Elem) Primary() int {
+ if ce >= firstNonPrimary {
+ if ce > lastSpecialPrimary {
+ return 0
+ }
+ return int(uint16(ce))
+ }
+ return int(ce&primaryValueMask) >> primaryShift
+}
+
+// Secondary returns the secondary collation weight for ce.
+func (ce Elem) Secondary() int {
+ switch ce & ceTypeMask {
+ case ceType1:
+ return int(uint8(ce))
+ case ceType2:
+ return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
+ case ceType3or4:
+ if ce < ceType4 {
+ return defaultSecondary
+ }
+ return int(ce>>8) & 0xFFF
+ case ceTypeQ:
+ return 0
+ }
+ panic("should not reach here")
+}
+
+// Tertiary returns the tertiary collation weight for ce.
+func (ce Elem) Tertiary() uint8 {
+ if ce&hasTertiaryMask == 0 {
+ if ce&ceType3or4 == 0 {
+ return uint8(ce & 0x1F)
+ }
+ if ce&ceType4 == ceType4 {
+ return uint8(ce)
+ }
+ return uint8(ce>>24) & 0x1F // type 2
+ } else if ce&ceTypeMask == ceType1 {
+ return defaultTertiary
+ }
+ // ce is a quaternary value.
+ return 0
+}
+
+func (ce Elem) updateTertiary(t uint8) Elem {
+ if ce&ceTypeMask == ceType1 {
+ // convert to type 4
+ nce := ce & primaryValueMask
+ nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
+ ce = nce
+ } else if ce&ceTypeMaskExt == ceType3or4 {
+ ce &= ^Elem(maxTertiary << 24)
+ return ce | (Elem(t) << 24)
+ } else {
+ // type 2 or 4
+ ce &= ^Elem(maxTertiary)
+ }
+ return ce | Elem(t)
+}
+
+// Quaternary returns the quaternary value if explicitly specified,
+// 0 if ce == Ignore, or MaxQuaternary otherwise.
+// Quaternary values are used only for shifted variants.
+func (ce Elem) Quaternary() int {
+ if ce&ceTypeMask == ceTypeQ {
+ return int(ce&primaryValueMask) >> primaryShift
+ } else if ce&ceIgnoreMask == Ignore {
+ return 0
+ }
+ return MaxQuaternary
+}
+
+// Weight returns the collation weight for the given level.
+func (ce Elem) Weight(l Level) int {
+ switch l {
+ case Primary:
+ return ce.Primary()
+ case Secondary:
+ return ce.Secondary()
+ case Tertiary:
+ return int(ce.Tertiary())
+ case Quaternary:
+ return ce.Quaternary()
+ }
+ return 0 // return 0 (ignore) for undefined levels.
+}
+
+// For contractions, collation elements are of the form
+// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
+// - n* is the size of the first node in the contraction trie.
+// - i* is the index of the first node in the contraction trie.
+// - b* is the offset into the contraction collation element table.
+// See contract.go for details on the contraction trie.
+const (
+ maxNBits = 4
+ maxTrieIndexBits = 12
+ maxContractOffsetBits = 13
+)
+
+func splitContractIndex(ce Elem) (index, n, offset int) {
+ n = int(ce & (1<<maxNBits - 1))
+ ce >>= maxNBits
+ index = int(ce & (1<<maxTrieIndexBits - 1))
+ ce >>= maxTrieIndexBits
+ offset = int(ce & (1<<maxContractOffsetBits - 1))
+ return
+}
+
+// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
+// where b* is the index into the expansion sequence table.
+const maxExpandIndexBits = 16
+
+func splitExpandIndex(ce Elem) (index int) {
+ return int(uint16(ce))
+}
+
+// Some runes can be expanded using NFKD decomposition. Instead of storing the full
+// sequence of collation elements, we decompose the rune and lookup the collation
+// elements for each rune in the decomposition and modify the tertiary weights.
+// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
+// - v* is the replacement tertiary weight for the first rune,
+// - w* is the replacement tertiary weight for the second rune,
+// Tertiary weights of subsequent runes should be replaced with maxTertiary.
+// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
+func splitDecompose(ce Elem) (t1, t2 uint8) {
+ return uint8(ce), uint8(ce >> 8)
+}
+
+const (
+ // These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
+ minUnified rune = 0x4E00
+ maxUnified = 0x9FFF
+ minCompatibility = 0xF900
+ maxCompatibility = 0xFAFF
+ minRare = 0x3400
+ maxRare = 0x4DBF
+)
+const (
+ commonUnifiedOffset = 0x10000
+ rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
+ otherOffset = 0x50000 // largest rune in rare is U+2FA1D
+ illegalOffset = otherOffset + int(unicode.MaxRune)
+ maxPrimary = illegalOffset + 1
+)
+
+// implicitPrimary returns the primary weight for the a rune
+// for which there is no entry for the rune in the collation table.
+// We take a different approach from the one specified in
+// https://unicode.org/reports/tr10/#Implicit_Weights,
+// but preserve the resulting relative ordering of the runes.
+func implicitPrimary(r rune) int {
+ if unicode.Is(unicode.Ideographic, r) {
+ if r >= minUnified && r <= maxUnified {
+ // The most common case for CJK.
+ return int(r) + commonUnifiedOffset
+ }
+ if r >= minCompatibility && r <= maxCompatibility {
+ // This will typically not hit. The DUCET explicitly specifies mappings
+ // for all characters that do not decompose.
+ return int(r) + commonUnifiedOffset
+ }
+ return int(r) + rareUnifiedOffset
+ }
+ return int(r) + otherOffset
+}