diff options
author | Tomofumi Hayashi <tohayash@redhat.com> | 2019-04-27 20:38:39 +0900 |
---|---|---|
committer | Tomofumi Hayashi <tohayash@redhat.com> | 2019-04-27 20:40:37 +0900 |
commit | 4d11ca17d0f73f5bd783f45900118295fdfed46b (patch) | |
tree | cce8575b02ac850d2b30ec12a5c4083c48e85c6c /src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go | |
parent | 07e4a96e4996f3d39b92dd601b3ed0d23bfbaa0c (diff) |
barometer: update DMA's vendoring packages
Change-Id: I0578b094f1ecdaed20c906be2ba29d51b8089d7c
Signed-off-by: Tomofumi Hayashi <tohayash@redhat.com>
Diffstat (limited to 'src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go')
-rw-r--r-- | src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go | 371 |
1 files changed, 371 insertions, 0 deletions
diff --git a/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go b/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go new file mode 100644 index 00000000..396cebda --- /dev/null +++ b/src/dma/vendor/golang.org/x/text/internal/colltab/collelem.go @@ -0,0 +1,371 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "fmt" + "unicode" +) + +// Level identifies the collation comparison level. +// The primary level corresponds to the basic sorting of text. +// The secondary level corresponds to accents and related linguistic elements. +// The tertiary level corresponds to casing and related concepts. +// The quaternary level is derived from the other levels by the +// various algorithms for handling variable elements. +type Level int + +const ( + Primary Level = iota + Secondary + Tertiary + Quaternary + Identity + + NumLevels +) + +const ( + defaultSecondary = 0x20 + defaultTertiary = 0x2 + maxTertiary = 0x1F + MaxQuaternary = 0x1FFFFF // 21 bits. +) + +// Elem is a representation of a collation element. This API provides ways to encode +// and decode Elems. Implementations of collation tables may use values greater +// or equal to PrivateUse for their own purposes. However, these should never be +// returned by AppendNext. 
type Elem uint32

// Value ranges of Elem, as classified by ctype. Values up to maxCE are
// ordinary collation elements. PrivateUse is the first value that collation
// table implementations may use for their own purposes; the ranges above it
// hold contraction indices, expansion indices and decomposition markers.
const (
	maxCE       Elem = 0xAFFFFFFF
	PrivateUse       = minContract
	minContract      = 0xC0000000
	maxContract      = 0xDFFFFFFF
	minExpand        = 0xE0000000
	maxExpand        = 0xEFFFFFFF
	minDecomp        = 0xF0000000
)

// ceType is the coarse category of an Elem, derived from its numeric range.
type ceType int

const (
	ceNormal           ceType = iota // ceNormal includes implicits (ce == 0)
	ceContractionIndex               // rune can be a start of a contraction
	ceExpansionIndex                 // rune expands into a sequence of collation elements
	ceDecompose                      // rune expands using its compatibility (NFKD) decomposition
)

// ctype reports which category ce belongs to. The categories partition the
// uint32 range by upper bound, checked in increasing order:
// ce <= maxCE is ceNormal, ce <= maxContract is ceContractionIndex,
// ce <= maxExpand is ceExpansionIndex, and anything larger is ceDecompose.
func (ce Elem) ctype() ceType {
	switch {
	case ce <= maxCE:
		return ceNormal
	case ce <= maxContract:
		return ceContractionIndex
	case ce <= maxExpand:
		return ceExpansionIndex
	}
	// Every remaining value lies above maxExpand.
	return ceDecompose
}

// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 01pppppp pppppppp ppppppp0 ssssssss
//   - p* is primary collation value
//   - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
//   - p* is primary collation value
//   - s* offset of secondary from default value.
+// - t* is the tertiary collation value +// 100ttttt cccccccc pppppppp pppppppp +// - t* is the tertiar collation value +// - c* is the canonical combining class +// - p* is the primary collation value +// Collation elements with a secondary value are of the form +// 1010cccc ccccssss ssssssss tttttttt, where +// - c* is the canonical combining class +// - s* is the secondary collation value +// - t* is the tertiary collation value +// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 +// - q* quaternary value +const ( + ceTypeMask = 0xC0000000 + ceTypeMaskExt = 0xE0000000 + ceIgnoreMask = 0xF00FFFFF + ceType1 = 0x40000000 + ceType2 = 0x00000000 + ceType3or4 = 0x80000000 + ceType4 = 0xA0000000 + ceTypeQ = 0xC0000000 + Ignore = ceType4 + firstNonPrimary = 0x80000000 + lastSpecialPrimary = 0xA0000000 + secondaryMask = 0x80000000 + hasTertiaryMask = 0x40000000 + primaryValueMask = 0x3FFFFE00 + maxPrimaryBits = 21 + compactPrimaryBits = 16 + maxSecondaryBits = 12 + maxTertiaryBits = 8 + maxCCCBits = 8 + maxSecondaryCompactBits = 8 + maxSecondaryDiffBits = 4 + maxTertiaryCompactBits = 5 + primaryShift = 9 + compactSecondaryShift = 5 + minCompactSecondary = defaultSecondary - 4 +) + +func makeImplicitCE(primary int) Elem { + return ceType1 | Elem(primary<<primaryShift) | defaultSecondary +} + +// MakeElem returns an Elem for the given values. It will return an error +// if the given combination of values is invalid. 
+func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) { + if w := primary; w >= 1<<maxPrimaryBits || w < 0 { + return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits) + } + if w := secondary; w >= 1<<maxSecondaryBits || w < 0 { + return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits) + } + if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 { + return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits) + } + ce := Elem(0) + if primary != 0 { + if ccc != 0 { + if primary >= 1<<compactPrimaryBits { + return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits) + } + if secondary != defaultSecondary { + return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc) + } + ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits)) + ce |= Elem(ccc) << compactPrimaryBits + ce |= Elem(primary) + ce |= ceType3or4 + } else if tertiary == defaultTertiary { + if secondary >= 1<<maxSecondaryCompactBits { + return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits) + } + ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary) + ce |= ceType1 + } else { + d := secondary - defaultSecondary + maxSecondaryDiffBits + if d >= 1<<maxSecondaryDiffBits || d < 0 { + return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits) + } + if tertiary >= 1<<maxTertiaryCompactBits { + return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits) + } + ce = Elem(primary<<maxSecondaryDiffBits + d) + ce = ce<<maxTertiaryCompactBits + Elem(tertiary) + } + } else { + ce = Elem(secondary<<maxTertiaryBits + tertiary) + ce += Elem(ccc) << (maxSecondaryBits + 
maxTertiaryBits) + ce |= ceType4 + } + return ce, nil +} + +// MakeQuaternary returns an Elem with the given quaternary value. +func MakeQuaternary(v int) Elem { + return ceTypeQ | Elem(v<<primaryShift) +} + +// Mask sets weights for any level smaller than l to 0. +// The resulting Elem can be used to test for equality with +// other Elems to which the same mask has been applied. +func (ce Elem) Mask(l Level) uint32 { + return 0 +} + +// CCC returns the canonical combining class associated with the underlying character, +// if applicable, or 0 otherwise. +func (ce Elem) CCC() uint8 { + if ce&ceType3or4 != 0 { + if ce&ceType4 == ceType3or4 { + return uint8(ce >> 16) + } + return uint8(ce >> 20) + } + return 0 +} + +// Primary returns the primary collation weight for ce. +func (ce Elem) Primary() int { + if ce >= firstNonPrimary { + if ce > lastSpecialPrimary { + return 0 + } + return int(uint16(ce)) + } + return int(ce&primaryValueMask) >> primaryShift +} + +// Secondary returns the secondary collation weight for ce. +func (ce Elem) Secondary() int { + switch ce & ceTypeMask { + case ceType1: + return int(uint8(ce)) + case ceType2: + return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF) + case ceType3or4: + if ce < ceType4 { + return defaultSecondary + } + return int(ce>>8) & 0xFFF + case ceTypeQ: + return 0 + } + panic("should not reach here") +} + +// Tertiary returns the tertiary collation weight for ce. +func (ce Elem) Tertiary() uint8 { + if ce&hasTertiaryMask == 0 { + if ce&ceType3or4 == 0 { + return uint8(ce & 0x1F) + } + if ce&ceType4 == ceType4 { + return uint8(ce) + } + return uint8(ce>>24) & 0x1F // type 2 + } else if ce&ceTypeMask == ceType1 { + return defaultTertiary + } + // ce is a quaternary value. 
+ return 0 +} + +func (ce Elem) updateTertiary(t uint8) Elem { + if ce&ceTypeMask == ceType1 { + // convert to type 4 + nce := ce & primaryValueMask + nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift + ce = nce + } else if ce&ceTypeMaskExt == ceType3or4 { + ce &= ^Elem(maxTertiary << 24) + return ce | (Elem(t) << 24) + } else { + // type 2 or 4 + ce &= ^Elem(maxTertiary) + } + return ce | Elem(t) +} + +// Quaternary returns the quaternary value if explicitly specified, +// 0 if ce == Ignore, or MaxQuaternary otherwise. +// Quaternary values are used only for shifted variants. +func (ce Elem) Quaternary() int { + if ce&ceTypeMask == ceTypeQ { + return int(ce&primaryValueMask) >> primaryShift + } else if ce&ceIgnoreMask == Ignore { + return 0 + } + return MaxQuaternary +} + +// Weight returns the collation weight for the given level. +func (ce Elem) Weight(l Level) int { + switch l { + case Primary: + return ce.Primary() + case Secondary: + return ce.Secondary() + case Tertiary: + return int(ce.Tertiary()) + case Quaternary: + return ce.Quaternary() + } + return 0 // return 0 (ignore) for undefined levels. +} + +// For contractions, collation elements are of the form +// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where +// - n* is the size of the first node in the contraction trie. +// - i* is the index of the first node in the contraction trie. +// - b* is the offset into the contraction collation element table. +// See contract.go for details on the contraction trie. +const ( + maxNBits = 4 + maxTrieIndexBits = 12 + maxContractOffsetBits = 13 +) + +func splitContractIndex(ce Elem) (index, n, offset int) { + n = int(ce & (1<<maxNBits - 1)) + ce >>= maxNBits + index = int(ce & (1<<maxTrieIndexBits - 1)) + ce >>= maxTrieIndexBits + offset = int(ce & (1<<maxContractOffsetBits - 1)) + return +} + +// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb, +// where b* is the index into the expansion sequence table. 
+const maxExpandIndexBits = 16 + +func splitExpandIndex(ce Elem) (index int) { + return int(uint16(ce)) +} + +// Some runes can be expanded using NFKD decomposition. Instead of storing the full +// sequence of collation elements, we decompose the rune and lookup the collation +// elements for each rune in the decomposition and modify the tertiary weights. +// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where +// - v* is the replacement tertiary weight for the first rune, +// - w* is the replacement tertiary weight for the second rune, +// Tertiary weights of subsequent runes should be replaced with maxTertiary. +// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. +func splitDecompose(ce Elem) (t1, t2 uint8) { + return uint8(ce), uint8(ce >> 8) +} + +const ( + // These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. + minUnified rune = 0x4E00 + maxUnified = 0x9FFF + minCompatibility = 0xF900 + maxCompatibility = 0xFAFF + minRare = 0x3400 + maxRare = 0x4DBF +) +const ( + commonUnifiedOffset = 0x10000 + rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF + otherOffset = 0x50000 // largest rune in rare is U+2FA1D + illegalOffset = otherOffset + int(unicode.MaxRune) + maxPrimary = illegalOffset + 1 +) + +// implicitPrimary returns the primary weight for the a rune +// for which there is no entry for the rune in the collation table. +// We take a different approach from the one specified in +// https://unicode.org/reports/tr10/#Implicit_Weights, +// but preserve the resulting relative ordering of the runes. +func implicitPrimary(r rune) int { + if unicode.Is(unicode.Ideographic, r) { + if r >= minUnified && r <= maxUnified { + // The most common case for CJK. + return int(r) + commonUnifiedOffset + } + if r >= minCompatibility && r <= maxCompatibility { + // This will typically not hit. 
The DUCET explicitly specifies mappings + // for all characters that do not decompose. + return int(r) + commonUnifiedOffset + } + return int(r) + rareUnifiedOffset + } + return int(r) + otherOffset +} |