aboutsummaryrefslogtreecommitdiffstats
path: root/src/dma/vendor/golang.org/x/text/internal/colltab/table.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/dma/vendor/golang.org/x/text/internal/colltab/table.go')
-rw-r--r--src/dma/vendor/golang.org/x/text/internal/colltab/table.go275
1 files changed, 275 insertions, 0 deletions
diff --git a/src/dma/vendor/golang.org/x/text/internal/colltab/table.go b/src/dma/vendor/golang.org/x/text/internal/colltab/table.go
new file mode 100644
index 00000000..e26e36da
--- /dev/null
+++ b/src/dma/vendor/golang.org/x/text/internal/colltab/table.go
@@ -0,0 +1,275 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+import (
+ "unicode/utf8"
+
+ "golang.org/x/text/unicode/norm"
+)
+
+// Table holds all collation data for a given collation ordering.
+type Table struct {
+ Index Trie // main trie
+
+ // expansion info
+ ExpandElem []uint32
+
+ // contraction info
+ ContractTries ContractTrieSet
+ ContractElem []uint32
+ MaxContractLen int
+ VariableTop uint32
+}
+
+func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
+ return t.appendNext(w, source{bytes: b})
+}
+
+func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
+ return t.appendNext(w, source{str: s})
+}
+
+func (t *Table) Start(p int, b []byte) int {
+ // TODO: implement
+ panic("not implemented")
+}
+
+func (t *Table) StartString(p int, s string) int {
+ // TODO: implement
+ panic("not implemented")
+}
+
+func (t *Table) Domain() []string {
+ // TODO: implement
+ panic("not implemented")
+}
+
+func (t *Table) Top() uint32 {
+ return t.VariableTop
+}
+
+type source struct {
+ str string
+ bytes []byte
+}
+
+func (src *source) lookup(t *Table) (ce Elem, sz int) {
+ if src.bytes == nil {
+ return t.Index.lookupString(src.str)
+ }
+ return t.Index.lookup(src.bytes)
+}
+
+func (src *source) tail(sz int) {
+ if src.bytes == nil {
+ src.str = src.str[sz:]
+ } else {
+ src.bytes = src.bytes[sz:]
+ }
+}
+
+func (src *source) nfd(buf []byte, end int) []byte {
+ if src.bytes == nil {
+ return norm.NFD.AppendString(buf[:0], src.str[:end])
+ }
+ return norm.NFD.Append(buf[:0], src.bytes[:end]...)
+}
+
+func (src *source) rune() (r rune, sz int) {
+ if src.bytes == nil {
+ return utf8.DecodeRuneInString(src.str)
+ }
+ return utf8.DecodeRune(src.bytes)
+}
+
+func (src *source) properties(f norm.Form) norm.Properties {
+ if src.bytes == nil {
+ return f.PropertiesString(src.str)
+ }
+ return f.Properties(src.bytes)
+}
+
+// appendNext appends the weights corresponding to the next rune or
+// contraction in s. If a contraction is matched to a discontinuous
+// sequence of runes, the weights for the interstitial runes are
+// appended as well. It returns a new slice that includes the appended
+// weights and the number of bytes consumed from s.
+func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
+ ce, sz := src.lookup(t)
+ tp := ce.ctype()
+ if tp == ceNormal {
+ if ce == 0 {
+ r, _ := src.rune()
+ const (
+ hangulSize = 3
+ firstHangul = 0xAC00
+ lastHangul = 0xD7A3
+ )
+ if r >= firstHangul && r <= lastHangul {
+ // TODO: performance can be considerably improved here.
+ n = sz
+ var buf [16]byte // Used for decomposing Hangul.
+ for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
+ ce, sz = t.Index.lookup(b)
+ w = append(w, ce)
+ }
+ return w, n
+ }
+ ce = makeImplicitCE(implicitPrimary(r))
+ }
+ w = append(w, ce)
+ } else if tp == ceExpansionIndex {
+ w = t.appendExpansion(w, ce)
+ } else if tp == ceContractionIndex {
+ n := 0
+ src.tail(sz)
+ if src.bytes == nil {
+ w, n = t.matchContractionString(w, ce, src.str)
+ } else {
+ w, n = t.matchContraction(w, ce, src.bytes)
+ }
+ sz += n
+ } else if tp == ceDecompose {
+ // Decompose using NFKD and replace tertiary weights.
+ t1, t2 := splitDecompose(ce)
+ i := len(w)
+ nfkd := src.properties(norm.NFKD).Decomposition()
+ for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
+ w, p = t.appendNext(w, source{bytes: nfkd})
+ }
+ w[i] = w[i].updateTertiary(t1)
+ if i++; i < len(w) {
+ w[i] = w[i].updateTertiary(t2)
+ for i++; i < len(w); i++ {
+ w[i] = w[i].updateTertiary(maxTertiary)
+ }
+ }
+ }
+ return w, sz
+}
+
+func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
+ i := splitExpandIndex(ce)
+ n := int(t.ExpandElem[i])
+ i++
+ for _, ce := range t.ExpandElem[i : i+n] {
+ w = append(w, Elem(ce))
+ }
+ return w
+}
+
+func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
+ index, n, offset := splitContractIndex(ce)
+
+ scan := t.ContractTries.scanner(index, n, suffix)
+ buf := [norm.MaxSegmentSize]byte{}
+ bufp := 0
+ p := scan.scan(0)
+
+ if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
+ // By now we should have filtered most cases.
+ p0 := p
+ bufn := 0
+ rune := norm.NFD.Properties(suffix[p:])
+ p += rune.Size()
+ if rune.LeadCCC() != 0 {
+ prevCC := rune.TrailCCC()
+ // A gap may only occur in the last normalization segment.
+ // This also ensures that len(scan.s) < norm.MaxSegmentSize.
+ if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
+ scan.s = suffix[:p+end]
+ }
+ for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
+ rune = norm.NFD.Properties(suffix[p:])
+ if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
+ break
+ }
+ prevCC = rune.TrailCCC()
+ if pp := scan.scan(p); pp != p {
+ // Copy the interstitial runes for later processing.
+ bufn += copy(buf[bufn:], suffix[p0:p])
+ if scan.pindex == pp {
+ bufp = bufn
+ }
+ p, p0 = pp, pp
+ } else {
+ p += rune.Size()
+ }
+ }
+ }
+ }
+ // Append weights for the matched contraction, which may be an expansion.
+ i, n := scan.result()
+ ce = Elem(t.ContractElem[i+offset])
+ if ce.ctype() == ceNormal {
+ w = append(w, ce)
+ } else {
+ w = t.appendExpansion(w, ce)
+ }
+ // Append weights for the runes in the segment not part of the contraction.
+ for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
+ w, p = t.appendNext(w, source{bytes: b})
+ }
+ return w, n
+}
+
+// TODO: unify the two implementations. This is best done after first simplifying
+// the algorithm taking into account the inclusion of both NFC and NFD forms
+// in the table.
+func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
+ index, n, offset := splitContractIndex(ce)
+
+ scan := t.ContractTries.scannerString(index, n, suffix)
+ buf := [norm.MaxSegmentSize]byte{}
+ bufp := 0
+ p := scan.scan(0)
+
+ if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
+ // By now we should have filtered most cases.
+ p0 := p
+ bufn := 0
+ rune := norm.NFD.PropertiesString(suffix[p:])
+ p += rune.Size()
+ if rune.LeadCCC() != 0 {
+ prevCC := rune.TrailCCC()
+ // A gap may only occur in the last normalization segment.
+ // This also ensures that len(scan.s) < norm.MaxSegmentSize.
+ if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
+ scan.s = suffix[:p+end]
+ }
+ for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
+ rune = norm.NFD.PropertiesString(suffix[p:])
+ if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
+ break
+ }
+ prevCC = rune.TrailCCC()
+ if pp := scan.scan(p); pp != p {
+ // Copy the interstitial runes for later processing.
+ bufn += copy(buf[bufn:], suffix[p0:p])
+ if scan.pindex == pp {
+ bufp = bufn
+ }
+ p, p0 = pp, pp
+ } else {
+ p += rune.Size()
+ }
+ }
+ }
+ }
+ // Append weights for the matched contraction, which may be an expansion.
+ i, n := scan.result()
+ ce = Elem(t.ContractElem[i+offset])
+ if ce.ctype() == ceNormal {
+ w = append(w, ce)
+ } else {
+ w = t.appendExpansion(w, ce)
+ }
+ // Append weights for the runes in the segment not part of the contraction.
+ for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
+ w, p = t.appendNext(w, source{bytes: b})
+ }
+ return w, n
+}