1 files changed, 458 insertions, 0 deletions
diff --git a/src/dma/vendor/golang.org/x/text/unicode/norm/iter.go b/src/dma/vendor/golang.org/x/text/unicode/norm/iter.go
new file mode 100644
index 00000000..417c6b26
--- /dev/null
+++ b/src/dma/vendor/golang.org/x/text/unicode/norm/iter.go
@@ -0,0 +1,458 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package norm
+
+import (
+	"fmt"
+	"unicode/utf8"
+)
+
+// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
+// sequence of starter and non-starter runes for the purpose of normalization.
+const MaxSegmentSize = maxByteBufferSize // the same constant sizes Iter.buf
+
+// An Iter iterates over a string or byte slice, while normalizing it
+// to a given Form. Set it up with Init or InitString, then call Next until Done.
+type Iter struct {
+	rb     reorderBuffer           // holds the input (src, nsrc), the form (f) and reordering state
+	buf    [maxByteBufferSize]byte // scratch space backing segments that cannot alias the input
+	info   Properties              // first character saved from previous iteration
+	next   iterFunc                // implementation of next depends on form
+	asciiF iterFunc                // ASCII fast path: nextASCIIBytes or nextASCIIString
+
+	p        int    // current position in input source
+	multiSeg []byte // remainder of multi-segment decomposition
+}
+
+type iterFunc func(*Iter) []byte // one state of the iterator: yields the next segment (nil when done)
+
+// Init initializes i to iterate over src after normalizing it to Form f; an empty src leaves i done.
+func (i *Iter) Init(f Form, src []byte) {
+	i.p = 0
+	if len(src) == 0 {
+		i.rb.nsrc = 0 // clear the length first: setDone copies it into i.p (matters when i is reused)
+		i.setDone()
+		return
+	}
+	i.multiSeg = nil
+	i.rb.init(f, src)
+	i.next = i.rb.f.nextMain // form-specific entry point
+	i.asciiF = nextASCIIBytes
+	i.info = i.rb.f.info(i.rb.src, i.p) // properties of the first rune
+	i.rb.ss.first(i.info)
+}
+
+// InitString initializes i to iterate over src after normalizing it to Form f; an empty src leaves i done.
+func (i *Iter) InitString(f Form, src string) {
+	i.p = 0
+	if len(src) == 0 {
+		i.rb.nsrc = 0 // clear the length first: setDone copies it into i.p (matters when i is reused)
+		i.setDone()
+		return
+	}
+	i.multiSeg = nil
+	i.rb.initString(f, src)
+	i.next = i.rb.f.nextMain // form-specific entry point
+	i.asciiF = nextASCIIString
+	i.info = i.rb.f.info(i.rb.src, i.p) // properties of the first rune
+	i.rb.ss.first(i.info)
+}
+
+// Seek sets the segment to be returned by the next call to Next to start at the
+// position selected by offset and whence (0: start, 1: current, 2: end of input)
+// and returns it. The caller must ensure the position is the start of a segment.
+func (i *Iter) Seek(offset int64, whence int) (int64, error) {
+	var abs int64
+	switch whence {
+	case 0:
+		abs = offset
+	case 1:
+		abs = int64(i.p) + offset
+	case 2:
+		abs = int64(i.rb.nsrc) + offset
+	default:
+		return 0, fmt.Errorf("norm: invalid whence")
+	}
+	if abs < 0 {
+		return 0, fmt.Errorf("norm: negative position")
+	}
+	if abs >= int64(i.rb.nsrc) { // compare in int64: int(abs) could truncate on 32-bit platforms
+		i.setDone() // seeking to or past the end finishes the iteration
+		return int64(i.p), nil
+	}
+	i.p = int(abs) // safe: abs is non-negative and below i.rb.nsrc here
+	i.multiSeg = nil
+	i.next = i.rb.f.nextMain
+	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.rb.ss.first(i.info)
+	return abs, nil
+}
+
+// returnSlice yields the input range [a, b) as a byte slice.
+// For []byte input the result is a direct sub-slice of the source, so no
+// copying takes place. For string input the bytes are first copied into
+// i.buf and the filled prefix of that buffer is returned.
+func (i *Iter) returnSlice(a, b int) []byte {
+	if src := i.rb.src.bytes; src != nil {
+		return src[a:b]
+	}
+	return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
+}
+
+// Pos returns the byte position at which the next call to Next will commence processing.
+func (i *Iter) Pos() int {
+	return i.p // byte offset into the input; setDone moves it to i.rb.nsrc
+}
+
+func (i *Iter) setDone() { // marks the iteration as finished
+	i.next = nextDone // every later call to Next returns nil
+	i.p = i.rb.nsrc   // position at the end of the input, so Done reports true
+}
+
+// Done returns true if there is no more input to process.
+func (i *Iter) Done() bool {
+	return i.p >= i.rb.nsrc // setDone moves i.p to i.rb.nsrc
+}
+
+// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input, or nil
+// once the input is exhausted. For any input a and b for which f(a) == f(b),
+// subsequent calls to Next will return the same segments. Modifying runes are
+// grouped together with the preceding starter, if such a starter exists.
+// Although not guaranteed, n will typically be the smallest possible n.
+func (i *Iter) Next() []byte {
+	return i.next(i) // runs the current state function; the result may alias i.buf or the input
+}
+
+func nextASCIIBytes(i *Iter) []byte {
+	// ASCII fast path for []byte input: emit the single byte at i.p.
+	start, end := i.p, i.p+1
+	switch {
+	case end >= i.rb.nsrc:
+		i.setDone()
+		return i.rb.src.bytes[start:end]
+	case i.rb.src.bytes[end] < utf8.RuneSelf:
+		i.p = end
+		return i.rb.src.bytes[start:end]
+	}
+	// A non-ASCII byte follows: fall back to the form's main iterator.
+	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.next = i.rb.f.nextMain
+	return i.next(i)
+}
+
+func nextASCIIString(i *Iter) []byte {
+	// ASCII fast path for string input: the byte at i.p is copied into i.buf.
+	first, following := i.rb.src.str[i.p], i.p+1
+	switch {
+	case following >= i.rb.nsrc:
+		i.setDone()
+	case i.rb.src.str[following] < utf8.RuneSelf:
+		i.p = following
+	default:
+		i.info = i.rb.f.info(i.rb.src, i.p)
+		i.next = i.rb.f.nextMain
+		return i.next(i)
+	}
+	i.buf[0] = first
+	return i.buf[:1]
+}
+
+func nextHangul(i *Iter) []byte { // state installed by nextDecomposed while Hangul syllables follow each other
+	p := i.p
+	next := p + hangulUTF8Size // position just past the syllable at p
+	if next >= i.rb.nsrc {
+		i.setDone() // the syllable at p is the last thing in the input
+	} else if i.rb.src.hangul(next) == 0 { // hangul reports 0 at next: leave the Hangul fast path
+		i.rb.ss.next(i.info)
+		i.info = i.rb.f.info(i.rb.src, i.p)
+		i.next = i.rb.f.nextMain
+		return i.next(i) // i.p is unchanged, so the main iterator handles the rune at p
+	}
+	i.p = next
+	return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))] // decomposition of the syllable at p, written to i.buf
+}
+
+func nextDone(i *Iter) []byte { // terminal state installed by setDone
+	return nil // no more segments
+}
+
+// nextMulti walks a multi-segment decomposition (decomposing forms only),
+// returning one leading segment of i.multiSeg per call.
+func nextMulti(i *Iter) []byte {
+	seg := i.multiSeg
+	pos := 1 // skip first rune: step over its first byte, then any continuation bytes
+	for pos < len(seg) && !utf8.RuneStart(seg[pos]) {
+		pos++
+	}
+	for pos < len(seg) {
+		props := i.rb.f.info(input{bytes: seg}, pos)
+		if props.BoundaryBefore() {
+			i.multiSeg = seg[pos:]
+			return seg[:pos]
+		}
+		pos += int(props.size)
+	}
+	// treat last segment as normal decomposition
+	i.next = i.rb.f.nextMain
+	return i.next(i)
+}
+
+// nextMultiNorm is used for iterating over multi-segment decompositions
+// for composing normal forms.
+func nextMultiNorm(i *Iter) []byte {
+	j := 0
+	d := i.multiSeg
+	for j < len(d) {
+		info := i.rb.f.info(input{bytes: d}, j)
+		if info.BoundaryBefore() { // a new segment starts at d[j]: emit what is buffered
+			i.rb.compose()
+			seg := i.buf[:i.rb.flushCopy(i.buf[:])]
+			i.rb.insertUnsafe(input{bytes: d}, j, info) // seed the buffer with the boundary rune
+			i.multiSeg = d[j+int(info.size):]
+			return seg
+		}
+		i.rb.insertUnsafe(input{bytes: d}, j, info)
+		j += int(info.size)
+	}
+	i.multiSeg = nil
+	i.next = nextComposed
+	return doNormComposed(i) // the buffered remainder is finished together with the input that follows
+}
+
+// nextDecomposed is the implementation of Next for forms NFD and NFKD.
+func nextDecomposed(i *Iter) (next []byte) {
+	outp := 0                           // length of the segment assembled so far
+	inCopyStart, outCopyStart := i.p, 0 // input[inCopyStart:i.p] is still to be copied to i.buf[outCopyStart:]
+	for {
+		if sz := int(i.info.size); sz <= 1 {
+			i.rb.ss = 0
+			p := i.p
+			i.p++ // ASCII or illegal byte.  Either way, advance by 1.
+			if i.p >= i.rb.nsrc {
+				i.setDone()
+				return i.returnSlice(p, i.p)
+			} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
+				i.next = i.asciiF // an ASCII byte follows: serve it from the fast path next time
+				return i.returnSlice(p, i.p)
+			}
+			outp++ // the single byte joins the current segment
+		} else if d := i.info.Decomposition(); d != nil {
+			// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
+			// Case 1: there is a leftover to copy.  In this case the decomposition
+			// must begin with a modifier and should always be appended.
+			// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
+			p := outp + len(d)
+			if outp > 0 {
+				i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
+				// TODO: this condition should not be possible, but we leave it
+				// in for defensive purposes.
+				if p > len(i.buf) {
+					return i.buf[:outp] // d does not fit: emit what has been gathered
+				}
+			} else if i.info.multiSegment() {
+				// outp must be 0 as multi-segment decompositions always
+				// start a new segment.
+				if i.multiSeg == nil {
+					i.multiSeg = d
+					i.next = nextMulti
+					return nextMulti(i)
+				}
+				// We are in the last segment.  Treat as normal decomposition.
+				d = i.multiSeg
+				i.multiSeg = nil
+				p = len(d)
+			}
+			prevCC := i.info.tccc
+			if i.p += sz; i.p >= i.rb.nsrc {
+				i.setDone()
+				i.info = Properties{} // Force BoundaryBefore to succeed.
+			} else {
+				i.info = i.rb.f.info(i.rb.src, i.p)
+			}
+			switch i.rb.ss.next(i.info) {
+			case ssOverflow:
+				i.next = nextCGJDecompose // overflow: the next call goes through nextCGJDecompose
+				fallthrough
+			case ssStarter:
+				if outp > 0 {
+					copy(i.buf[outp:], d)
+					return i.buf[:p]
+				}
+				return d // nothing was gathered before d: return the decomposition itself
+			}
+			copy(i.buf[outp:], d)
+			outp = p
+			inCopyStart, outCopyStart = i.p, outp
+			if i.info.ccc < prevCC { // ccc lower than the preceding tccc: reordering needed
+				goto doNorm
+			}
+			continue
+		} else if r := i.rb.src.hangul(i.p); r != 0 { // r != 0: handled as a Hangul syllable
+			outp = decomposeHangul(i.buf[:], r)
+			i.p += hangulUTF8Size
+			inCopyStart, outCopyStart = i.p, outp
+			if i.p >= i.rb.nsrc {
+				i.setDone()
+				break
+			} else if i.rb.src.hangul(i.p) != 0 {
+				i.next = nextHangul // another Hangul syllable follows
+				return i.buf[:outp]
+			}
+		} else { // no decomposition: the rune passes through unchanged
+			p := outp + sz
+			if p > len(i.buf) {
+				break // the segment would exceed i.buf: stop here
+			}
+			outp = p
+			i.p += sz
+		}
+		if i.p >= i.rb.nsrc {
+			i.setDone()
+			break
+		}
+		prevCC := i.info.tccc
+		i.info = i.rb.f.info(i.rb.src, i.p)
+		if v := i.rb.ss.next(i.info); v == ssStarter {
+			break
+		} else if v == ssOverflow {
+			i.next = nextCGJDecompose
+			break
+		}
+		if i.info.ccc < prevCC { // ccc lower than the preceding tccc: reordering needed
+			goto doNorm
+		}
+	}
+	if outCopyStart == 0 {
+		return i.returnSlice(inCopyStart, i.p) // nothing was decomposed: return the input range as is
+	} else if inCopyStart < i.p {
+		i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
+	}
+	return i.buf[:outp]
+doNorm:
+	// Insert what we have decomposed so far in the reorderBuffer.
+	// As we will only reorder, there will always be enough room.
+	i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
+	i.rb.insertDecomposed(i.buf[0:outp])
+	return doNormDecomposed(i)
+}
+
+func doNormDecomposed(i *Iter) []byte { // slow path: collect the rest of the segment in i.rb, then flush it to i.buf
+	for {
+		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
+		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
+			i.setDone()
+			break
+		}
+		i.info = i.rb.f.info(i.rb.src, i.p)
+		if i.info.ccc == 0 { // the next rune has ccc 0: the segment ends before it
+			break
+		}
+		if s := i.rb.ss.next(i.info); s == ssOverflow {
+			i.next = nextCGJDecompose // the next call resumes through nextCGJDecompose
+			break
+		}
+	}
+	// new segment or too many combining characters: exit normalization
+	return i.buf[:i.rb.flushCopy(i.buf[:])]
+}
+
+func nextCGJDecompose(i *Iter) []byte {
+	// Installed after an ssOverflow: insert a CGJ via i.rb, then resume decomposing.
+	i.rb.ss = 0
+	i.rb.insertCGJ()
+	i.next = nextDecomposed
+	i.rb.ss.first(i.info)
+	return doNormDecomposed(i)
+}
+
+// nextComposed is the implementation of Next for forms NFC and NFKC.
+func nextComposed(i *Iter) []byte {
+	outp, startp := 0, i.p // outp: bytes accepted so far; startp: where this segment began
+	var prevCC uint8
+	for {
+		if !i.info.isYesC() { // cannot be passed through verbatim: normalize the whole segment
+			goto doNorm
+		}
+		prevCC = i.info.tccc
+		sz := int(i.info.size)
+		if sz == 0 {
+			sz = 1 // illegal rune: copy byte-by-byte
+		}
+		p := outp + sz
+		if p > len(i.buf) {
+			break // the segment would exceed i.buf: stop here
+		}
+		outp = p
+		i.p += sz
+		if i.p >= i.rb.nsrc {
+			i.setDone()
+			break
+		} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
+			i.rb.ss = 0
+			i.next = i.asciiF // an ASCII byte follows: serve it from the fast path next time
+			break
+		}
+		i.info = i.rb.f.info(i.rb.src, i.p)
+		if v := i.rb.ss.next(i.info); v == ssStarter {
+			break
+		} else if v == ssOverflow {
+			i.next = nextCGJCompose
+			break
+		}
+		if i.info.ccc < prevCC { // ccc lower than the preceding tccc: reordering needed
+			goto doNorm
+		}
+	}
+	return i.returnSlice(startp, i.p) // fast path: the input range is returned unchanged
+doNorm:
+	// reset to start position
+	i.p = startp
+	i.info = i.rb.f.info(i.rb.src, i.p)
+	i.rb.ss.first(i.info)
+	if i.info.multiSegment() {
+		d := i.info.Decomposition()
+		info := i.rb.f.info(input{bytes: d}, 0)
+		i.rb.insertUnsafe(input{bytes: d}, 0, info)
+		i.multiSeg = d[int(info.size):] // the rest of d is consumed by nextMultiNorm
+		i.next = nextMultiNorm
+		return nextMultiNorm(i)
+	}
+	i.rb.ss.first(i.info) // NOTE(review): repeats the first() call made above; presumably harmless, confirm
+	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
+	return doNormComposed(i)
+}
+
+func doNormComposed(i *Iter) []byte {
+	// First rune should already be inserted.
+	for {
+		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
+			i.setDone()
+			break
+		}
+		i.info = i.rb.f.info(i.rb.src, i.p)
+		if s := i.rb.ss.next(i.info); s == ssStarter {
+			break // the rune at i.p opens the next segment and is not inserted
+		} else if s == ssOverflow {
+			i.next = nextCGJCompose // the next call resumes through nextCGJCompose
+			break
+		}
+		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
+	}
+	i.rb.compose()
+	seg := i.buf[:i.rb.flushCopy(i.buf[:])] // flushCopy's result is used as the segment length in i.buf
+	return seg
+}
+
+func nextCGJCompose(i *Iter) []byte { // installed after an ssOverflow while composing
+	i.rb.ss = 0 // instead of first
+	i.rb.insertCGJ()
+	i.next = nextComposed
+	// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
+	// even if they are not. This is particularly dubious for U+FF9E and "UFF9A"
+	// (sic; presumably U+FF9F is meant, verify). If that changes, add a check here.
+	i.rb.ss.first(i.info)
+	i.rb.insertUnsafe(i.rb.src, i.p, i.info) // the saved rune opens the new segment
+	return doNormComposed(i)
+}