From 54d3d47daf9190275bbdaf8703b84969a4593451 Mon Sep 17 00:00:00 2001 From: Corey Hulen Date: Fri, 24 Mar 2017 23:31:34 -0700 Subject: PLT-6076 Adding viper libs for config file changes (#5871) * Adding viper libs for config file changes * Removing the old fsnotify lib * updating some missing libs --- .../golang.org/x/text/internal/triegen/triegen.go | 494 +++++++++++++++++++++ 1 file changed, 494 insertions(+) create mode 100644 vendor/golang.org/x/text/internal/triegen/triegen.go (limited to 'vendor/golang.org/x/text/internal/triegen/triegen.go') diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go new file mode 100644 index 000000000..adb010812 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/triegen.go @@ -0,0 +1,494 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package triegen implements a code generator for a trie for associating +// unsigned integer values with UTF-8 encoded runes. +// +// Many of the go.text packages use tries for storing per-rune information. A +// trie is especially useful if many of the runes have the same value. If this +// is the case, many blocks can be expected to be shared allowing for +// information on many runes to be stored in little space. +// +// As most of the lookups are done directly on []byte slices, the tries use the +// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to +// runes and contributes a little bit to better performance. It also naturally +// provides a fast path for ASCII. +// +// Space is also an issue. There are many code points defined in Unicode and as +// a result tables can get quite large. So every byte counts. The triegen +// package automatically chooses the smallest integer values to represent the +// tables. Compacters allow further compression of the trie by allowing for +// alternative representations of individual trie blocks. +// +// triegen allows generating multiple tries as a single structure. This is +// useful when, for example, one wants to generate tries for several languages +// that have a lot of values in common. Some existing libraries for +// internationalization store all per-language data as a dynamically loadable +// chunk. The go.text packages are designed with the assumption that the user +// typically wants to compile in support for all supported languages, in line +// with the approach common to Go to create a single standalone binary. The +// multi-root trie approach can give significant storage savings in this +// scenario. +// +// triegen generates both tables and code. The code is optimized to use the +// automatically chosen data types. The following code is generated for a Trie +// or multiple Tries named "foo": +// - type fooTrie +// The trie type. +// +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. +// +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. +// +// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. +// +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. +// +// It is recommended that users test the generated trie by checking the returned +// value for every rune. Such exhaustive tests are possible as the the number of +// runes in Unicode is limited. +package triegen // import "golang.org/x/text/internal/triegen" + +// TODO: Arguably, the internally optimized data types would not have to be +// exposed in the generated API. We could also investigate not generating the +// code, but using it through a package. We would have to investigate the impact +// on performance of making such change, though. For packages like unicode/norm, +// small changes like this could tank performance. + +import ( + "encoding/binary" + "fmt" + "hash/crc64" + "io" + "log" + "unicode/utf8" +) + +// builder builds a set of tries for associating values with runes. The set of +// tries can share common index and value blocks. +type builder struct { + Name string + + // ValueType is the type of the trie values looked up. + ValueType string + + // ValueSize is the byte size of the ValueType. + ValueSize int + + // IndexType is the type of trie index values used for all UTF-8 bytes of + // a rune except the last one. + IndexType string + + // IndexSize is the byte size of the IndexType. + IndexSize int + + // SourceType is used when generating the lookup functions. If the user + // requests StringSupport, all lookup functions will be generated for + // string input as well. + SourceType string + + Trie []*Trie + + IndexBlocks []*node + ValueBlocks [][]uint64 + Compactions []compaction + Checksum uint64 + + ASCIIBlock string + StarterBlock string + + indexBlockIdx map[uint64]int + valueBlockIdx map[uint64]nodeIndex + asciiBlockIdx map[uint64]int + + // Stats are used to fill out the template. + Stats struct { + NValueEntries int + NValueBytes int + NIndexEntries int + NIndexBytes int + NHandleBytes int + } + + err error +} + +// A nodeIndex encodes the index of a node, which is defined by the compaction +// which stores it and an index within the compaction. For internal nodes, the +// compaction is always 0. +type nodeIndex struct { + compaction int + index int +} + +// compaction keeps track of stats used for the compaction. +type compaction struct { + c Compacter + blocks []*node + maxHandle uint32 + totalSize int + + // Used by template-based generator and thus exported. + Cutoff uint32 + Offset uint32 + Handler string +} + +func (b *builder) setError(err error) { + if b.err == nil { + b.err = err + } +} + +// An Option can be passed to Gen. +type Option func(b *builder) error + +// Compact configures the trie generator to use the given Compacter. +func Compact(c Compacter) Option { + return func(b *builder) error { + b.Compactions = append(b.Compactions, compaction{ + c: c, + Handler: c.Handler() + "(n, b)"}) + return nil + } +} + +// Gen writes Go code for a shared trie lookup structure to w for the given +// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will +// return the *nameTrie for tries[x]. A value can be looked up by using one of +// the various lookup methods defined on nameTrie. It returns the table size of +// the generated trie. +func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) { + // The index contains two dummy blocks, followed by the zero block. The zero + // block is at offset 0x80, so that the offset for the zero block for + // continuation bytes is 0. + b := &builder{ + Name: name, + Trie: tries, + IndexBlocks: []*node{{}, {}, {}}, + Compactions: []compaction{{ + Handler: name + "Values[n<<6+uint32(b)]", + }}, + // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero + // block. + indexBlockIdx: map[uint64]int{0: 0}, + valueBlockIdx: map[uint64]nodeIndex{0: {}}, + asciiBlockIdx: map[uint64]int{}, + } + b.Compactions[0].c = (*simpleCompacter)(b) + + for _, f := range opts { + if err := f(b); err != nil { + return 0, err + } + } + b.build() + if b.err != nil { + return 0, b.err + } + if err = b.print(w); err != nil { + return 0, err + } + return b.Size(), nil +} + +// A Trie represents a single root node of a trie. A builder may build several +// overlapping tries at once. +type Trie struct { + root *node + + hiddenTrie +} + +// hiddenTrie contains values we want to be visible to the template generator, +// but hidden from the API documentation. +type hiddenTrie struct { + Name string + Checksum uint64 + ASCIIIndex int + StarterIndex int +} + +// NewTrie returns a new trie root. +func NewTrie(name string) *Trie { + return &Trie{ + &node{ + children: make([]*node, blockSize), + values: make([]uint64, utf8.RuneSelf), + }, + hiddenTrie{Name: name}, + } +} + +// Gen is a convenience wrapper around the Gen func passing t as the only trie +// and uses the name passed to NewTrie. It returns the size of the generated +// tables. +func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { + return Gen(w, t.Name, []*Trie{t}, opts...) +} + +// node is a node of the intermediate trie structure. +type node struct { + // children holds this node's children. It is always of length 64. + // A child node may be nil. + children []*node + + // values contains the values of this node. If it is non-nil, this node is + // either a root or leaf node: + // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. + // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. + values []uint64 + + index nodeIndex +} + +// Insert associates value with the given rune. Insert will panic if a non-zero +// value is passed for an invalid rune. +func (t *Trie) Insert(r rune, value uint64) { + if value == 0 { + return + } + s := string(r) + if []rune(s)[0] != r && value != 0 { + // Note: The UCD tables will always assign what amounts to a zero value + // to a surrogate. Allowing a zero value for an illegal rune allows + // users to iterate over [0..MaxRune] without having to explicitly + // exclude surrogates, which would be tedious. + panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) + } + if len(s) == 1 { + // It is a root node value (ASCII). + t.root.values[s[0]] = value + return + } + + n := t.root + for ; len(s) > 1; s = s[1:] { + if n.children == nil { + n.children = make([]*node, blockSize) + } + p := s[0] % blockSize + c := n.children[p] + if c == nil { + c = &node{} + n.children[p] = c + } + if len(s) > 2 && c.values != nil { + log.Fatalf("triegen: insert(%U): found internal node with values", r) + } + n = c + } + if n.values == nil { + n.values = make([]uint64, blockSize) + } + if n.children != nil { + log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) + } + n.values[s[0]-0x80] = value +} + +// Size returns the number of bytes the generated trie will take to store. It +// needs to be exported as it is used in the templates. +func (b *builder) Size() int { + // Index blocks. + sz := len(b.IndexBlocks) * blockSize * b.IndexSize + + // Skip the first compaction, which represents the normal value blocks, as + // its totalSize does not account for the ASCII blocks, which are managed + // separately. + sz += len(b.ValueBlocks) * blockSize * b.ValueSize + for _, c := range b.Compactions[1:] { + sz += c.totalSize + } + + // TODO: this computation does not account for the fixed overhead of a using + // a compaction, either code or data. As for data, though, the typical + // overhead of data is in the order of bytes (2 bytes for cases). Further, + // the savings of using a compaction should anyway be substantial for it to + // be worth it. + + // For multi-root tries, we also need to account for the handles. + if len(b.Trie) > 1 { + sz += 2 * b.IndexSize * len(b.Trie) + } + return sz +} + +func (b *builder) build() { + // Compute the sizes of the values. + var vmax uint64 + for _, t := range b.Trie { + vmax = maxValue(t.root, vmax) + } + b.ValueType, b.ValueSize = getIntType(vmax) + + // Compute all block allocations. + // TODO: first compute the ASCII blocks for all tries and then the other + // nodes. ASCII blocks are more restricted in placement, as they require two + // blocks to be placed consecutively. Processing them first may improve + // sharing (at least one zero block can be expected to be saved.) + for _, t := range b.Trie { + b.Checksum += b.buildTrie(t) + } + + // Compute the offsets for all the Compacters. + offset := uint32(0) + for i := range b.Compactions { + c := &b.Compactions[i] + c.Offset = offset + offset += c.maxHandle + 1 + c.Cutoff = offset + } + + // Compute the sizes of indexes. + // TODO: different byte positions could have different sizes. So far we have + // not found a case where this is beneficial. + imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff) + for _, ib := range b.IndexBlocks { + if x := uint64(ib.index.index); x > imax { + imax = x + } + } + b.IndexType, b.IndexSize = getIntType(imax) +} + +func maxValue(n *node, max uint64) uint64 { + if n == nil { + return max + } + for _, c := range n.children { + max = maxValue(c, max) + } + for _, v := range n.values { + if max < v { + max = v + } + } + return max +} + +func getIntType(v uint64) (string, int) { + switch { + case v < 1<<8: + return "uint8", 1 + case v < 1<<16: + return "uint16", 2 + case v < 1<<32: + return "uint32", 4 + } + return "uint64", 8 +} + +const ( + blockSize = 64 + + // Subtract two blocks to offset 0x80, the first continuation byte. + blockOffset = 2 + + // Subtract three blocks to offset 0xC0, the first non-ASCII starter. + rootBlockOffset = 3 +) + +var crcTable = crc64.MakeTable(crc64.ISO) + +func (b *builder) buildTrie(t *Trie) uint64 { + n := t.root + + // Get the ASCII offset. For the first trie, the ASCII block will be at + // position 0. + hasher := crc64.New(crcTable) + binary.Write(hasher, binary.BigEndian, n.values) + hash := hasher.Sum64() + + v, ok := b.asciiBlockIdx[hash] + if !ok { + v = len(b.ValueBlocks) + b.asciiBlockIdx[hash] = v + + b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:]) + if v == 0 { + // Add the zero block at position 2 so that it will be assigned a + // zero reference in the lookup blocks. + // TODO: always do this? This would allow us to remove a check from + // the trie lookup, but at the expense of extra space. Analyze + // performance for unicode/norm. + b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize)) + } + } + t.ASCIIIndex = v + + // Compute remaining offsets. + t.Checksum = b.computeOffsets(n, true) + // We already subtracted the normal blockOffset from the index. Subtract the + // difference for starter bytes. + t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset) + return t.Checksum +} + +func (b *builder) computeOffsets(n *node, root bool) uint64 { + // For the first trie, the root lookup block will be at position 3, which is + // the offset for UTF-8 non-ASCII starter bytes. + first := len(b.IndexBlocks) == rootBlockOffset + if first { + b.IndexBlocks = append(b.IndexBlocks, n) + } + + // We special-case the cases where all values recursively are 0. This allows + // for the use of a zero block to which all such values can be directed. + hash := uint64(0) + if n.children != nil || n.values != nil { + hasher := crc64.New(crcTable) + for _, c := range n.children { + var v uint64 + if c != nil { + v = b.computeOffsets(c, false) + } + binary.Write(hasher, binary.BigEndian, v) + } + binary.Write(hasher, binary.BigEndian, n.values) + hash = hasher.Sum64() + } + + if first { + b.indexBlockIdx[hash] = rootBlockOffset - blockOffset + } + + // Compacters don't apply to internal nodes. + if n.children != nil { + v, ok := b.indexBlockIdx[hash] + if !ok { + v = len(b.IndexBlocks) - blockOffset + b.IndexBlocks = append(b.IndexBlocks, n) + b.indexBlockIdx[hash] = v + } + n.index = nodeIndex{0, v} + } else { + h, ok := b.valueBlockIdx[hash] + if !ok { + bestI, bestSize := 0, blockSize*b.ValueSize + for i, c := range b.Compactions[1:] { + if sz, ok := c.c.Size(n.values); ok && bestSize > sz { + bestI, bestSize = i+1, sz + } + } + c := &b.Compactions[bestI] + c.totalSize += bestSize + v := c.c.Store(n.values) + if c.maxHandle < v { + c.maxHandle = v + } + h = nodeIndex{bestI, int(v)} + b.valueBlockIdx[hash] = h + } + n.index = h + } + return hash +} -- cgit v1.2.3-1-g7c22