From 54d3d47daf9190275bbdaf8703b84969a4593451 Mon Sep 17 00:00:00 2001 From: Corey Hulen Date: Fri, 24 Mar 2017 23:31:34 -0700 Subject: PLT-6076 Adding viper libs for config file changes (#5871) * Adding viper libs for config file changes * Removing the old fsnotify lib * updating some missing libs --- .../golang.org/x/text/internal/triegen/compact.go | 58 ++ .../x/text/internal/triegen/data_test.go | 875 +++++++++++++++++++++ .../text/internal/triegen/example_compact_test.go | 71 ++ .../x/text/internal/triegen/example_test.go | 148 ++++ .../golang.org/x/text/internal/triegen/gen_test.go | 68 ++ vendor/golang.org/x/text/internal/triegen/print.go | 251 ++++++ .../golang.org/x/text/internal/triegen/triegen.go | 494 ++++++++++++ 7 files changed, 1965 insertions(+) create mode 100644 vendor/golang.org/x/text/internal/triegen/compact.go create mode 100644 vendor/golang.org/x/text/internal/triegen/data_test.go create mode 100644 vendor/golang.org/x/text/internal/triegen/example_compact_test.go create mode 100644 vendor/golang.org/x/text/internal/triegen/example_test.go create mode 100644 vendor/golang.org/x/text/internal/triegen/gen_test.go create mode 100644 vendor/golang.org/x/text/internal/triegen/print.go create mode 100644 vendor/golang.org/x/text/internal/triegen/triegen.go (limited to 'vendor/golang.org/x/text/internal/triegen') diff --git a/vendor/golang.org/x/text/internal/triegen/compact.go b/vendor/golang.org/x/text/internal/triegen/compact.go new file mode 100644 index 000000000..397b975c1 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/compact.go @@ -0,0 +1,58 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +// This file defines Compacter and its implementations. + +import "io" + +// A Compacter generates an alternative, more space-efficient way to store a +// trie value block. 
A trie value block holds all possible values for the last +// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block +// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0). +type Compacter interface { + // Size returns whether the Compacter could encode the given block as well + // as its size in case it can. len(v) is always 64. + Size(v []uint64) (sz int, ok bool) + + // Store stores the block using the Compacter's compression method. + // It returns a handle with which the block can be retrieved. + // len(v) is always 64. + Store(v []uint64) uint32 + + // Print writes the data structures associated to the given store to w. + Print(w io.Writer) error + + // Handler returns the name of a function that gets called during trie + // lookup for blocks generated by the Compacter. The function should be of + // the form func (n uint32, b byte) uint64, where n is the index returned by + // the Compacter's Store method and b is the last byte of the UTF-8 + // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the + // block. + Handler() string +} + +// simpleCompacter is the default Compacter used by builder. It implements a +// normal trie block. +type simpleCompacter builder + +// Size accepts every block and reports blockSize * b.ValueSize as its size. +func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) { + return blockSize * b.ValueSize, true +} + +// Store appends v to b.ValueBlocks and returns its handle, which is the +// number of blocks held before the append minus blockOffset. +func (b *simpleCompacter) Store(v []uint64) uint32 { + h := uint32(len(b.ValueBlocks) - blockOffset) + b.ValueBlocks = append(b.ValueBlocks, v) + return h +} + +// Print writes nothing to its argument and returns nil. +func (b *simpleCompacter) Print(io.Writer) error { + // Structures are printed in print.go. 
+ return nil +} + +func (b *simpleCompacter) Handler() string { + panic("Handler should be special-cased for this Compacter") +} diff --git a/vendor/golang.org/x/text/internal/triegen/data_test.go b/vendor/golang.org/x/text/internal/triegen/data_test.go new file mode 100644 index 000000000..91de547a5 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/data_test.go @@ -0,0 +1,875 @@ +// This file is generated with "go test -tags generate". DO NOT EDIT! +// +build !generate + +package triegen_test + +// lookup returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *randTrie) lookup(s []byte) (v uint8, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return randValues[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := randIndex[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := randIndex[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = randIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := randIndex[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = randIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. 
+ } + o = uint32(i)<<6 + uint32(c2) + i = randIndex[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookupUnsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. +func (t *randTrie) lookupUnsafe(s []byte) uint8 { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return randValues[c0] + } + i := randIndex[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = randIndex[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = randIndex[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} + +// lookupString returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *randTrie) lookupString(s string) (v uint8, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return randValues[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := randIndex[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := randIndex[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = randIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. 
+ } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := randIndex[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = randIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + o = uint32(i)<<6 + uint32(c2) + i = randIndex[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. +func (t *randTrie) lookupStringUnsafe(s string) uint8 { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return randValues[c0] + } + i := randIndex[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = randIndex[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = randIndex[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} + +// randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f. +type randTrie struct{} + +func newRandTrie(i int) *randTrie { + return &randTrie{} +} + +// lookupValue determines the type of block n and looks up the value for b. +func (t *randTrie) lookupValue(n uint32, b byte) uint8 { + switch { + default: + return uint8(randValues[n<<6+uint32(b)]) + } +} + +// randValues: 56 blocks, 3584 entries, 3584 bytes +// The third block is the zero block. 
+var randValues = [3584]uint8{ + // Block 0x0, offset 0x0 + // Block 0x1, offset 0x40 + // Block 0x2, offset 0x80 + // Block 0x3, offset 0xc0 + 0xc9: 0x0001, + // Block 0x4, offset 0x100 + 0x100: 0x0001, + // Block 0x5, offset 0x140 + 0x155: 0x0001, + // Block 0x6, offset 0x180 + 0x196: 0x0001, + // Block 0x7, offset 0x1c0 + 0x1ef: 0x0001, + // Block 0x8, offset 0x200 + 0x206: 0x0001, + // Block 0x9, offset 0x240 + 0x258: 0x0001, + // Block 0xa, offset 0x280 + 0x288: 0x0001, + // Block 0xb, offset 0x2c0 + 0x2f2: 0x0001, + // Block 0xc, offset 0x300 + 0x304: 0x0001, + // Block 0xd, offset 0x340 + 0x34b: 0x0001, + // Block 0xe, offset 0x380 + 0x3ba: 0x0001, + // Block 0xf, offset 0x3c0 + 0x3f5: 0x0001, + // Block 0x10, offset 0x400 + 0x41d: 0x0001, + // Block 0x11, offset 0x440 + 0x442: 0x0001, + // Block 0x12, offset 0x480 + 0x4bb: 0x0001, + // Block 0x13, offset 0x4c0 + 0x4e9: 0x0001, + // Block 0x14, offset 0x500 + 0x53e: 0x0001, + // Block 0x15, offset 0x540 + 0x55f: 0x0001, + // Block 0x16, offset 0x580 + 0x5b7: 0x0001, + // Block 0x17, offset 0x5c0 + 0x5d9: 0x0001, + // Block 0x18, offset 0x600 + 0x60e: 0x0001, + // Block 0x19, offset 0x640 + 0x652: 0x0001, + // Block 0x1a, offset 0x680 + 0x68f: 0x0001, + // Block 0x1b, offset 0x6c0 + 0x6dc: 0x0001, + // Block 0x1c, offset 0x700 + 0x703: 0x0001, + // Block 0x1d, offset 0x740 + 0x741: 0x0001, + // Block 0x1e, offset 0x780 + 0x79b: 0x0001, + // Block 0x1f, offset 0x7c0 + 0x7f1: 0x0001, + // Block 0x20, offset 0x800 + 0x833: 0x0001, + // Block 0x21, offset 0x840 + 0x853: 0x0001, + // Block 0x22, offset 0x880 + 0x8a2: 0x0001, + // Block 0x23, offset 0x8c0 + 0x8f8: 0x0001, + // Block 0x24, offset 0x900 + 0x917: 0x0001, + // Block 0x25, offset 0x940 + 0x945: 0x0001, + // Block 0x26, offset 0x980 + 0x99e: 0x0001, + // Block 0x27, offset 0x9c0 + 0x9fd: 0x0001, + // Block 0x28, offset 0xa00 + 0xa0d: 0x0001, + // Block 0x29, offset 0xa40 + 0xa66: 0x0001, + // Block 0x2a, offset 0xa80 + 0xaab: 0x0001, + // Block 0x2b, 
offset 0xac0 + 0xaea: 0x0001, + // Block 0x2c, offset 0xb00 + 0xb2d: 0x0001, + // Block 0x2d, offset 0xb40 + 0xb54: 0x0001, + // Block 0x2e, offset 0xb80 + 0xb90: 0x0001, + // Block 0x2f, offset 0xbc0 + 0xbe5: 0x0001, + // Block 0x30, offset 0xc00 + 0xc28: 0x0001, + // Block 0x31, offset 0xc40 + 0xc7c: 0x0001, + // Block 0x32, offset 0xc80 + 0xcbf: 0x0001, + // Block 0x33, offset 0xcc0 + 0xcc7: 0x0001, + // Block 0x34, offset 0xd00 + 0xd34: 0x0001, + // Block 0x35, offset 0xd40 + 0xd61: 0x0001, + // Block 0x36, offset 0xd80 + 0xdb9: 0x0001, + // Block 0x37, offset 0xdc0 + 0xdda: 0x0001, +} + +// randIndex: 89 blocks, 5696 entries, 5696 bytes +// Block 0 is the zero block. +var randIndex = [5696]uint8{ + // Block 0x0, offset 0x0 + // Block 0x1, offset 0x40 + // Block 0x2, offset 0x80 + // Block 0x3, offset 0xc0 + 0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04, + 0xea: 0x05, 0xeb: 0x06, 0xec: 0x07, + 0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56, + // Block 0x4, offset 0x100 + 0x107: 0x01, + // Block 0x5, offset 0x140 + 0x16c: 0x02, + // Block 0x6, offset 0x180 + 0x19c: 0x03, + 0x1ae: 0x04, + // Block 0x7, offset 0x1c0 + 0x1d8: 0x05, + 0x1f7: 0x06, + // Block 0x8, offset 0x200 + 0x20c: 0x07, + // Block 0x9, offset 0x240 + 0x24a: 0x08, + // Block 0xa, offset 0x280 + 0x2b6: 0x09, + // Block 0xb, offset 0x2c0 + 0x2d5: 0x0a, + // Block 0xc, offset 0x300 + 0x31a: 0x0b, + // Block 0xd, offset 0x340 + 0x373: 0x0c, + // Block 0xe, offset 0x380 + 0x38b: 0x0d, + // Block 0xf, offset 0x3c0 + 0x3f0: 0x0e, + // Block 0x10, offset 0x400 + 0x433: 0x0f, + // Block 0x11, offset 0x440 + 0x45d: 0x10, + // Block 0x12, offset 0x480 + 0x491: 0x08, 0x494: 0x09, 0x497: 0x0a, + 0x49b: 0x0b, 0x49c: 0x0c, + 0x4a1: 0x0d, + 0x4ad: 0x0e, + 0x4ba: 0x0f, + // Block 0x13, offset 0x4c0 + 0x4c1: 0x11, + // Block 0x14, offset 0x500 + 0x531: 0x12, + // Block 0x15, offset 0x540 + 0x546: 0x13, + // Block 0x16, offset 0x580 + 0x5ab: 0x14, + // Block 0x17, offset 0x5c0 + 0x5d4: 0x11, + 0x5fe: 0x11, + // 
Block 0x18, offset 0x600 + 0x618: 0x0a, + // Block 0x19, offset 0x640 + 0x65b: 0x15, + // Block 0x1a, offset 0x680 + 0x6a0: 0x16, + // Block 0x1b, offset 0x6c0 + 0x6d2: 0x17, + 0x6f6: 0x18, + // Block 0x1c, offset 0x700 + 0x711: 0x19, + // Block 0x1d, offset 0x740 + 0x768: 0x1a, + // Block 0x1e, offset 0x780 + 0x783: 0x1b, + // Block 0x1f, offset 0x7c0 + 0x7f9: 0x1c, + // Block 0x20, offset 0x800 + 0x831: 0x1d, + // Block 0x21, offset 0x840 + 0x85e: 0x1e, + // Block 0x22, offset 0x880 + 0x898: 0x1f, + // Block 0x23, offset 0x8c0 + 0x8c7: 0x18, + 0x8d5: 0x14, + 0x8f7: 0x20, + 0x8fe: 0x1f, + // Block 0x24, offset 0x900 + 0x905: 0x21, + // Block 0x25, offset 0x940 + 0x966: 0x03, + // Block 0x26, offset 0x980 + 0x981: 0x07, 0x983: 0x11, + 0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15, + 0x992: 0x16, 0x995: 0x17, 0x996: 0x18, + 0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c, + 0x9a3: 0x1d, + 0x9ad: 0x1e, 0x9af: 0x1f, + 0x9b0: 0x20, 0x9b1: 0x21, + 0x9b8: 0x22, 0x9bd: 0x23, + // Block 0x27, offset 0x9c0 + 0x9cd: 0x22, + // Block 0x28, offset 0xa00 + 0xa0c: 0x08, + // Block 0x29, offset 0xa40 + 0xa6f: 0x1c, + // Block 0x2a, offset 0xa80 + 0xa90: 0x1a, + 0xaaf: 0x23, + // Block 0x2b, offset 0xac0 + 0xae3: 0x19, + 0xae8: 0x24, + 0xafc: 0x25, + // Block 0x2c, offset 0xb00 + 0xb13: 0x26, + // Block 0x2d, offset 0xb40 + 0xb67: 0x1c, + // Block 0x2e, offset 0xb80 + 0xb8f: 0x0b, + // Block 0x2f, offset 0xbc0 + 0xbcb: 0x27, + 0xbe7: 0x26, + // Block 0x30, offset 0xc00 + 0xc34: 0x16, + // Block 0x31, offset 0xc40 + 0xc62: 0x03, + // Block 0x32, offset 0xc80 + 0xcbb: 0x12, + // Block 0x33, offset 0xcc0 + 0xcdf: 0x09, + // Block 0x34, offset 0xd00 + 0xd34: 0x0a, + // Block 0x35, offset 0xd40 + 0xd41: 0x1e, + // Block 0x36, offset 0xd80 + 0xd83: 0x28, + // Block 0x37, offset 0xdc0 + 0xdc0: 0x15, + // Block 0x38, offset 0xe00 + 0xe1a: 0x15, + // Block 0x39, offset 0xe40 + 0xe65: 0x29, + // Block 0x3a, offset 0xe80 + 0xe86: 0x1f, + // Block 0x3b, offset 0xec0 + 0xeec: 0x18, + 
// Block 0x3c, offset 0xf00 + 0xf28: 0x2a, + // Block 0x3d, offset 0xf40 + 0xf53: 0x08, + // Block 0x3e, offset 0xf80 + 0xfa2: 0x2b, + 0xfaa: 0x17, + // Block 0x3f, offset 0xfc0 + 0xfc0: 0x25, 0xfc2: 0x26, + 0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29, + 0xfd5: 0x2a, + 0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d, + 0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31, + 0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35, + 0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39, + 0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c, + // Block 0x40, offset 0x1000 + 0x102c: 0x2c, + // Block 0x41, offset 0x1040 + 0x1074: 0x2c, + // Block 0x42, offset 0x1080 + 0x108c: 0x08, + 0x10a0: 0x2d, + // Block 0x43, offset 0x10c0 + 0x10e8: 0x10, + // Block 0x44, offset 0x1100 + 0x110f: 0x13, + // Block 0x45, offset 0x1140 + 0x114b: 0x2e, + // Block 0x46, offset 0x1180 + 0x118b: 0x23, + 0x119d: 0x0c, + // Block 0x47, offset 0x11c0 + 0x11c3: 0x12, + 0x11f9: 0x0f, + // Block 0x48, offset 0x1200 + 0x121e: 0x1b, + // Block 0x49, offset 0x1240 + 0x1270: 0x2f, + // Block 0x4a, offset 0x1280 + 0x128a: 0x1b, + 0x12a7: 0x02, + // Block 0x4b, offset 0x12c0 + 0x12fb: 0x14, + // Block 0x4c, offset 0x1300 + 0x1333: 0x30, + // Block 0x4d, offset 0x1340 + 0x134d: 0x31, + // Block 0x4e, offset 0x1380 + 0x138e: 0x15, + // Block 0x4f, offset 0x13c0 + 0x13f4: 0x32, + // Block 0x50, offset 0x1400 + 0x141b: 0x33, + // Block 0x51, offset 0x1440 + 0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41, + 0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45, + 0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a, + 0x1472: 0x4b, 0x1473: 0x4c, + 0x1479: 0x4d, 0x147b: 0x4e, + // Block 0x52, offset 0x1480 + 0x1480: 0x34, + 0x1499: 0x11, + 0x14b6: 0x2c, + // Block 0x53, offset 0x14c0 + 0x14e4: 0x0d, + // Block 0x54, offset 0x1500 + 0x1527: 0x08, + // Block 0x55, offset 0x1540 + 0x1555: 0x2b, + // Block 0x56, offset 0x1580 + 0x15b2: 0x35, + // Block 0x57, offset 0x15c0 + 0x15f2: 0x1c, 0x15f4: 0x29, + // Block 0x58, offset 
0x1600 + 0x1600: 0x50, 0x1603: 0x51, + 0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55, +} + +// lookup returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *multiTrie) lookup(s []byte) (v uint64, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return t.ascii[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := t.utf8Start[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := t.utf8Start[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = multiIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := t.utf8Start[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = multiIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + o = uint32(i)<<6 + uint32(c2) + i = multiIndex[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookupUnsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. 
+func (t *multiTrie) lookupUnsafe(s []byte) uint64 { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return t.ascii[c0] + } + i := t.utf8Start[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = multiIndex[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = multiIndex[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} + +// lookupString returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *multiTrie) lookupString(s string) (v uint64, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return t.ascii[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := t.utf8Start[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := t.utf8Start[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = multiIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := t.utf8Start[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = multiIndex[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. 
+ } + o = uint32(i)<<6 + uint32(c2) + i = multiIndex[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. +func (t *multiTrie) lookupStringUnsafe(s string) uint64 { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return t.ascii[c0] + } + i := t.utf8Start[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = multiIndex[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = multiIndex[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} + +// multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e. +type multiTrie struct { + ascii []uint64 // index for ASCII bytes + utf8Start []uint8 // index for UTF-8 bytes >= 0xC0 +} + +func newMultiTrie(i int) *multiTrie { + h := multiTrieHandles[i] + return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]} +} + +type multiTrieHandle struct { + ascii, multi uint8 +} + +// multiTrieHandles: 5 handles, 10 bytes +var multiTrieHandles = [5]multiTrieHandle{ + {0, 0}, // 8c1e77823143d35c: all + {0, 23}, // 8fb58ff8243b45b0: ASCII only + {0, 23}, // 8fb58ff8243b45b0: ASCII only 2 + {0, 24}, // 2ccc43994f11046f: BMP only + {30, 25}, // ce448591bdcb4733: No BMP +} + +// lookupValue determines the type of block n and looks up the value for b. +func (t *multiTrie) lookupValue(n uint32, b byte) uint64 { + switch { + default: + return uint64(multiValues[n<<6+uint32(b)]) + } +} + +// multiValues: 32 blocks, 2048 entries, 16384 bytes +// The third block is the zero block. 
+var multiValues = [2048]uint64{ + // Block 0x0, offset 0x0 + 0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10, + 0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551, + 0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884, + 0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8, + 0x3f: 0x4fd3bcfa72bce8b0, + // Block 0x1, offset 0x40 + 0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357, + 0x7f: 0x782caa2d25a418a9, + // Block 0x2, offset 0x80 + // Block 0x3, offset 0xc0 + 0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4, + // Block 0x4, offset 0x100 + 0x13f: 0x56f8c4c82f5962dc, + // Block 0x5, offset 0x140 + 0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d, + // Block 0x6, offset 0x180 + 0x1bf: 0x7bf4d0ebf302a088, + // Block 0x7, offset 0x1c0 + 0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7, + // Block 0x8, offset 0x200 + 0x23f: 0x5de81c1dff6bf29d, + // Block 0x9, offset 0x240 + 0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3, + // Block 0xa, offset 0x280 + 0x2bf: 0x6a28f01979cbf059, + // Block 0xb, offset 0x2c0 + 0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c, + // Block 0xc, offset 0x300 + 0x33f: 0x5a10ffa9e29184fb, + // Block 0xd, offset 0x340 + 0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79, + // Block 0xe, offset 0x380 + 0x3bf: 0x74071288fff39c76, + // Block 0xf, offset 0x3c0 + 0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849, + // Block 0x10, offset 0x400 + 0x43f: 0x5676a62fd49c6bec, + // Block 0x11, offset 0x440 + 0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f, + // Block 0x12, offset 0x480 + 0x4bf: 0x69d6f0fe711fafc9, + // Block 0x13, offset 0x4c0 + 0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02, + // Block 0x14, offset 0x500 + 0x53f: 0xe03b31814c95f8b, + // Block 0x15, offset 0x540 + 0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc, + // Block 0x16, offset 0x580 + 0x5bf: 0x3c02ea92fb168559, + // Block 0x17, offset 
0x5c0 + 0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645, + // Block 0x18, offset 0x600 + 0x63f: 0x3bb2ed2a72748f4b, + // Block 0x19, offset 0x640 + 0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6, + // Block 0x1a, offset 0x680 + 0x6bf: 0x352711cfb7236418, + // Block 0x1b, offset 0x6c0 + 0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1, + // Block 0x1c, offset 0x700 + 0x73f: 0x7191a77b28d23110, + // Block 0x1d, offset 0x740 + 0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de, + // Block 0x1e, offset 0x780 + // Block 0x1f, offset 0x7c0 +} + +// multiIndex: 29 blocks, 1856 entries, 1856 bytes +// Block 0 is the zero block. +var multiIndex = [1856]uint8{ + // Block 0x0, offset 0x0 + // Block 0x1, offset 0x40 + // Block 0x2, offset 0x80 + // Block 0x3, offset 0xc0 + 0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04, + 0xc8: 0x05, 0xcf: 0x06, + 0xd0: 0x07, + 0xdf: 0x08, + 0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07, + 0xe8: 0x08, 0xef: 0x09, + 0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17, + // Block 0x4, offset 0x100 + 0x120: 0x09, + 0x13f: 0x0a, + // Block 0x5, offset 0x140 + 0x140: 0x0b, + 0x17f: 0x0c, + // Block 0x6, offset 0x180 + 0x180: 0x0d, + // Block 0x7, offset 0x1c0 + 0x1ff: 0x0e, + // Block 0x8, offset 0x200 + 0x200: 0x0f, + // Block 0x9, offset 0x240 + 0x27f: 0x10, + // Block 0xa, offset 0x280 + 0x280: 0x11, + // Block 0xb, offset 0x2c0 + 0x2ff: 0x12, + // Block 0xc, offset 0x300 + 0x300: 0x13, + // Block 0xd, offset 0x340 + 0x37f: 0x14, + // Block 0xe, offset 0x380 + 0x380: 0x15, + // Block 0xf, offset 0x3c0 + 0x3ff: 0x16, + // Block 0x10, offset 0x400 + 0x410: 0x0a, + 0x41f: 0x0b, + 0x420: 0x0c, + 0x43f: 0x0d, + // Block 0x11, offset 0x440 + 0x440: 0x17, + // Block 0x12, offset 0x480 + 0x4bf: 0x18, + // Block 0x13, offset 0x4c0 + 0x4c0: 0x0f, + 0x4ff: 0x10, + // Block 0x14, offset 0x500 + 0x500: 0x19, + // Block 0x15, offset 0x540 + 0x540: 0x12, + // Block 0x16, offset 0x580 + 0x5bf: 0x1a, + // Block 
0x17, offset 0x5c0 + 0x5ff: 0x14, + // Block 0x18, offset 0x600 + 0x600: 0x1b, + // Block 0x19, offset 0x640 + 0x640: 0x16, + // Block 0x1a, offset 0x680 + // Block 0x1b, offset 0x6c0 + 0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04, + 0x6c8: 0x05, 0x6cf: 0x06, + 0x6d0: 0x07, + 0x6df: 0x08, + 0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07, + 0x6e8: 0x08, 0x6ef: 0x09, + // Block 0x1c, offset 0x700 + 0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17, +} diff --git a/vendor/golang.org/x/text/internal/triegen/example_compact_test.go b/vendor/golang.org/x/text/internal/triegen/example_compact_test.go new file mode 100644 index 000000000..7cf604ca4 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/example_compact_test.go @@ -0,0 +1,71 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen_test + +import ( + "fmt" + "io" + "io/ioutil" + + "golang.org/x/text/internal/triegen" +) + +func ExampleCompacter() { + t := triegen.NewTrie("root") + for r := rune(0); r < 10000; r += 64 { + t.Insert(r, 0x9015BADA55^uint64(r)) + } + sz, _ := t.Gen(ioutil.Discard) + + fmt.Printf("Size normal: %5d\n", sz) + + var c myCompacter + sz, _ = t.Gen(ioutil.Discard, triegen.Compact(&c)) + + fmt.Printf("Size compacted: %5d\n", sz) + + // Output: + // Size normal: 81344 + // Size compacted: 3224 +} + +// A myCompacter accepts a block if only the first value is given. 
+type myCompacter []uint64 + +func (c *myCompacter) Size(values []uint64) (sz int, ok bool) { + for _, v := range values[1:] { + if v != 0 { + return 0, false + } + } + return 8, true // the size of a uint64 +} + +func (c *myCompacter) Store(v []uint64) uint32 { + x := uint32(len(*c)) + *c = append(*c, v[0]) + return x +} + +func (c *myCompacter) Print(w io.Writer) error { + fmt.Fprintln(w, "var firstValue = []uint64{") + for _, v := range *c { + fmt.Fprintf(w, "\t%#x,\n", v) + } + fmt.Fprintln(w, "}") + return nil +} + +func (c *myCompacter) Handler() string { + return "getFirstValue" + + // Where getFirstValue is included along with the generated code: + // func getFirstValue(n uint32, b byte) uint64 { + // if b == 0x80 { // the first continuation byte + // return firstValue[n] + // } + // return 0 + // } +} diff --git a/vendor/golang.org/x/text/internal/triegen/example_test.go b/vendor/golang.org/x/text/internal/triegen/example_test.go new file mode 100644 index 000000000..557a152e7 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/example_test.go @@ -0,0 +1,148 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen_test + +import ( + "fmt" + "io/ioutil" + "math/rand" + "unicode" + + "golang.org/x/text/internal/triegen" +) + +const seed = 0x12345 + +var genWriter = ioutil.Discard + +func randomRunes() map[rune]uint8 { + rnd := rand.New(rand.NewSource(seed)) + m := map[rune]uint8{} + for len(m) < 100 { + // Only set our random rune if it is a valid Unicode code point. + if r := rune(rnd.Int31n(unicode.MaxRune + 1)); []rune(string(r))[0] == r { + m[r] = 1 + } + } + return m +} + +// Example_build shows how to build a simple trie. It assigns the value 1 to +// 100 random runes generated by randomRunes. 
+func Example_build() { + t := triegen.NewTrie("rand") + + for r, _ := range randomRunes() { + t.Insert(r, 1) + } + sz, err := t.Gen(genWriter) + + fmt.Printf("Trie size: %d bytes\n", sz) + fmt.Printf("Error: %v\n", err) + + // Output: + // Trie size: 9280 bytes + // Error: +} + +// Example_lookup demonstrates how to use the trie generated by Example_build. +func Example_lookup() { + trie := newRandTrie(0) + + // The same set of runes used by Example_build. + runes := randomRunes() + + // Verify the right value is returned for all runes. + for r := rune(0); r <= unicode.MaxRune; r++ { + // Note that the return type of lookup is uint8. + if v, _ := trie.lookupString(string(r)); v != runes[r] { + fmt.Println("FAILURE") + return + } + } + fmt.Println("SUCCESS") + + // Output: + // SUCCESS +} + +// runeValues generates some random values for a set of interesting runes. +func runeValues() map[rune]uint64 { + rnd := rand.New(rand.NewSource(seed)) + m := map[rune]uint64{} + for p := 4; p <= unicode.MaxRune; p <<= 1 { + for d := -1; d <= 1; d++ { + m[rune(p+d)] = uint64(rnd.Int63()) + } + } + return m +} + +// ExampleGen_build demonstrates the creation of multiple tries sharing common +// blocks. ExampleGen_lookup demonstrates how to use the generated tries. 
+func ExampleGen_build() { + var tries []*triegen.Trie + + rv := runeValues() + for _, c := range []struct { + include func(rune) bool + name string + }{ + {func(r rune) bool { return true }, "all"}, + {func(r rune) bool { return r < 0x80 }, "ASCII only"}, + {func(r rune) bool { return r < 0x80 }, "ASCII only 2"}, + {func(r rune) bool { return r <= 0xFFFF }, "BMP only"}, + {func(r rune) bool { return r > 0xFFFF }, "No BMP"}, + } { + t := triegen.NewTrie(c.name) + tries = append(tries, t) + + for r, v := range rv { + if c.include(r) { + t.Insert(r, v) + } + } + } + sz, err := triegen.Gen(genWriter, "multi", tries) + + fmt.Printf("Trie size: %d bytes\n", sz) + fmt.Printf("Error: %v\n", err) + + // Output: + // Trie size: 18250 bytes + // Error: +} + +// ExampleGen_lookup shows how to look up values in the trie generated by +// ExampleGen_build. +func ExampleGen_lookup() { + rv := runeValues() + for i, include := range []func(rune) bool{ + func(r rune) bool { return true }, // all + func(r rune) bool { return r < 0x80 }, // ASCII only + func(r rune) bool { return r < 0x80 }, // ASCII only 2 + func(r rune) bool { return r <= 0xFFFF }, // BMP only + func(r rune) bool { return r > 0xFFFF }, // No BMP + } { + t := newMultiTrie(i) + + for r := rune(0); r <= unicode.MaxRune; r++ { + x := uint64(0) + if include(r) { + x = rv[r] + } + // As we convert from a valid rune, we know it is safe to use + // lookupStringUnsafe. + if v := t.lookupStringUnsafe(string(r)); x != v { + fmt.Println("FAILURE") + return + } + } + } + fmt.Println("SUCCESS") + + // Output: + // SUCCESS +} diff --git a/vendor/golang.org/x/text/internal/triegen/gen_test.go b/vendor/golang.org/x/text/internal/triegen/gen_test.go new file mode 100644 index 000000000..831627d7a --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/gen_test.go @@ -0,0 +1,68 @@ +// Copyright 2014 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build generate + +package triegen_test + +// The code in this file captures the tries generated in the examples and +// writes them to data_test.go. To invoke it, run: +// go test -tags=generate +// +// Making the generation code a "test" allows us to link in the necessary test +// code. + +import ( + "log" + "os" + "os/exec" +) + +// init regenerates data_test.go: it points genWriter at a temporary file, runs +// the build examples, formats the result with gofmt, moves it into place and +// exits the process. +func init() { + const tmpfile = "tmpout" + const dstfile = "data_test.go" + + f, err := os.Create(tmpfile) + if err != nil { + log.Fatalf("Could not create output file: %v", err) + } + // These deferred calls only run if one of the examples panics; os.Exit and + // log.Fatal skip them, which is why the file is also closed explicitly below. + defer os.Remove(tmpfile) + defer f.Close() + + // We exit before this function returns, regardless of success or failure, + // so there's no need to save (and later restore) the existing genWriter + // value. + genWriter = f + + if _, err := f.Write([]byte(header)); err != nil { + log.Fatalf("Could not write header: %v", err) + } + + Example_build() + ExampleGen_build() + + // Release the file before gofmt rewrites it and before it is renamed. + if err := f.Close(); err != nil { + log.Fatalf("Could not close output file: %v", err) + } + if err := exec.Command("gofmt", "-w", tmpfile).Run(); err != nil { + log.Fatal(err) + } + // Best effort: dstfile does not exist yet when bootstrapping. + os.Remove(dstfile) + if err := os.Rename(tmpfile, dstfile); err != nil { + log.Fatalf("Could not rename %s to %s: %v", tmpfile, dstfile, err) + } + + os.Exit(0) +} + +// header is written at the top of the generated file. +const header = `// This file is generated with "go test -tags generate". DO NOT EDIT! +// +build !generate + +package triegen_test +` + +// Stubs for generated tries. These are needed as we exclude data_test.go if +// the generate flag is set. This will clearly make the tests fail, but that +// is okay. It allows us to bootstrap. + +type trie struct{} + +func (t *trie) lookupString(string) (uint8, int) { return 0, 1 } +func (t *trie) lookupStringUnsafe(string) uint64 { return 0 } + +func newRandTrie(i int) *trie { return &trie{} } +func newMultiTrie(i int) *trie { return &trie{} } diff --git a/vendor/golang.org/x/text/internal/triegen/print.go b/vendor/golang.org/x/text/internal/triegen/print.go new file mode 100644 index 000000000..8d9f120bc --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/print.go @@ -0,0 +1,251 @@ +// Copyright 2014 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +import ( + "bytes" + "fmt" + "io" + "strings" + "text/template" +) + +// print writes all the data structures as well as the code necessary to use the +// trie to w. +func (b *builder) print(w io.Writer) error { + b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize + b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize + b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize + b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize + b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize + + // If we only have one root trie, all starter blocks are at position 0 and + // we can access the arrays directly. + if len(b.Trie) == 1 { + // At this point we cannot refer to the generated tables directly. + b.ASCIIBlock = b.Name + "Values" + b.StarterBlock = b.Name + "Index" + } else { + // Otherwise we need to have explicit starter indexes in the trie + // structure. 
+ b.ASCIIBlock = "t.ascii" + b.StarterBlock = "t.utf8Start" + } + + b.SourceType = "[]byte" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + b.SourceType = "string" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + if err := trieGen.Execute(w, b); err != nil { + return err + } + + for _, c := range b.Compactions { + if err := c.c.Print(w); err != nil { + return err + } + } + + return nil +} + +func printValues(n int, values []uint64) string { + w := &bytes.Buffer{} + boff := n * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff) + var newline bool + for i, v := range values { + if i%6 == 0 { + newline = true + } + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v) + } + } + return w.String() +} + +func printIndex(b *builder, nr int, n *node) string { + w := &bytes.Buffer{} + boff := nr * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff) + var newline bool + for i, c := range n.children { + if i%8 == 0 { + newline = true + } + if c != nil { + v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index) + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v) + } + } + } + return w.String() +} + +var ( + trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{ + "printValues": printValues, + "printIndex": printIndex, + "title": strings.Title, + "dec": func(x int) int { return x - 1 }, + "psize": func(n int) string { + return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024) + }, + }).Parse(trieTemplate)) + lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate)) +) + +// TODO: consider the return type of lookup. It could be uint64, even if the +// internal value type is smaller. We will have to verify this with the +// performance of unicode/norm, which is very sensitive to such changes. 
+const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}} +// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}. +type {{.Name}}Trie struct { {{if $multi}} + ascii []{{.ValueType}} // index for ASCII bytes + utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0 +{{end}}} + +func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}} + h := {{.Name}}TrieHandles[i] + return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] } +} + +type {{.Name}}TrieHandle struct { + ascii, multi {{.IndexType}} +} + +// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes +var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{ +{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}} +{{end}}}{{else}} + return &{{.Name}}Trie{} +} +{{end}} +// lookupValue determines the type of block n and looks up the value for b. +func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} { + switch { {{range $i, $c := .Compactions}} + {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}} + n -= {{$c.Offset}}{{end}} + return {{print $b.ValueType}}({{$c.Handler}}){{end}} + } +} + +// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes +// The third block is the zero block. +var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} { +{{range $i, $v := .ValueBlocks}}{{printValues $i $v}} +{{end}}} + +// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes +// Block 0 is the zero block. +var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} { +{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}} +{{end}}} +` + +// TODO: consider allowing zero-length strings after evaluating performance with +// unicode/norm. 
+const lookupTemplate = ` +// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return {{.ASCIIBlock}}[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + o = uint32(i)<<6 + uint32(c2) + i = {{.Name}}Index[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s. 
+// s must start with a full and valid UTF-8 encoded rune. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return {{.ASCIIBlock}}[c0] + } + i := {{.StarterBlock}}[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} +` diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go new file mode 100644 index 000000000..adb010812 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/triegen.go @@ -0,0 +1,494 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package triegen implements a code generator for a trie for associating +// unsigned integer values with UTF-8 encoded runes. +// +// Many of the go.text packages use tries for storing per-rune information. A +// trie is especially useful if many of the runes have the same value. If this +// is the case, many blocks can be expected to be shared allowing for +// information on many runes to be stored in little space. +// +// As most of the lookups are done directly on []byte slices, the tries use the +// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to +// runes and contributes a little bit to better performance. It also naturally +// provides a fast path for ASCII. +// +// Space is also an issue. There are many code points defined in Unicode and as +// a result tables can get quite large. So every byte counts. 
The triegen +// package automatically chooses the smallest integer types to represent the +// tables. Compacters allow further compression of the trie by allowing for +// alternative representations of individual trie blocks. +// +// triegen allows generating multiple tries as a single structure. This is +// useful when, for example, one wants to generate tries for several languages +// that have a lot of values in common. Some existing libraries for +// internationalization store all per-language data as a dynamically loadable +// chunk. The go.text packages are designed with the assumption that the user +// typically wants to compile in support for all supported languages, in line +// with the approach common to Go to create a single standalone binary. The +// multi-root trie approach can give significant storage savings in this +// scenario. +// +// triegen generates both tables and code. The code is optimized to use the +// automatically chosen data types. The following code is generated for a Trie +// or multiple Tries named "foo": +// - type fooTrie +// The trie type. +// +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. +// +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. +// +// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. +// +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. +// +// It is recommended that users test the generated trie by checking the returned +// value for every rune. Such exhaustive tests are possible as the number of +// runes in Unicode is limited. +package triegen // import "golang.org/x/text/internal/triegen" + +// TODO: Arguably, the internally optimized data types would not have to be +// exposed in the generated API. 
We could also investigate not generating the +// code, but using it through a package. We would have to investigate the impact +// on performance of making such a change, though. For packages like unicode/norm, +// small changes like this could tank performance. + +import ( + "encoding/binary" + "fmt" + "hash/crc64" + "io" + "log" + "unicode/utf8" +) + +// builder builds a set of tries for associating values with runes. The set of +// tries can share common index and value blocks. +type builder struct { + // Name is the prefix of the identifiers emitted by the templates. + Name string + + // ValueType is the type of the trie values looked up. + ValueType string + + // ValueSize is the byte size of the ValueType. + ValueSize int + + // IndexType is the type of trie index values used for all UTF-8 bytes of + // a rune except the last one. + IndexType string + + // IndexSize is the byte size of the IndexType. + IndexSize int + + // SourceType is used when generating the lookup functions. If the user + // requests StringSupport, all lookup functions will be generated for + // string input as well. + SourceType string + + // Trie holds the root tries passed to Gen. + Trie []*Trie + + // IndexBlocks and ValueBlocks hold the blocks that the templates print as + // the generated index and value tables. + IndexBlocks []*node + ValueBlocks [][]uint64 + // Compactions lists the Compacters in use; entry 0 is the default + // simpleCompacter installed by Gen. + Compactions []compaction + // Checksum is the sum of the checksums of all tries. + Checksum uint64 + + // ASCIIBlock and StarterBlock are the expressions the lookup templates use + // to reach the ASCII value block and the starter index block. + ASCIIBlock string + StarterBlock string + + // The maps below go from the hash of a block to the position at which a + // block with that hash was stored, so that equal blocks are shared. + indexBlockIdx map[uint64]int + valueBlockIdx map[uint64]nodeIndex + asciiBlockIdx map[uint64]int + + // Stats are used to fill out the template. + Stats struct { + NValueEntries int + NValueBytes int + NIndexEntries int + NIndexBytes int + NHandleBytes int + } + + // err is the first error recorded through setError. + err error +} + +// A nodeIndex encodes the index of a node, which is defined by the compaction +// which stores it and an index within the compaction. For internal nodes, the +// compaction is always 0. +type nodeIndex struct { + compaction int + index int +} + +// compaction keeps track of stats used for the compaction. +type compaction struct { + c Compacter + blocks []*node + // maxHandle is the largest handle returned by c.Store so far and totalSize + // the accumulated size of the blocks stored with c. + maxHandle uint32 + totalSize int + + // Used by template-based generator and thus exported. 
+ Cutoff uint32 + Offset uint32 + Handler string +} + +func (b *builder) setError(err error) { + if b.err == nil { + b.err = err + } +} + +// An Option can be passed to Gen. +type Option func(b *builder) error + +// Compact configures the trie generator to use the given Compacter. +func Compact(c Compacter) Option { + return func(b *builder) error { + b.Compactions = append(b.Compactions, compaction{ + c: c, + Handler: c.Handler() + "(n, b)"}) + return nil + } +} + +// Gen writes Go code for a shared trie lookup structure to w for the given +// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will +// return the *nameTrie for tries[x]. A value can be looked up by using one of +// the various lookup methods defined on nameTrie. It returns the table size of +// the generated trie. +func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) { + // The index contains two dummy blocks, followed by the zero block. The zero + // block is at offset 0x80, so that the offset for the zero block for + // continuation bytes is 0. + b := &builder{ + Name: name, + Trie: tries, + IndexBlocks: []*node{{}, {}, {}}, + Compactions: []compaction{{ + Handler: name + "Values[n<<6+uint32(b)]", + }}, + // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero + // block. + indexBlockIdx: map[uint64]int{0: 0}, + valueBlockIdx: map[uint64]nodeIndex{0: {}}, + asciiBlockIdx: map[uint64]int{}, + } + b.Compactions[0].c = (*simpleCompacter)(b) + + for _, f := range opts { + if err := f(b); err != nil { + return 0, err + } + } + b.build() + if b.err != nil { + return 0, b.err + } + if err = b.print(w); err != nil { + return 0, err + } + return b.Size(), nil +} + +// A Trie represents a single root node of a trie. A builder may build several +// overlapping tries at once. 
+type Trie struct { + root *node + + hiddenTrie +} + +// hiddenTrie contains values we want to be visible to the template generator, +// but hidden from the API documentation. +type hiddenTrie struct { + // Name is the name passed to NewTrie. Checksum, ASCIIIndex and StarterIndex + // are filled in by the builder when the trie is laid out. + Name string + Checksum uint64 + ASCIIIndex int + StarterIndex int +} + +// NewTrie returns a new trie root. +func NewTrie(name string) *Trie { + return &Trie{ + &node{ + children: make([]*node, blockSize), + values: make([]uint64, utf8.RuneSelf), + }, + hiddenTrie{Name: name}, + } +} + +// Gen is a convenience wrapper around the Gen func passing t as the only trie +// and uses the name passed to NewTrie. It returns the size of the generated +// tables. +func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { + return Gen(w, t.Name, []*Trie{t}, opts...) +} + +// node is a node of the intermediate trie structure. +type node struct { + // children holds this node's children. It is always of length 64. + // A child node may be nil. + children []*node + + // values contains the values of this node. If it is non-nil, this node is + // either a root or leaf node: + // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. + // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. + values []uint64 + + // index is the position assigned to this node's block by the builder. + index nodeIndex +} + +// Insert associates value with the given rune. Insert will panic if a non-zero +// value is passed for an invalid rune. Inserting a zero value is a no-op. If +// the trie is found to mix values and children in one node, Insert terminates +// the program through log.Fatalf. +func (t *Trie) Insert(r rune, value uint64) { + if value == 0 { + return + } + // string(r) encodes an invalid rune (such as a surrogate) as U+FFFD, so + // decoding s again yields r only when r is valid. + s := string(r) + if []rune(s)[0] != r && value != 0 { + // Note: The UCD tables will always assign what amounts to a zero value + // to a surrogate. Allowing a zero value for an illegal rune allows + // users to iterate over [0..MaxRune] without having to explicitly + // exclude surrogates, which would be tedious. + panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) + } + if len(s) == 1 { + // It is a root node value (ASCII). 
+ t.root.values[s[0]] = value + return + } + + n := t.root + for ; len(s) > 1; s = s[1:] { + if n.children == nil { + n.children = make([]*node, blockSize) + } + p := s[0] % blockSize + c := n.children[p] + if c == nil { + c = &node{} + n.children[p] = c + } + if len(s) > 2 && c.values != nil { + log.Fatalf("triegen: insert(%U): found internal node with values", r) + } + n = c + } + if n.values == nil { + n.values = make([]uint64, blockSize) + } + if n.children != nil { + log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) + } + n.values[s[0]-0x80] = value +} + +// Size returns the number of bytes the generated trie will take to store. It +// needs to be exported as it is used in the templates. +func (b *builder) Size() int { + // Index blocks. + sz := len(b.IndexBlocks) * blockSize * b.IndexSize + + // Skip the first compaction, which represents the normal value blocks, as + // its totalSize does not account for the ASCII blocks, which are managed + // separately. + sz += len(b.ValueBlocks) * blockSize * b.ValueSize + for _, c := range b.Compactions[1:] { + sz += c.totalSize + } + + // TODO: this computation does not account for the fixed overhead of using + // a compaction, either code or data. As for data, though, the typical + // overhead of data is in the order of bytes (2 bytes for cases). Further, + // the savings of using a compaction should anyway be substantial for it to + // be worth it. + + // For multi-root tries, we also need to account for the handles. + if len(b.Trie) > 1 { + sz += 2 * b.IndexSize * len(b.Trie) + } + return sz +} + +// build lays out all tries of b: it picks the value type from the largest +// value, allocates the blocks of every trie, assigns each compaction its +// offset and cutoff, and finally picks the index type. +func (b *builder) build() { + // Compute the sizes of the values. + var vmax uint64 + for _, t := range b.Trie { + vmax = maxValue(t.root, vmax) + } + b.ValueType, b.ValueSize = getIntType(vmax) + + // Compute all block allocations. + // TODO: first compute the ASCII blocks for all tries and then the other + // nodes. 
ASCII blocks are more restricted in placement, as they require two + // blocks to be placed consecutively. Processing them first may improve + // sharing (at least one zero block can be expected to be saved.) + for _, t := range b.Trie { + b.Checksum += b.buildTrie(t) + } + + // Compute the offsets for all the Compacters. + offset := uint32(0) + for i := range b.Compactions { + c := &b.Compactions[i] + c.Offset = offset + offset += c.maxHandle + 1 + c.Cutoff = offset + } + + // Compute the sizes of indexes. + // TODO: different byte positions could have different sizes. So far we have + // not found a case where this is beneficial. + imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff) + for _, ib := range b.IndexBlocks { + if x := uint64(ib.index.index); x > imax { + imax = x + } + } + b.IndexType, b.IndexSize = getIntType(imax) +} + +// maxValue returns the largest of max and all values stored in n and its +// descendants. A nil n leaves max unchanged. +func maxValue(n *node, max uint64) uint64 { + if n == nil { + return max + } + for _, c := range n.children { + max = maxValue(c, max) + } + for _, v := range n.values { + if max < v { + max = v + } + } + return max +} + +// getIntType returns the name and the byte size of the smallest unsigned +// integer type that can hold v. +func getIntType(v uint64) (string, int) { + switch { + case v < 1<<8: + return "uint8", 1 + case v < 1<<16: + return "uint16", 2 + case v < 1<<32: + return "uint32", 4 + } + return "uint64", 8 +} + +const ( + // blockSize is the number of entries in a value or index block. + blockSize = 64 + + // Subtract two blocks to offset 0x80, the first continuation byte. + blockOffset = 2 + + // Subtract three blocks to offset 0xC0, the first non-ASCII starter. + rootBlockOffset = 3 +) + +// crcTable is the CRC-64 table (ISO polynomial) used to hash blocks. +var crcTable = crc64.MakeTable(crc64.ISO) + +// buildTrie allocates the blocks for t, reusing blocks whose hash was seen +// before, and records the resulting ASCIIIndex, StarterIndex and Checksum in +// t. It returns the checksum. +func (b *builder) buildTrie(t *Trie) uint64 { + n := t.root + + // Get the ASCII offset. For the first trie, the ASCII block will be at + // position 0. 
+ hasher := crc64.New(crcTable) + binary.Write(hasher, binary.BigEndian, n.values) + hash := hasher.Sum64() + + v, ok := b.asciiBlockIdx[hash] + if !ok { + v = len(b.ValueBlocks) + b.asciiBlockIdx[hash] = v + + b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:]) + if v == 0 { + // Add the zero block at position 2 so that it will be assigned a + // zero reference in the lookup blocks. + // TODO: always do this? This would allow us to remove a check from + // the trie lookup, but at the expense of extra space. Analyze + // performance for unicode/norm. + b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize)) + } + } + t.ASCIIIndex = v + + // Compute remaining offsets. + t.Checksum = b.computeOffsets(n, true) + // We already subtracted the normal blockOffset from the index. Subtract the + // difference for starter bytes. + t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset) + return t.Checksum +} + +// computeOffsets assigns an index to n and, recursively, to all of its +// descendants, storing a block only if no block with the same hash was stored +// before. It returns the hash of the subtree rooted at n, which is 0 for a +// node that has neither children nor values. The root parameter is not used +// by the body. +func (b *builder) computeOffsets(n *node, root bool) uint64 { + // For the first trie, the root lookup block will be at position 3, which is + // the offset for UTF-8 non-ASCII starter bytes. + first := len(b.IndexBlocks) == rootBlockOffset + if first { + b.IndexBlocks = append(b.IndexBlocks, n) + } + + // We special-case the cases where all values recursively are 0. This allows + // for the use of a zero block to which all such values can be directed. + hash := uint64(0) + if n.children != nil || n.values != nil { + // The hash covers the hashes of all children (0 for a missing child) + // followed by the node's own values. + hasher := crc64.New(crcTable) + for _, c := range n.children { + var v uint64 + if c != nil { + v = b.computeOffsets(c, false) + } + binary.Write(hasher, binary.BigEndian, v) + } + binary.Write(hasher, binary.BigEndian, n.values) + hash = hasher.Sum64() + } + + if first { + b.indexBlockIdx[hash] = rootBlockOffset - blockOffset + } + + // Compacters don't apply to internal nodes. 
+ if n.children != nil { + v, ok := b.indexBlockIdx[hash] + if !ok { + v = len(b.IndexBlocks) - blockOffset + b.IndexBlocks = append(b.IndexBlocks, n) + b.indexBlockIdx[hash] = v + } + n.index = nodeIndex{0, v} + } else { + h, ok := b.valueBlockIdx[hash] + if !ok { + bestI, bestSize := 0, blockSize*b.ValueSize + for i, c := range b.Compactions[1:] { + if sz, ok := c.c.Size(n.values); ok && bestSize > sz { + bestI, bestSize = i+1, sz + } + } + c := &b.Compactions[bestI] + c.totalSize += bestSize + v := c.c.Store(n.values) + if c.maxHandle < v { + c.maxHandle = v + } + h = nodeIndex{bestI, int(v)} + b.valueBlockIdx[hash] = h + } + n.index = h + } + return hash +} -- cgit v1.2.3-1-g7c22