From 54d3d47daf9190275bbdaf8703b84969a4593451 Mon Sep 17 00:00:00 2001 From: Corey Hulen Date: Fri, 24 Mar 2017 23:31:34 -0700 Subject: PLT-6076 Adding viper libs for config file changes (#5871) * Adding viper libs for config file changes * Removing the old fsnotify lib * updating some missing libs --- vendor/golang.org/x/text/cases/map_test.go | 950 +++++++++++++++++++++++++++++ 1 file changed, 950 insertions(+) create mode 100644 vendor/golang.org/x/text/cases/map_test.go (limited to 'vendor/golang.org/x/text/cases/map_test.go') diff --git a/vendor/golang.org/x/text/cases/map_test.go b/vendor/golang.org/x/text/cases/map_test.go new file mode 100644 index 000000000..8ac39118d --- /dev/null +++ b/vendor/golang.org/x/text/cases/map_test.go @@ -0,0 +1,950 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cases + +import ( + "bytes" + "fmt" + "path" + "strings" + "testing" + "unicode/utf8" + + "golang.org/x/text/internal/testtext" + "golang.org/x/text/language" + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" +) + +type testCase struct { + lang string + src interface{} // string, []string, or nil to skip test + title interface{} // string, []string, or nil to skip test + lower interface{} // string, []string, or nil to skip test + upper interface{} // string, []string, or nil to skip test + opts options +} + +var testCases = []testCase{ + 0: { + lang: "und", + src: "abc aBc ABC abC İsıI ΕΣΆΣ", + title: "Abc Abc Abc Abc İsıi Εσάσ", + lower: "abc abc abc abc i\u0307sıi εσάσ", + upper: "ABC ABC ABC ABC İSII ΕΣΆΣ", + opts: getOpts(HandleFinalSigma(false)), + }, + + 1: { + lang: "und", + src: "abc aBc ABC abC İsıI ΕΣΆΣ Σ _Σ -Σ", + title: "Abc Abc Abc Abc İsıi Εσάς Σ _Σ -Σ", + lower: "abc abc abc abc i\u0307sıi εσάς σ _σ -σ", + upper: "ABC ABC ABC ABC İSII ΕΣΆΣ Σ _Σ -Σ", + opts: getOpts(HandleFinalSigma(true)), + }, + + 2: { // Title cased runes. + lang: supported, + src: "DžA", + title: "Dža", + lower: "dža", + upper: "DŽA", + }, + + 3: { + // Title breaking. + lang: supported, + src: []string{ + "FOO CASE TEST", + "DON'T DO THiS", + "χωΡΊΣ χωΡΊΣ^a χωΡΊΣ:a χωΡΊΣ:^a χωΡΊΣ^ όμΩΣ Σ", + "with-hyphens", + "49ers 49ers", + `"capitalize a^a -hyphen 0X _u a_u:a`, + "MidNumLet a.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg", + "MidNum a,b;c\u037ed\u0589e\u060cf\u2044g\ufe50h", + "\u0345 x\u3031x x\u05d0x \u05d0x a'.a a.a a4,a", + }, + title: []string{ + "Foo Case Test", + "Don't Do This", + "Χωρίς Χωρίσ^A Χωρίσ:a Χωρίσ:^A Χωρίς^ Όμως Σ", + "With-Hyphens", + // Note that 49Ers is correct according to the spec. + // TODO: provide some option to the user to treat different + // characters as cased. + "49Ers 49Ers", + `"Capitalize A^A -Hyphen 0X _U A_u:a`, + "Midnumlet A.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg", + "Midnum A,B;C\u037eD\u0589E\u060cF\u2044G\ufe50H", + "\u0399 X\u3031X X\u05d0x \u05d0X A'.A A.a A4,A", + }, + }, + + // TODO: These are known deviations from the options{} Unicode Word Breaking + // Algorithm. + // { + // "und", + // "x_\u3031_x a4,4a", + // "X_\u3031_x A4,4a", // Currently is "X_\U3031_X A4,4A". + // "x_\u3031_x a4,4a", + // "X_\u3031_X A4,4A", + // options{}, + // }, + + 4: { + // Tests title options + lang: "und", + src: "abc aBc ABC abC İsıI o'Brien", + title: "Abc ABc ABC AbC İsıI O'Brien", + opts: getOpts(NoLower), + }, + + 5: { + lang: "el", + src: "aBc ΟΔΌΣ Οδός Σο ΣΟ Σ oΣ ΟΣ σ ἕξ \u03ac", + title: "Abc Οδός Οδός Σο Σο Σ Oς Ος Σ Ἕξ \u0386", + lower: "abc οδός οδός σο σο σ oς ος σ ἕξ \u03ac", + upper: "ABC ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ Σ OΣ ΟΣ Σ ΕΞ \u0391", // Uppercase removes accents + }, + + 6: { + lang: "tr az", + src: "Isiİ İsıI I\u0307sIiİ İsıI\u0307 I\u0300\u0307", + title: "Isii İsıı I\u0307sıii İsıi I\u0300\u0307", + lower: "ısii isıı isıii isıi \u0131\u0300\u0307", + upper: "ISİİ İSII I\u0307SIİİ İSII\u0307 I\u0300\u0307", + }, + + 7: { + lang: "lt", + src: "I Ï J J̈ Į Į̈ Ì Í Ĩ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤", + title: "I Ï J J̈ Į Į̈ Ì Í Ĩ Xi̇̈ Xj̇̈ Xį̇̈ Xi̇̀ Xi̇́ Xi̇̃ Xi Xi̇̈ Xj Xj̇̈ Xį Xį̇̈ Xi̟̤", + lower: "i i̇̈ j j̇̈ į į̇̈ i̇̀ i̇́ i̇̃ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ xi xi̇̈ xj xj̇̈ xį xį̇̈ xi̟̤", + upper: "I Ï J J̈ Į Į̈ Ì Í Ĩ XÏ XJ̈ XĮ̈ XÌ XÍ XĨ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤", + }, + + 8: { + lang: "lt", + src: "\u012e\u0300 \u00cc i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307", + title: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307", + lower: "\u012f\u0307\u0300 i\u0307\u0300 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307", + upper: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307", + }, + + 9: { + lang: "nl", + src: "ijs IJs Ij Ijs İJ İJs aa aA 'ns 'S", + title: "IJs IJs IJ IJs İj İjs Aa Aa 'ns 's", + }, + + // Note: this specification is not currently part of CLDR. The same holds + // for the leading apostrophe handling for Dutch. + // See http://unicode.org/cldr/trac/ticket/7078. + 10: { + lang: "af", + src: "wag 'n bietjie", + title: "Wag 'n Bietjie", + lower: "wag 'n bietjie", + upper: "WAG 'N BIETJIE", + }, +} + +func TestCaseMappings(t *testing.T) { + for i, tt := range testCases { + src, ok := tt.src.([]string) + if !ok { + src = strings.Split(tt.src.(string), " ") + } + + for _, lang := range strings.Split(tt.lang, " ") { + tag := language.MustParse(lang) + testEntry := func(name string, mk func(language.Tag, options) transform.SpanningTransformer, gold interface{}) { + c := Caser{mk(tag, tt.opts)} + if gold != nil { + wants, ok := gold.([]string) + if !ok { + wants = strings.Split(gold.(string), " ") + } + for j, want := range wants { + if got := c.String(src[j]); got != want { + t.Errorf("%d:%s:\n%s.String(%+q):\ngot %+q;\nwant %+q", i, lang, name, src[j], got, want) + } + } + } + dst := make([]byte, 256) // big enough to hold any result + src := []byte(strings.Join(src, " ")) + v := testtext.AllocsPerRun(20, func() { + c.Transform(dst, src, true) + }) + if v > 1.1 { + t.Errorf("%d:%s:\n%s: number of allocs was %f; want 0", i, lang, name, v) + } + } + testEntry("Upper", makeUpper, tt.upper) + testEntry("Lower", makeLower, tt.lower) + testEntry("Title", makeTitle, tt.title) + } + } +} + +// TestAlloc tests that some mapping methods should not cause any allocation. +func TestAlloc(t *testing.T) { + dst := make([]byte, 256) // big enough to hold any result + src := []byte(txtNonASCII) + + for i, f := range []func() Caser{ + func() Caser { return Upper(language.Und) }, + func() Caser { return Lower(language.Und) }, + func() Caser { return Lower(language.Und, HandleFinalSigma(false)) }, + // TODO: use a shared copy for these casers as well, in order of + // importance, starting with the most important: + // func() Caser { return Title(language.Und) }, + // func() Caser { return Title(language.Und, HandleFinalSigma(false)) }, + } { + testtext.Run(t, "", func(t *testing.T) { + var c Caser + v := testtext.AllocsPerRun(10, func() { + c = f() + }) + if v > 0 { + // TODO: Right now only Upper has 1 allocation. Special-case Lower + // and Title as well to have less allocations for the root locale. + t.Errorf("%d:init: number of allocs was %f; want 0", i, v) + } + v = testtext.AllocsPerRun(2, func() { + c.Transform(dst, src, true) + }) + if v > 0 { + t.Errorf("%d:transform: number of allocs was %f; want 0", i, v) + } + }) + } +} + +func testHandover(t *testing.T, c Caser, src string) { + want := c.String(src) + // Find the common prefix. + pSrc := 0 + for ; pSrc < len(src) && pSrc < len(want) && want[pSrc] == src[pSrc]; pSrc++ { + } + + // Test handover for each substring of the prefix. + for i := 0; i < pSrc; i++ { + testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) { + dst := make([]byte, 4*len(src)) + c.Reset() + nSpan, _ := c.Span([]byte(src[:i]), false) + copy(dst, src[:nSpan]) + nTransform, _, _ := c.Transform(dst[nSpan:], []byte(src[nSpan:]), true) + got := string(dst[:nSpan+nTransform]) + if got != want { + t.Errorf("full string: got %q; want %q", got, want) + } + }) + } +} + +func TestHandover(t *testing.T) { + testCases := []struct { + desc string + t Caser + first, second string + }{{ + "title/nosigma/single midword", + Title(language.Und, HandleFinalSigma(false)), + "A.", "a", + }, { + "title/nosigma/single midword", + Title(language.Und, HandleFinalSigma(false)), + "A", ".a", + }, { + "title/nosigma/double midword", + Title(language.Und, HandleFinalSigma(false)), + "A..", "a", + }, { + "title/nosigma/double midword", + Title(language.Und, HandleFinalSigma(false)), + "A.", ".a", + }, { + "title/nosigma/double midword", + Title(language.Und, HandleFinalSigma(false)), + "A", "..a", + }, { + "title/sigma/single midword", + Title(language.Und), + "ΟΣ.", "a", + }, { + "title/sigma/single midword", + Title(language.Und), + "ΟΣ", ".a", + }, { + "title/sigma/double midword", + Title(language.Und), + "ΟΣ..", "a", + }, { + "title/sigma/double midword", + Title(language.Und), + "ΟΣ.", ".a", + }, { + "title/sigma/double midword", + Title(language.Und), + "ΟΣ", "..a", + }, { + "title/af/leading apostrophe", + Title(language.Afrikaans), + "'", "n bietje", + }} + for _, tc := range testCases { + testtext.Run(t, tc.desc, func(t *testing.T) { + src := tc.first + tc.second + want := tc.t.String(src) + tc.t.Reset() + n, _ := tc.t.Span([]byte(tc.first), false) + + dst := make([]byte, len(want)) + copy(dst, tc.first[:n]) + + nDst, _, _ := tc.t.Transform(dst[n:], []byte(src[n:]), true) + got := string(dst[:n+nDst]) + if got != want { + t.Errorf("got %q; want %q", got, want) + } + }) + } +} + +// minBufSize is the size of the buffer by which the casing operation in +// this package are guaranteed to make progress. +const minBufSize = norm.MaxSegmentSize + +type bufferTest struct { + desc, src, want string + firstErr error + dstSize, srcSize int + t transform.SpanningTransformer +} + +var bufferTests []bufferTest + +func init() { + bufferTests = []bufferTest{{ + desc: "und/upper/short dst", + src: "abcdefg", + want: "ABCDEFG", + firstErr: transform.ErrShortDst, + dstSize: 3, + srcSize: minBufSize, + t: Upper(language.Und), + }, { + desc: "und/upper/short src", + src: "123é56", + want: "123É56", + firstErr: transform.ErrShortSrc, + dstSize: 4, + srcSize: 4, + t: Upper(language.Und), + }, { + desc: "und/upper/no error on short", + src: "12", + want: "12", + firstErr: nil, + dstSize: 1, + srcSize: 1, + t: Upper(language.Und), + }, { + desc: "und/lower/short dst", + src: "ABCDEFG", + want: "abcdefg", + firstErr: transform.ErrShortDst, + dstSize: 3, + srcSize: minBufSize, + t: Lower(language.Und), + }, { + desc: "und/lower/short src", + src: "123É56", + want: "123é56", + firstErr: transform.ErrShortSrc, + dstSize: 4, + srcSize: 4, + t: Lower(language.Und), + }, { + desc: "und/lower/no error on short", + src: "12", + want: "12", + firstErr: nil, + dstSize: 1, + srcSize: 1, + t: Lower(language.Und), + }, { + desc: "und/lower/simple (no final sigma)", + src: "ΟΣ ΟΣΣ", + want: "οσ οσσ", + dstSize: minBufSize, + srcSize: minBufSize, + t: Lower(language.Und, HandleFinalSigma(false)), + }, { + desc: "und/title/simple (no final sigma)", + src: "ΟΣ ΟΣΣ", + want: "Οσ Οσσ", + dstSize: minBufSize, + srcSize: minBufSize, + t: Title(language.Und, HandleFinalSigma(false)), + }, { + desc: "und/title/final sigma: no error", + src: "ΟΣ", + want: "Ος", + dstSize: minBufSize, + srcSize: minBufSize, + t: Title(language.Und), + }, { + desc: "und/title/final sigma: short source", + src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", + want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", + firstErr: transform.ErrShortSrc, + dstSize: minBufSize, + srcSize: 10, + t: Title(language.Und), + }, { + desc: "und/title/final sigma: short destination 1", + src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", + want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", + firstErr: transform.ErrShortDst, + dstSize: 10, + srcSize: minBufSize, + t: Title(language.Und), + }, { + desc: "und/title/final sigma: short destination 2", + src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", + want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", + firstErr: transform.ErrShortDst, + dstSize: 9, + srcSize: minBufSize, + t: Title(language.Und), + }, { + desc: "und/title/final sigma: short destination 3", + src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", + want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", + firstErr: transform.ErrShortDst, + dstSize: 8, + srcSize: minBufSize, + t: Title(language.Und), + }, { + desc: "und/title/clipped UTF-8 rune", + src: "σσσσσσσσσσσ", + want: "Σσσσσσσσσσσ", + firstErr: transform.ErrShortSrc, + dstSize: minBufSize, + srcSize: 5, + t: Title(language.Und), + }, { + desc: "und/title/clipped UTF-8 rune atEOF", + src: "σσσ" + string([]byte{0xCF}), + want: "Σσσ" + string([]byte{0xCF}), + dstSize: minBufSize, + srcSize: minBufSize, + t: Title(language.Und), + }, { + // Note: the choice to change the final sigma at the end in case of + // too many case ignorables is arbitrary. The main reason for this + // choice is that it results in simpler code. + desc: "und/title/final sigma: max ignorables", + src: "ΟΣ" + strings.Repeat(".", maxIgnorable) + "a", + want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", + dstSize: minBufSize, + srcSize: minBufSize, + t: Title(language.Und), + }, { + // Note: the choice to change the final sigma at the end in case of + // too many case ignorables is arbitrary. The main reason for this + // choice is that it results in simpler code. + desc: "und/title/long string", + src: "AA" + strings.Repeat(".", maxIgnorable+1) + "a", + want: "Aa" + strings.Repeat(".", maxIgnorable+1) + "A", + dstSize: minBufSize, + srcSize: len("AA" + strings.Repeat(".", maxIgnorable+1)), + t: Title(language.Und), + }, { + // Note: the choice to change the final sigma at the end in case of + // too many case ignorables is arbitrary. The main reason for this + // choice is that it results in simpler code. + desc: "und/title/final sigma: too many ignorables", + src: "ΟΣ" + strings.Repeat(".", maxIgnorable+1) + "a", + want: "Ος" + strings.Repeat(".", maxIgnorable+1) + "A", + dstSize: minBufSize, + srcSize: len("ΟΣ" + strings.Repeat(".", maxIgnorable+1)), + t: Title(language.Und), + }, { + desc: "und/title/final sigma: apostrophe", + src: "ΟΣ''a", + want: "Οσ''A", + dstSize: minBufSize, + srcSize: minBufSize, + t: Title(language.Und), + }, { + desc: "el/upper/max ignorables", + src: "ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313", + want: "Ο" + strings.Repeat("\u0321", maxIgnorable-1), + dstSize: minBufSize, + srcSize: minBufSize, + t: Upper(language.Greek), + }, { + desc: "el/upper/too many ignorables", + src: "ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", + want: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", + dstSize: minBufSize, + srcSize: len("ο" + strings.Repeat("\u0321", maxIgnorable)), + t: Upper(language.Greek), + }, { + desc: "el/upper/short dst", + src: "123ο", + want: "123Ο", + firstErr: transform.ErrShortDst, + dstSize: 3, + srcSize: minBufSize, + t: Upper(language.Greek), + }, { + desc: "lt/lower/max ignorables", + src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", + want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", + dstSize: minBufSize, + srcSize: minBufSize, + t: Lower(language.Lithuanian), + }, { + desc: "lt/lower/too many ignorables", + src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", + want: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", + dstSize: minBufSize, + srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)), + t: Lower(language.Lithuanian), + }, { + desc: "lt/lower/decomposition with short dst buffer 1", + src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE + firstErr: transform.ErrShortDst, + want: "aaaaai\u0307\u0300", + dstSize: 5, + srcSize: minBufSize, + t: Lower(language.Lithuanian), + }, { + desc: "lt/lower/decomposition with short dst buffer 2", + src: "aaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE + firstErr: transform.ErrShortDst, + want: "aaaai\u0307\u0300", + dstSize: 5, + srcSize: minBufSize, + t: Lower(language.Lithuanian), + }, { + desc: "lt/upper/max ignorables", + src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", + want: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", + dstSize: minBufSize, + srcSize: minBufSize, + t: Upper(language.Lithuanian), + }, { + desc: "lt/upper/too many ignorables", + src: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", + want: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", + dstSize: minBufSize, + srcSize: len("i" + strings.Repeat("\u0321", maxIgnorable)), + t: Upper(language.Lithuanian), + }, { + desc: "lt/upper/short dst", + src: "12i\u0307\u0300", + want: "12\u00cc", + firstErr: transform.ErrShortDst, + dstSize: 3, + srcSize: minBufSize, + t: Upper(language.Lithuanian), + }, { + desc: "aztr/lower/max ignorables", + src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", + want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", + dstSize: minBufSize, + srcSize: minBufSize, + t: Lower(language.Turkish), + }, { + desc: "aztr/lower/too many ignorables", + src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", + want: "\u0131" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", + dstSize: minBufSize, + srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)), + t: Lower(language.Turkish), + }, { + desc: "nl/title/pre-IJ cutoff", + src: " ij", + want: " IJ", + firstErr: transform.ErrShortDst, + dstSize: 2, + srcSize: minBufSize, + t: Title(language.Dutch), + }, { + desc: "nl/title/mid-IJ cutoff", + src: " ij", + want: " IJ", + firstErr: transform.ErrShortDst, + dstSize: 3, + srcSize: minBufSize, + t: Title(language.Dutch), + }, { + desc: "af/title/apostrophe", + src: "'n bietje", + want: "'n Bietje", + firstErr: transform.ErrShortDst, + dstSize: 3, + srcSize: minBufSize, + t: Title(language.Afrikaans), + }} +} + +func TestShortBuffersAndOverflow(t *testing.T) { + for i, tt := range bufferTests { + testtext.Run(t, tt.desc, func(t *testing.T) { + buf := make([]byte, tt.dstSize) + got := []byte{} + var nSrc, nDst int + var err error + for p := 0; p < len(tt.src); p += nSrc { + q := p + tt.srcSize + if q > len(tt.src) { + q = len(tt.src) + } + nDst, nSrc, err = tt.t.Transform(buf, []byte(tt.src[p:q]), q == len(tt.src)) + got = append(got, buf[:nDst]...) + + if p == 0 && err != tt.firstErr { + t.Errorf("%d:%s:\n error was %v; want %v", i, tt.desc, err, tt.firstErr) + break + } + } + if string(got) != tt.want { + t.Errorf("%d:%s:\ngot %+q;\nwant %+q", i, tt.desc, got, tt.want) + } + testHandover(t, Caser{tt.t}, tt.src) + }) + } +} + +func TestSpan(t *testing.T) { + for _, tt := range []struct { + desc string + src string + want string + atEOF bool + err error + t Caser + }{{ + desc: "und/upper/basic", + src: "abcdefg", + want: "", + atEOF: true, + err: transform.ErrEndOfSpan, + t: Upper(language.Und), + }, { + desc: "und/upper/short src", + src: "123É"[:4], + want: "123", + atEOF: false, + err: transform.ErrShortSrc, + t: Upper(language.Und), + }, { + desc: "und/upper/no error on short", + src: "12", + want: "12", + atEOF: false, + t: Upper(language.Und), + }, { + desc: "und/lower/basic", + src: "ABCDEFG", + want: "", + atEOF: true, + err: transform.ErrEndOfSpan, + t: Lower(language.Und), + }, { + desc: "und/lower/short src num", + src: "123é"[:4], + want: "123", + atEOF: false, + err: transform.ErrShortSrc, + t: Lower(language.Und), + }, { + desc: "und/lower/short src greek", + src: "αβγé"[:7], + want: "αβγ", + atEOF: false, + err: transform.ErrShortSrc, + t: Lower(language.Und), + }, { + desc: "und/lower/no error on short", + src: "12", + want: "12", + atEOF: false, + t: Lower(language.Und), + }, { + desc: "und/lower/simple (no final sigma)", + src: "ος οσσ", + want: "οσ οσσ", + atEOF: true, + t: Lower(language.Und, HandleFinalSigma(false)), + }, { + desc: "und/title/simple (no final sigma)", + src: "Οσ Οσσ", + want: "Οσ Οσσ", + atEOF: true, + t: Title(language.Und, HandleFinalSigma(false)), + }, { + desc: "und/lower/final sigma: no error", + src: "οΣ", // Oς + want: "ο", // Oς + err: transform.ErrEndOfSpan, + t: Lower(language.Und), + }, { + desc: "und/title/final sigma: no error", + src: "ΟΣ", // Oς + want: "Ο", // Oς + err: transform.ErrEndOfSpan, + t: Title(language.Und), + }, { + desc: "und/title/final sigma: no short source!", + src: "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ", + want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ", + err: transform.ErrEndOfSpan, + t: Title(language.Und), + }, { + desc: "und/title/clipped UTF-8 rune", + src: "Σσ" + string([]byte{0xCF}), + want: "Σσ", + atEOF: false, + err: transform.ErrShortSrc, + t: Title(language.Und), + }, { + desc: "und/title/clipped UTF-8 rune atEOF", + src: "Σσσ" + string([]byte{0xCF}), + want: "Σσσ" + string([]byte{0xCF}), + atEOF: true, + t: Title(language.Und), + }, { + // Note: the choice to change the final sigma at the end in case of + // too many case ignorables is arbitrary. The main reason for this + // choice is that it results in simpler code. + desc: "und/title/long string", + src: "A" + strings.Repeat("a", maxIgnorable+5), + want: "A" + strings.Repeat("a", maxIgnorable+5), + t: Title(language.Und), + }, { + // Note: the choice to change the final sigma at the end in case of + // too many case ignorables is arbitrary. The main reason for this + // choice is that it results in simpler code. + desc: "und/title/cyrillic", + src: "При", + want: "При", + atEOF: true, + t: Title(language.Und, HandleFinalSigma(false)), + }, { + // Note: the choice to change the final sigma at the end in case of + // too many case ignorables is arbitrary. The main reason for this + // choice is that it results in simpler code. + desc: "und/title/final sigma: max ignorables", + src: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", + want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", + t: Title(language.Und), + }, { + desc: "el/upper/max ignorables - not implemented", + src: "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313", + want: "", + err: transform.ErrEndOfSpan, + t: Upper(language.Greek), + }, { + desc: "el/upper/too many ignorables - not implemented", + src: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", + want: "", + err: transform.ErrEndOfSpan, + t: Upper(language.Greek), + }, { + desc: "el/upper/short dst", + src: "123ο", + want: "", + err: transform.ErrEndOfSpan, + t: Upper(language.Greek), + }, { + desc: "lt/lower/max ignorables", + src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", + want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", + t: Lower(language.Lithuanian), + }, { + desc: "lt/lower/isLower", + src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", + want: "", + err: transform.ErrEndOfSpan, + t: Lower(language.Lithuanian), + }, { + desc: "lt/lower/not identical", + src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE + err: transform.ErrEndOfSpan, + want: "aaaaa", + t: Lower(language.Lithuanian), + }, { + desc: "lt/lower/identical", + src: "aaaai\u0307\u0300", // U+00CC LATIN CAPITAL LETTER I GRAVE + want: "aaaai\u0307\u0300", + t: Lower(language.Lithuanian), + }, { + desc: "lt/upper/not implemented", + src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", + want: "", + err: transform.ErrEndOfSpan, + t: Upper(language.Lithuanian), + }, { + desc: "lt/upper/not implemented, ascii", + src: "AB", + want: "", + err: transform.ErrEndOfSpan, + t: Upper(language.Lithuanian), + }, { + desc: "nl/title/pre-IJ cutoff", + src: " IJ", + want: " IJ", + t: Title(language.Dutch), + }, { + desc: "nl/title/mid-IJ cutoff", + src: " Ia", + want: " Ia", + t: Title(language.Dutch), + }, { + desc: "af/title/apostrophe", + src: "'n Bietje", + want: "'n Bietje", + t: Title(language.Afrikaans), + }, { + desc: "af/title/apostrophe-incorrect", + src: "'N Bietje", + // The Single_Quote (a MidWord), needs to be retained as unspanned so + // that a successive call to Transform can detect that N should not be + // capitalized. + want: "", + err: transform.ErrEndOfSpan, + t: Title(language.Afrikaans), + }} { + testtext.Run(t, tt.desc, func(t *testing.T) { + for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) { + tt.t.Reset() + n, err := tt.t.Span([]byte(tt.src[:p]), false) + if err != nil && err != transform.ErrShortSrc { + t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want)) + break + } + } + tt.t.Reset() + n, err := tt.t.Span([]byte(tt.src), tt.atEOF) + if n != len(tt.want) || err != tt.err { + t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err) + } + testHandover(t, tt.t, tt.src) + }) + } +} + +var txtASCII = strings.Repeat("The quick brown fox jumps over the lazy dog. ", 50) + +// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/ +const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả. Nếu bạn sử +dụng, chuyển đổi, hoặc xây dựng dự án từ nội dung được chia sẻ này, bạn phải áp +dụng giấy phép này hoặc một giấy phép khác có các điều khoản tương tự như giấy +phép này cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào trên đây +cũng có thể được miễn bỏ nếu bạn được sự cho phép của người sở hữu bản quyền. +Phạm vi công chúng — Khi tác phẩm hoặc bất kỳ chương nào của tác phẩm đã trong +vùng dành cho công chúng theo quy định của pháp luật thì tình trạng của nó không +bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.` + +// http://creativecommons.org/licenses/by-sa/2.5/cn/ +const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、 +广播或通过信息网络传播本作品 创作演绎作品 +对本作品进行商业性使用 惟须遵守下列条件: +署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。 +相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作, +您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。` + +// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru +const txt_ru = `При обязательном соблюдении следующих условий: Attribution — Вы +должны атрибутировать произведение (указывать автора и источник) в порядке, +предусмотренном автором или лицензиаром (но только так, чтобы никоим образом не +подразумевалось, что они поддерживают вас или использование вами данного +произведения). Υπό τις ακόλουθες προϋποθέσεις:` + +// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/ +const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με +τον τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια (χωρίς +όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή τη χρήση του έργου +από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε, τροποποιήσετε ή δημιουργήσετε +περαιτέρω βασισμένοι στο έργο θα μπορείτε να διανέμετε το έργο που θα προκύψει +μόνο με την ίδια ή παρόμοια άδεια.` + +const txtNonASCII = txt_vn + txt_cn + txt_ru + txt_gr + +// TODO: Improve ASCII performance. + +func BenchmarkCasers(b *testing.B) { + for _, s := range []struct{ name, text string }{ + {"ascii", txtASCII}, + {"nonASCII", txtNonASCII}, + {"short", "При"}, + } { + src := []byte(s.text) + // Measure case mappings in bytes package for comparison. + for _, f := range []struct { + name string + fn func(b []byte) []byte + }{ + {"lower", bytes.ToLower}, + {"title", bytes.ToTitle}, + {"upper", bytes.ToUpper}, + } { + testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) { + b.SetBytes(int64(len(src))) + for i := 0; i < b.N; i++ { + f.fn(src) + } + }) + } + for _, t := range []struct { + name string + caser transform.SpanningTransformer + }{ + {"fold/default", Fold()}, + {"upper/default", Upper(language.Und)}, + {"lower/sigma", Lower(language.Und)}, + {"lower/simple", Lower(language.Und, HandleFinalSigma(false))}, + {"title/sigma", Title(language.Und)}, + {"title/simple", Title(language.Und, HandleFinalSigma(false))}, + } { + c := Caser{t.caser} + dst := make([]byte, len(src)) + testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) { + b.SetBytes(int64(len(src))) + for i := 0; i < b.N; i++ { + c.Reset() + c.Transform(dst, src, true) + } + }) + // No need to check span for simple cases, as they will be the same + // as sigma. + if strings.HasSuffix(t.name, "/simple") { + continue + } + spanSrc := c.Bytes(src) + testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) { + c.Reset() + if n, _ := c.Span(spanSrc, true); n < len(spanSrc) { + b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n) + } + b.SetBytes(int64(len(spanSrc))) + for i := 0; i < b.N; i++ { + c.Reset() + c.Span(spanSrc, true) + } + }) + } + } +} -- cgit v1.2.3-1-g7c22