// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package utf32 import ( "testing" "golang.org/x/text/encoding" "golang.org/x/text/encoding/internal/enctest" "golang.org/x/text/transform" ) var ( utf32LEIB = UTF32(LittleEndian, IgnoreBOM) // UTF-32LE (atypical interpretation) utf32LEUB = UTF32(LittleEndian, UseBOM) // UTF-32, LE // utf32LEEB = UTF32(LittleEndian, ExpectBOM) // UTF-32, LE, Expect - covered in encoding_test.go utf32BEIB = UTF32(BigEndian, IgnoreBOM) // UTF-32BE (atypical interpretation) utf32BEUB = UTF32(BigEndian, UseBOM) // UTF-32 default utf32BEEB = UTF32(BigEndian, ExpectBOM) // UTF-32 Expect ) func TestBasics(t *testing.T) { testCases := []struct { e encoding.Encoding encPrefix string encSuffix string encoded string utf8 string }{{ e: utf32BEIB, encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65", utf8: "\x57\u00e4\U0001d565", }, { e: UTF32(BigEndian, ExpectBOM), encPrefix: "\x00\x00\xfe\xff", encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65", utf8: "\x57\u00e4\U0001d565", }, { e: UTF32(LittleEndian, IgnoreBOM), encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00", utf8: "\x57\u00e4\U0001d565", }, { e: UTF32(LittleEndian, ExpectBOM), encPrefix: "\xff\xfe\x00\x00", encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00", utf8: "\x57\u00e4\U0001d565", }} for _, tc := range testCases { enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix) } } func TestFiles(t *testing.T) { enctest.TestFile(t, utf32BEIB) } func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, utf32BEIB) } func TestUTF32(t *testing.T) { testCases := []struct { desc string src string notEOF bool // the inverse of atEOF sizeDst int want string nSrc int err error t transform.Transformer }{{ desc: "utf-32 IgnoreBOM dec: empty string", t: utf32BEIB.NewDecoder(), }, { desc: "utf-32 UseBOM dec: empty string", t: utf32BEUB.NewDecoder(), }, { desc: "utf-32 ExpectBOM dec: empty string", err: ErrMissingBOM, t: utf32BEEB.NewDecoder(), }, { desc: "utf-32be dec: Doesn't interpret U+FEFF as BOM", src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "\uFEFF\U00012345=Ra", nSrc: 20, t: utf32BEIB.NewDecoder(), }, { desc: "utf-32be dec: Interprets little endian U+FEFF as invalid", src: "\xFF\xFE\x00\x00\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "\uFFFD\U00012345=Ra", nSrc: 20, t: utf32BEIB.NewDecoder(), }, { desc: "utf-32le dec: Doesn't interpret U+FEFF as BOM", src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", sizeDst: 100, want: "\uFEFF\U00012345=Ra", nSrc: 20, t: utf32LEIB.NewDecoder(), }, { desc: "utf-32le dec: Interprets big endian U+FEFF as invalid", src: "\x00\x00\xFE\xFF\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", sizeDst: 100, want: "\uFFFD\U00012345=Ra", nSrc: 20, t: utf32LEIB.NewDecoder(), }, { desc: "utf-32 enc: Writes big-endian BOM", src: "\U00012345=Ra", sizeDst: 100, want: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", nSrc: 7, t: utf32BEUB.NewEncoder(), }, { desc: "utf-32 enc: Writes little-endian BOM", src: "\U00012345=Ra", sizeDst: 100, want: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", nSrc: 7, t: utf32LEUB.NewEncoder(), }, { desc: "utf-32 dec: Interprets text using big-endian default when BOM not present", src: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "\U00012345=Ra", nSrc: 16, t: utf32BEUB.NewDecoder(), }, { desc: "utf-32 dec: Interprets text using little-endian default when BOM not present", src: "\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", sizeDst: 100, want: "\U00012345=Ra", nSrc: 16, t: utf32LEUB.NewDecoder(), }, { desc: "utf-32 dec: BOM determines encoding BE", src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "\U00012345=Ra", nSrc: 20, t: utf32BEUB.NewDecoder(), }, { desc: "utf-32 dec: BOM determines encoding LE", src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", sizeDst: 100, want: "\U00012345=Ra", nSrc: 20, t: utf32LEUB.NewDecoder(), }, { desc: "utf-32 dec: BOM determines encoding LE, change default", src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", sizeDst: 100, want: "\U00012345=Ra", nSrc: 20, t: utf32BEUB.NewDecoder(), }, { desc: "utf-32 dec: BOM determines encoding BE, change default", src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "\U00012345=Ra", nSrc: 20, t: utf32LEUB.NewDecoder(), }, { desc: "utf-32 dec: Don't change big-endian byte order mid-stream", src: "\x00\x01\x23\x45\x00\x00\x00\x3D\xFF\xFE\x00\x00\x00\x00\xFE\xFF\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "\U00012345=\uFFFD\uFEFFRa", nSrc: 24, t: utf32BEUB.NewDecoder(), }, { desc: "utf-32 dec: Don't change little-endian byte order mid-stream", src: "\x45\x23\x01\x00\x3D\x00\x00\x00\x00\x00\xFE\xFF\xFF\xFE\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00", sizeDst: 100, want: "\U00012345=\uFFFD\uFEFFRa", nSrc: 24, t: utf32LEUB.NewDecoder(), }, { desc: "utf-32 dec: Fail on missing BOM when required", src: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", sizeDst: 100, want: "", nSrc: 0, err: ErrMissingBOM, t: utf32BEEB.NewDecoder(), }, { desc: "utf-32 enc: Short dst", src: "\U00012345=Ra", sizeDst: 15, want: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52", nSrc: 6, err: transform.ErrShortDst, t: utf32BEIB.NewEncoder(), }, { desc: "utf-32 enc: Short src", src: "\U00012345=Ra\xC2", notEOF: true, sizeDst: 100, want: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61", nSrc: 7, err: transform.ErrShortSrc, t: utf32BEIB.NewEncoder(), }, { desc: "utf-32 enc: Invalid input", src: "\x80\xC1\xC2\x7F\xC2", sizeDst: 100, want: "\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\x00\x7F\x00\x00\xFF\xFD", nSrc: 5, t: utf32BEIB.NewEncoder(), }, { desc: "utf-32 dec: Short dst", src: "\x00\x00\x00\x41", sizeDst: 0, want: "", nSrc: 0, err: transform.ErrShortDst, t: utf32BEIB.NewDecoder(), }, { desc: "utf-32 dec: Short src", src: "\x00\x00\x00", notEOF: true, sizeDst: 4, want: "", nSrc: 0, err: transform.ErrShortSrc, t: utf32BEIB.NewDecoder(), }, { desc: "utf-32 dec: Invalid input", src: "\x00\x00\xD8\x00\x00\x00\xDF\xFF\x00\x11\x00\x00\x00\x00\x00", sizeDst: 100, want: "\uFFFD\uFFFD\uFFFD\uFFFD", nSrc: 15, t: utf32BEIB.NewDecoder(), }} for i, tc := range testCases { b := make([]byte, tc.sizeDst) nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF) if err != tc.err { t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) } if got := string(b[:nDst]); got != tc.want { t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) } if nSrc != tc.nSrc { t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) } } }