From c8b99b97dffbc7c96b3911f6301f0ec69399cea2 Mon Sep 17 00:00:00 2001 From: Carlos Tadeu Panato Junior Date: Thu, 14 Jun 2018 19:27:21 +0200 Subject: Update html2text lib to use the original repo (#8900) --- vendor/github.com/dimchansky/utfbom/utfbom.go | 174 -------------------------- 1 file changed, 174 deletions(-) delete mode 100644 vendor/github.com/dimchansky/utfbom/utfbom.go (limited to 'vendor/github.com/dimchansky/utfbom/utfbom.go') diff --git a/vendor/github.com/dimchansky/utfbom/utfbom.go b/vendor/github.com/dimchansky/utfbom/utfbom.go deleted file mode 100644 index 648184a12..000000000 --- a/vendor/github.com/dimchansky/utfbom/utfbom.go +++ /dev/null @@ -1,174 +0,0 @@ -// Package utfbom implements the detection of the BOM (Unicode Byte Order Mark) and removing as necessary. -// It wraps an io.Reader object, creating another object (Reader) that also implements the io.Reader -// interface but provides automatic BOM checking and removing as necessary. -package utfbom - -import ( - "errors" - "io" -) - -// Encoding is type alias for detected UTF encoding. -type Encoding int - -// Constants to identify detected UTF encodings. -const ( - // Unknown encoding, returned when no BOM was detected - Unknown Encoding = iota - - // UTF8, BOM bytes: EF BB BF - UTF8 - - // UTF-16, big-endian, BOM bytes: FE FF - UTF16BigEndian - - // UTF-16, little-endian, BOM bytes: FF FE - UTF16LittleEndian - - // UTF-32, big-endian, BOM bytes: 00 00 FE FF - UTF32BigEndian - - // UTF-32, little-endian, BOM bytes: FF FE 00 00 - UTF32LittleEndian -) - -const maxConsecutiveEmptyReads = 100 - -// Skip creates Reader which automatically detects BOM (Unicode Byte Order Mark) and removes it as necessary. -// It also returns the encoding detected by the BOM. -// If the detected encoding is not needed, you can call the SkipOnly function. -func Skip(rd io.Reader) (*Reader, Encoding) { - // Is it already a Reader? - b, ok := rd.(*Reader) - if ok { - return b, Unknown - } - - enc, left, err := detectUtf(rd) - return &Reader{ - rd: rd, - buf: left, - err: err, - }, enc -} - -// SkipOnly creates Reader which automatically detects BOM (Unicode Byte Order Mark) and removes it as necessary. -func SkipOnly(rd io.Reader) *Reader { - r, _ := Skip(rd) - return r -} - -// Reader implements automatic BOM (Unicode Byte Order Mark) checking and -// removing as necessary for an io.Reader object. -type Reader struct { - rd io.Reader // reader provided by the client - buf []byte // buffered data - err error // last error -} - -// Read is an implementation of io.Reader interface. -// The bytes are taken from the underlying Reader, but it checks for BOMs, removing them as necessary. -func (r *Reader) Read(p []byte) (n int, err error) { - if len(p) == 0 { - return 0, nil - } - - if r.buf == nil { - if r.err != nil { - return 0, r.readErr() - } - - return r.rd.Read(p) - } - - // copy as much as we can - n = copy(p, r.buf) - r.buf = nilIfEmpty(r.buf[n:]) - return n, nil -} - -func (r *Reader) readErr() error { - err := r.err - r.err = nil - return err -} - -var errNegativeRead = errors.New("utfbom: reader returned negative count from Read") - -func detectUtf(rd io.Reader) (enc Encoding, buf []byte, err error) { - buf, err = readBOM(rd) - - if len(buf) >= 4 { - if isUTF32BigEndianBOM4(buf) { - return UTF32BigEndian, nilIfEmpty(buf[4:]), err - } - if isUTF32LittleEndianBOM4(buf) { - return UTF32LittleEndian, nilIfEmpty(buf[4:]), err - } - } - - if len(buf) > 2 && isUTF8BOM3(buf) { - return UTF8, nilIfEmpty(buf[3:]), err - } - - if (err != nil && err != io.EOF) || (len(buf) < 2) { - return Unknown, nilIfEmpty(buf), err - } - - if isUTF16BigEndianBOM2(buf) { - return UTF16BigEndian, nilIfEmpty(buf[2:]), err - } - if isUTF16LittleEndianBOM2(buf) { - return UTF16LittleEndian, nilIfEmpty(buf[2:]), err - } - - return Unknown, nilIfEmpty(buf), err -} - -func readBOM(rd io.Reader) (buf []byte, err error) { - const maxBOMSize = 4 - var bom [maxBOMSize]byte // used to read BOM - - // read as many bytes as possible - for nEmpty, n := 0, 0; err == nil && len(buf) < maxBOMSize; buf = bom[:len(buf)+n] { - if n, err = rd.Read(bom[len(buf):]); n < 0 { - panic(errNegativeRead) - } - if n > 0 { - nEmpty = 0 - } else { - nEmpty++ - if nEmpty >= maxConsecutiveEmptyReads { - err = io.ErrNoProgress - } - } - } - return -} - -func isUTF32BigEndianBOM4(buf []byte) bool { - return buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF -} - -func isUTF32LittleEndianBOM4(buf []byte) bool { - return buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00 -} - -func isUTF8BOM3(buf []byte) bool { - return buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF -} - -func isUTF16BigEndianBOM2(buf []byte) bool { - return buf[0] == 0xFE && buf[1] == 0xFF -} - -func isUTF16LittleEndianBOM2(buf []byte) bool { - return buf[0] == 0xFF && buf[1] == 0xFE -} - -func nilIfEmpty(buf []byte) (res []byte) { - if len(buf) > 0 { - res = buf - } - return -} -- cgit v1.2.3-1-g7c22