From 961c04cae992eadb42d286d2f85f8a675bdc68c8 Mon Sep 17 00:00:00 2001 From: Christopher Speller Date: Mon, 29 Jan 2018 14:17:40 -0800 Subject: Upgrading server dependancies (#8154) --- vendor/github.com/cpanato/html2text/html2text.go | 312 ----------------------- 1 file changed, 312 deletions(-) delete mode 100644 vendor/github.com/cpanato/html2text/html2text.go (limited to 'vendor/github.com/cpanato/html2text/html2text.go') diff --git a/vendor/github.com/cpanato/html2text/html2text.go b/vendor/github.com/cpanato/html2text/html2text.go deleted file mode 100644 index 61774e8a0..000000000 --- a/vendor/github.com/cpanato/html2text/html2text.go +++ /dev/null @@ -1,312 +0,0 @@ -package html2text - -import ( - "bytes" - "io" - "io/ioutil" - "regexp" - "strings" - "unicode" - - "github.com/dimchansky/utfbom" - - "golang.org/x/net/html" - "golang.org/x/net/html/atom" -) - -var ( - spacingRe = regexp.MustCompile(`[ \r\n\t]+`) - newlineRe = regexp.MustCompile(`\n\n+`) -) - -type textifyTraverseCtx struct { - Buf bytes.Buffer - - prefix string - blockquoteLevel int - lineLength int - endsWithSpace bool - endsWithNewline bool - justClosedDiv bool -} - -func (ctx *textifyTraverseCtx) traverse(node *html.Node) error { - switch node.Type { - default: - return ctx.traverseChildren(node) - - case html.TextNode: - data := strings.Trim(spacingRe.ReplaceAllString(node.Data, " "), " ") - return ctx.emit(data) - - case html.ElementNode: - return ctx.handleElementNode(node) - } -} - -func (ctx *textifyTraverseCtx) handleElementNode(node *html.Node) error { - ctx.justClosedDiv = false - switch node.DataAtom { - case atom.Br: - return ctx.emit("\n") - - case atom.H1, atom.H2, atom.H3: - subCtx := textifyTraverseCtx{} - if err := subCtx.traverseChildren(node); err != nil { - return err - } - - str := subCtx.Buf.String() - dividerLen := 0 - for _, line := range strings.Split(str, "\n") { - if lineLen := len([]rune(line)); lineLen-1 > dividerLen { - dividerLen = lineLen - 1 - } - } - divider := "" - if node.DataAtom == atom.H1 { - divider = strings.Repeat("*", dividerLen) - } else { - divider = strings.Repeat("-", dividerLen) - } - - if node.DataAtom == atom.H3 { - return ctx.emit("\n\n" + str + "\n" + divider + "\n\n") - } - return ctx.emit("\n\n" + divider + "\n" + str + "\n" + divider + "\n\n") - - case atom.Blockquote: - ctx.blockquoteLevel++ - ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " " - if err := ctx.emit("\n"); err != nil { - return err - } - if ctx.blockquoteLevel == 1 { - if err := ctx.emit("\n"); err != nil { - return err - } - } - if err := ctx.traverseChildren(node); err != nil { - return err - } - ctx.blockquoteLevel-- - ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) - if ctx.blockquoteLevel > 0 { - ctx.prefix += " " - } - return ctx.emit("\n\n") - - case atom.Div: - if ctx.lineLength > 0 { - if err := ctx.emit("\n"); err != nil { - return err - } - } - if err := ctx.traverseChildren(node); err != nil { - return err - } - var err error - if ctx.justClosedDiv == false { - err = ctx.emit("\n") - } - ctx.justClosedDiv = true - return err - - case atom.Li: - if err := ctx.emit("* "); err != nil { - return err - } - - if err := ctx.traverseChildren(node); err != nil { - return err - } - - return ctx.emit("\n") - - case atom.B, atom.Strong: - subCtx := textifyTraverseCtx{} - subCtx.endsWithSpace = true - if err := subCtx.traverseChildren(node); err != nil { - return err - } - str := subCtx.Buf.String() - return ctx.emit("*" + str + "*") - - case atom.A: - // If image is the only child, take its alt text as the link text - if img := node.FirstChild; img != nil && node.LastChild == img && img.DataAtom == atom.Img { - if altText := getAttrVal(img, "alt"); altText != "" { - ctx.emit(altText) - } - } else if err := ctx.traverseChildren(node); err != nil { - return err - } - - hrefLink := "" - if attrVal := getAttrVal(node, "href"); attrVal != "" { - attrVal = ctx.normalizeHrefLink(attrVal) - if attrVal != "" { - hrefLink = "( " + attrVal + " )" - } - } - - return ctx.emit(hrefLink) - - case atom.P, atom.Ul, atom.Table: - if err := ctx.emit("\n\n"); err != nil { - return err - } - - if err := ctx.traverseChildren(node); err != nil { - return err - } - - return ctx.emit("\n\n") - - case atom.Tr: - if err := ctx.traverseChildren(node); err != nil { - return err - } - - return ctx.emit("\n") - - case atom.Style, atom.Script, atom.Head: - // Ignore the subtree - return nil - - default: - return ctx.traverseChildren(node) - } -} -func (ctx *textifyTraverseCtx) traverseChildren(node *html.Node) error { - for c := node.FirstChild; c != nil; c = c.NextSibling { - if err := ctx.traverse(c); err != nil { - return err - } - } - - return nil -} - -func (ctx *textifyTraverseCtx) emit(data string) error { - if len(data) == 0 { - return nil - } - lines := ctx.breakLongLines(data) - var err error - for _, line := range lines { - runes := []rune(line) - startsWithSpace := unicode.IsSpace(runes[0]) - if !startsWithSpace && !ctx.endsWithSpace { - ctx.Buf.WriteByte(' ') - ctx.lineLength++ - } - ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1]) - for _, c := range line { - _, err = ctx.Buf.WriteString(string(c)) - if err != nil { - return err - } - ctx.lineLength++ - if c == '\n' { - ctx.lineLength = 0 - if ctx.prefix != "" { - _, err = ctx.Buf.WriteString(ctx.prefix) - if err != nil { - return err - } - } - } - } - } - return nil -} - -func (ctx *textifyTraverseCtx) breakLongLines(data string) []string { - // only break lines when we are in blockquotes - if ctx.blockquoteLevel == 0 { - return []string{data} - } - var ret []string - runes := []rune(data) - l := len(runes) - existing := ctx.lineLength - if existing >= 74 { - ret = append(ret, "\n") - existing = 0 - } - for l+existing > 74 { - i := 74 - existing - for i >= 0 && !unicode.IsSpace(runes[i]) { - i-- - } - if i == -1 { - // no spaces, so go the other way - i = 74 - existing - for i < l && !unicode.IsSpace(runes[i]) { - i++ - } - } - ret = append(ret, string(runes[:i])+"\n") - for i < l && unicode.IsSpace(runes[i]) { - i++ - } - runes = runes[i:] - l = len(runes) - existing = 0 - } - if len(runes) > 0 { - ret = append(ret, string(runes)) - } - return ret -} - -func (ctx *textifyTraverseCtx) normalizeHrefLink(link string) string { - link = strings.TrimSpace(link) - link = strings.TrimPrefix(link, "mailto:") - return link -} - -func getAttrVal(node *html.Node, attrName string) string { - for _, attr := range node.Attr { - if attr.Key == attrName { - return attr.Val - } - } - - return "" -} - -func FromHtmlNode(doc *html.Node) (string, error) { - ctx := textifyTraverseCtx{ - Buf: bytes.Buffer{}, - } - if err := ctx.traverse(doc); err != nil { - return "", err - } - - text := strings.TrimSpace(newlineRe.ReplaceAllString( - strings.Replace(ctx.Buf.String(), "\n ", "\n", -1), "\n\n")) - return text, nil - -} - -func FromReader(reader io.Reader) (string, error) { - bs, err := ioutil.ReadAll(reader) - newReader, _ := utfbom.Skip(bytes.NewReader(bs)) - - doc, err := html.Parse(newReader) - if err != nil { - return "", err - } - return FromHtmlNode(doc) -} - -func FromString(input string) (string, error) { - bs := utfbom.SkipOnly(bytes.NewReader([]byte(input))) - text, err := FromReader(bs) - if err != nil { - return "", err - } - return text, nil -} -- cgit v1.2.3-1-g7c22