// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved. // See License.txt for license information. package markdown import ( "container/list" "strings" "unicode" "unicode/utf8" ) type Inline interface { IsInline() bool } type inlineBase struct{} func (inlineBase) IsInline() bool { return true } type Text struct { inlineBase Text string Range Range } type CodeSpan struct { inlineBase Code string } type HardLineBreak struct { inlineBase } type SoftLineBreak struct { inlineBase } type InlineLinkOrImage struct { inlineBase Children []Inline RawDestination Range markdown string rawTitle string } func (i *InlineLinkOrImage) Destination() string { return Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) } func (i *InlineLinkOrImage) Title() string { return Unescape(i.rawTitle) } type InlineLink struct { InlineLinkOrImage } type InlineImage struct { InlineLinkOrImage } type ReferenceLinkOrImage struct { inlineBase *ReferenceDefinition Children []Inline } type ReferenceLink struct { ReferenceLinkOrImage } type ReferenceImage struct { ReferenceLinkOrImage } type Autolink struct { inlineBase Children []Inline RawDestination Range markdown string } func (i *Autolink) Destination() string { destination := Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End]) if strings.HasPrefix(destination, "www") { destination = "http://" + destination } return destination } type delimiterType int const ( linkOpeningDelimiter delimiterType = iota imageOpeningDelimiter ) type delimiter struct { Type delimiterType IsInactive bool TextNode int Range Range } type inlineParser struct { markdown string ranges []Range referenceDefinitions []*ReferenceDefinition raw string position int inlines []Inline delimiterStack *list.List } func newInlineParser(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) *inlineParser { return &inlineParser{ markdown: markdown, ranges: ranges, referenceDefinitions: referenceDefinitions, delimiterStack: list.New(), } } func (p *inlineParser) parseBackticks() { count := 1 for i := p.position + 1; i < len(p.raw) && p.raw[i] == '`'; i++ { count++ } opening := p.raw[p.position : p.position+count] search := p.position + count for search < len(p.raw) { end := strings.Index(p.raw[search:], opening) if end == -1 { break } if search+end+count < len(p.raw) && p.raw[search+end+count] == '`' { search += end + count for search < len(p.raw) && p.raw[search] == '`' { search++ } continue } code := strings.Join(strings.Fields(p.raw[p.position+count:search+end]), " ") p.position = search + end + count p.inlines = append(p.inlines, &CodeSpan{ Code: code, }) return } p.position += len(opening) absPos := relativeToAbsolutePosition(p.ranges, p.position-len(opening)) p.inlines = append(p.inlines, &Text{ Text: opening, Range: Range{absPos, absPos + len(opening)}, }) } func (p *inlineParser) parseLineEnding() { if p.position >= 1 && p.raw[p.position-1] == '\t' { p.inlines = append(p.inlines, &HardLineBreak{}) } else if p.position >= 2 && p.raw[p.position-1] == ' ' && (p.raw[p.position-2] == '\t' || p.raw[p.position-1] == ' ') { p.inlines = append(p.inlines, &HardLineBreak{}) } else { p.inlines = append(p.inlines, &SoftLineBreak{}) } p.position++ if p.position < len(p.raw) && p.raw[p.position] == '\n' { p.position++ } } func (p *inlineParser) parseEscapeCharacter() { if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) { absPos := relativeToAbsolutePosition(p.ranges, p.position+1) p.inlines = append(p.inlines, &Text{ Text: string(p.raw[p.position+1]), Range: Range{absPos, absPos + len(string(p.raw[p.position+1]))}, }) p.position += 2 } else { absPos := relativeToAbsolutePosition(p.ranges, p.position) p.inlines = append(p.inlines, &Text{ Text: `\`, Range: Range{absPos, absPos + 1}, }) p.position++ } } func (p *inlineParser) parseText() { if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 { absPos := relativeToAbsolutePosition(p.ranges, p.position) p.inlines = append(p.inlines, &Text{ Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace), Range: Range{absPos, absPos + len(p.raw[p.position:])}, }) p.position = len(p.raw) } else { absPos := relativeToAbsolutePosition(p.ranges, p.position) if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' { s := strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace) p.inlines = append(p.inlines, &Text{ Text: s, Range: Range{absPos, absPos + len(s)}, }) } else { if next == 0 { // Always read at least one character since 'w', 'W', and ':' may not actually match another // type of node next = 1 } p.inlines = append(p.inlines, &Text{ Text: p.raw[p.position : p.position+next], Range: Range{absPos, absPos + next}, }) } p.position += next } } func (p *inlineParser) parseLinkOrImageDelimiter() { absPos := relativeToAbsolutePosition(p.ranges, p.position) if p.raw[p.position] == '[' { p.inlines = append(p.inlines, &Text{ Text: "[", Range: Range{absPos, absPos + 1}, }) p.delimiterStack.PushBack(&delimiter{ Type: linkOpeningDelimiter, TextNode: len(p.inlines) - 1, Range: Range{p.position, p.position + 1}, }) p.position++ } else if p.raw[p.position] == '!' && p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' { p.inlines = append(p.inlines, &Text{ Text: "![", Range: Range{absPos, absPos + 2}, }) p.delimiterStack.PushBack(&delimiter{ Type: imageOpeningDelimiter, TextNode: len(p.inlines) - 1, Range: Range{p.position, p.position + 2}, }) p.position += 2 } else { p.inlines = append(p.inlines, &Text{ Text: "!", Range: Range{absPos, absPos + 1}, }) p.position++ } } func (p *inlineParser) peekAtInlineLinkDestinationAndTitle(position int, isImage bool) (destination, title Range, end int, ok bool) { if position >= len(p.raw) || p.raw[position] != '(' { return } position++ destinationStart := nextNonWhitespace(p.raw, position) if destinationStart >= len(p.raw) { return } else if p.raw[destinationStart] == ')' { return Range{destinationStart, destinationStart}, Range{destinationStart, destinationStart}, destinationStart + 1, true } destination, end, ok = parseLinkDestination(p.raw, destinationStart) if !ok { return } position = end if isImage && position < len(p.raw) && isWhitespaceByte(p.raw[position]) { dimensionsStart := nextNonWhitespace(p.raw, position) if dimensionsStart >= len(p.raw) { return } if p.raw[dimensionsStart] == '=' { // Read optional image dimensions even if we don't use them _, end, ok = parseImageDimensions(p.raw, dimensionsStart) if !ok { return } position = end } } if position < len(p.raw) && isWhitespaceByte(p.raw[position]) { titleStart := nextNonWhitespace(p.raw, position) if titleStart >= len(p.raw) { return } else if p.raw[titleStart] == ')' { return destination, Range{titleStart, titleStart}, titleStart + 1, true } if p.raw[titleStart] == '"' || p.raw[titleStart] == '\'' || p.raw[titleStart] == '(' { title, end, ok = parseLinkTitle(p.raw, titleStart) if !ok { return } position = end } } closingPosition := nextNonWhitespace(p.raw, position) if closingPosition >= len(p.raw) || p.raw[closingPosition] != ')' { return Range{}, Range{}, 0, false } return destination, title, closingPosition + 1, true } func (p *inlineParser) referenceDefinition(label string) *ReferenceDefinition { clean := strings.Join(strings.Fields(label), " ") for _, d := range p.referenceDefinitions { if strings.EqualFold(clean, strings.Join(strings.Fields(d.Label()), " ")) { return d } } return nil } func (p *inlineParser) lookForLinkOrImage() { for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { d := element.Value.(*delimiter) if d.Type != imageOpeningDelimiter && d.Type != linkOpeningDelimiter { continue } if d.IsInactive { p.delimiterStack.Remove(element) break } isImage := d.Type == imageOpeningDelimiter var inline Inline if destination, title, next, ok := p.peekAtInlineLinkDestinationAndTitle(p.position+1, isImage); ok { destinationMarkdownPosition := relativeToAbsolutePosition(p.ranges, destination.Position) linkOrImage := InlineLinkOrImage{ Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), RawDestination: Range{destinationMarkdownPosition, destinationMarkdownPosition + destination.End - destination.Position}, markdown: p.markdown, rawTitle: p.raw[title.Position:title.End], } if d.Type == imageOpeningDelimiter { inline = &InlineImage{linkOrImage} } else { inline = &InlineLink{linkOrImage} } p.position = next } else { referenceLabel := "" label, next, hasLinkLabel := parseLinkLabel(p.raw, p.position+1) if hasLinkLabel && label.End > label.Position { referenceLabel = p.raw[label.Position:label.End] } else { referenceLabel = p.raw[d.Range.End:p.position] if !hasLinkLabel { next = p.position + 1 } } if referenceLabel != "" { if reference := p.referenceDefinition(referenceLabel); reference != nil { linkOrImage := ReferenceLinkOrImage{ ReferenceDefinition: reference, Children: append([]Inline(nil), p.inlines[d.TextNode+1:]...), } if d.Type == imageOpeningDelimiter { inline = &ReferenceImage{linkOrImage} } else { inline = &ReferenceLink{linkOrImage} } p.position = next } } } if inline != nil { if d.Type == imageOpeningDelimiter { p.inlines = append(p.inlines[:d.TextNode], inline) } else { p.inlines = append(p.inlines[:d.TextNode], inline) for element := element.Prev(); element != nil; element = element.Prev() { if d := element.Value.(*delimiter); d.Type == linkOpeningDelimiter { d.IsInactive = true } } } p.delimiterStack.Remove(element) return } else { p.delimiterStack.Remove(element) break } } absPos := relativeToAbsolutePosition(p.ranges, p.position) p.inlines = append(p.inlines, &Text{ Text: "]", Range: Range{absPos, absPos + 1}, }) p.position++ } func CharacterReference(ref string) string { if ref == "" { return "" } if ref[0] == '#' { if len(ref) < 2 { return "" } n := 0 if ref[1] == 'X' || ref[1] == 'x' { if len(ref) < 3 { return "" } for i := 2; i < len(ref); i++ { if i > 9 { return "" } d := ref[i] switch { case d >= '0' && d <= '9': n = n*16 + int(d-'0') case d >= 'a' && d <= 'f': n = n*16 + 10 + int(d-'a') case d >= 'A' && d <= 'F': n = n*16 + 10 + int(d-'A') default: return "" } } } else { for i := 1; i < len(ref); i++ { if i > 8 || ref[i] < '0' || ref[i] > '9' { return "" } n = n*10 + int(ref[i]-'0') } } c := rune(n) if c == '\u0000' || !utf8.ValidRune(c) { return string(unicode.ReplacementChar) } return string(c) } if entity, ok := htmlEntities[ref]; ok { return entity } return "" } func (p *inlineParser) parseCharacterReference() { absPos := relativeToAbsolutePosition(p.ranges, p.position) p.position++ if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 { p.inlines = append(p.inlines, &Text{ Text: "&", Range: Range{absPos, absPos + 1}, }) } else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" { p.position += semicolon + 1 p.inlines = append(p.inlines, &Text{ Text: s, Range: Range{absPos, absPos + len(s)}, }) } else { p.inlines = append(p.inlines, &Text{ Text: "&", Range: Range{absPos, absPos + 1}, }) } } func (p *inlineParser) parseAutolink(c rune) bool { for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { d := element.Value.(*delimiter) if !d.IsInactive { return false } } var link Range if c == ':' { var ok bool link, ok = parseURLAutolink(p.raw, p.position) if !ok { return false } // Since the current position is at the colon, we have to rewind the parsing slightly so that // we don't duplicate the URL scheme rewind := strings.Index(p.raw[link.Position:link.End], ":") if rewind != -1 { lastInline := p.inlines[len(p.inlines)-1] lastText, ok := lastInline.(*Text) if !ok { // This should never occur since parseURLAutolink will only return a non-empty value // when the previous text ends in a valid URL protocol which would mean that the previous // node is a Text node return false } p.inlines = p.inlines[0 : len(p.inlines)-1] p.inlines = append(p.inlines, &Text{ Text: lastText.Text[:len(lastText.Text)-rewind], Range: Range{lastText.Range.Position, lastText.Range.End - rewind}, }) p.position -= rewind } } else if c == 'w' || c == 'W' { var ok bool link, ok = parseWWWAutolink(p.raw, p.position) if !ok { return false } } linkMarkdownPosition := relativeToAbsolutePosition(p.ranges, link.Position) linkRange := Range{linkMarkdownPosition, linkMarkdownPosition + link.End - link.Position} p.inlines = append(p.inlines, &Autolink{ Children: []Inline{ &Text{ Text: p.raw[link.Position:link.End], Range: linkRange, }, }, RawDestination: linkRange, markdown: p.markdown, }) p.position += (link.End - link.Position) return true } func (p *inlineParser) Parse() []Inline { for _, r := range p.ranges { p.raw += p.markdown[r.Position:r.End] } for p.position < len(p.raw) { c, _ := utf8.DecodeRuneInString(p.raw[p.position:]) switch c { case '\r', '\n': p.parseLineEnding() case '\\': p.parseEscapeCharacter() case '`': p.parseBackticks() case '&': p.parseCharacterReference() case '!', '[': p.parseLinkOrImageDelimiter() case ']': p.lookForLinkOrImage() case 'w', 'W', ':': matched := p.parseAutolink(c) if !matched { p.parseText() } default: p.parseText() } } return p.inlines } func ParseInlines(markdown string, ranges []Range, referenceDefinitions []*ReferenceDefinition) (inlines []Inline) { return newInlineParser(markdown, ranges, referenceDefinitions).Parse() } func MergeInlineText(inlines []Inline) []Inline { var ret []Inline for i, v := range inlines { // always add first node if i == 0 { ret = append(ret, v) continue } // not a text node? nothing to merge text, ok := v.(*Text) if !ok { ret = append(ret, v) continue } // previous node is not a text node? nothing to merge prevText, ok := ret[len(ret)-1].(*Text) if !ok { ret = append(ret, v) continue } // previous node is not right before this one if prevText.Range.End != text.Range.Position { ret = append(ret, v) continue } // we have two consecutive text nodes ret[len(ret)-1] = &Text{ Text: prevText.Text + text.Text, Range: Range{prevText.Range.Position, text.Range.End}, } } return ret } func Unescape(markdown string) string { ret := "" position := 0 for position < len(markdown) { c, cSize := utf8.DecodeRuneInString(markdown[position:]) switch c { case '\\': if position+1 < len(markdown) && isEscapableByte(markdown[position+1]) { ret += string(markdown[position+1]) position += 2 } else { ret += `\` position++ } case '&': position++ if semicolon := strings.IndexByte(markdown[position:], ';'); semicolon == -1 { ret += "&" } else if s := CharacterReference(markdown[position : position+semicolon]); s != "" { position += semicolon + 1 ret += s } else { ret += "&" } default: ret += string(c) position += cSize } } return ret }