From e135cc99e1b93f41f67dbdfac214198a55f387a6 Mon Sep 17 00:00:00 2001 From: Adrian Date: Wed, 11 Jul 2018 18:12:46 +0200 Subject: Expose range information for markdown text nodes [WIP] (#9067) * Track positions of markdown text * Add tests for markdown text ranges --- utils/markdown/inlines.go | 51 +++++++++++++----- utils/markdown/text_range_test.go | 110 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 14 deletions(-) create mode 100644 utils/markdown/text_range_test.go (limited to 'utils') diff --git a/utils/markdown/inlines.go b/utils/markdown/inlines.go index 03da2f15c..8a4f7a531 100644 --- a/utils/markdown/inlines.go +++ b/utils/markdown/inlines.go @@ -21,7 +21,8 @@ func (inlineBase) IsInline() bool { return true } type Text struct { inlineBase - Text string + Text string + Range Range } type CodeSpan struct { @@ -141,8 +142,10 @@ func (p *inlineParser) parseBackticks() { return } p.position += len(opening) + absPos := relativeToAbsolutePosition(p.ranges, p.position-len(opening)) p.inlines = append(p.inlines, &Text{ - Text: opening, + Text: opening, + Range: Range{absPos, absPos + len(opening)}, }) } @@ -162,13 +165,17 @@ func (p *inlineParser) parseLineEnding() { func (p *inlineParser) parseEscapeCharacter() { if p.position+1 < len(p.raw) && isEscapableByte(p.raw[p.position+1]) { + absPos := relativeToAbsolutePosition(p.ranges, p.position+1) p.inlines = append(p.inlines, &Text{ - Text: string(p.raw[p.position+1]), + Text: string(p.raw[p.position+1]), + Range: Range{absPos, absPos + len(string(p.raw[p.position+1]))}, }) p.position += 2 } else { + absPos := relativeToAbsolutePosition(p.ranges, p.position) p.inlines = append(p.inlines, &Text{ - Text: `\`, + Text: `\`, + Range: Range{absPos, absPos + 1}, }) p.position++ } @@ -176,18 +183,24 @@ func (p *inlineParser) parseEscapeCharacter() { func (p *inlineParser) parseText() { if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]"); next == -1 { + absPos := 
relativeToAbsolutePosition(p.ranges, p.position) p.inlines = append(p.inlines, &Text{ - Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace), + Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace), + Range: Range{absPos, absPos + len(p.raw[p.position:])}, }) p.position = len(p.raw) } else { + absPos := relativeToAbsolutePosition(p.ranges, p.position) if p.raw[p.position+next] == '\r' || p.raw[p.position+next] == '\n' { + s := strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace) p.inlines = append(p.inlines, &Text{ - Text: strings.TrimRightFunc(p.raw[p.position:p.position+next], isWhitespace), + Text: s, + Range: Range{absPos, absPos + len(s)}, }) } else { p.inlines = append(p.inlines, &Text{ - Text: p.raw[p.position : p.position+next], + Text: p.raw[p.position : p.position+next], + Range: Range{absPos, absPos + next}, }) } p.position += next @@ -195,9 +208,11 @@ func (p *inlineParser) parseText() { } func (p *inlineParser) parseLinkOrImageDelimiter() { + absPos := relativeToAbsolutePosition(p.ranges, p.position) if p.raw[p.position] == '[' { p.inlines = append(p.inlines, &Text{ - Text: "[", + Text: "[", + Range: Range{absPos, absPos + 1}, }) p.delimiterStack.PushBack(&delimiter{ Type: linkOpeningDelimiter, @@ -207,7 +222,8 @@ func (p *inlineParser) parseLinkOrImageDelimiter() { p.position++ } else if p.raw[p.position] == '!' 
&& p.position+1 < len(p.raw) && p.raw[p.position+1] == '[' { p.inlines = append(p.inlines, &Text{ - Text: "![", + Text: "![", + Range: Range{absPos, absPos + 2}, }) p.delimiterStack.PushBack(&delimiter{ Type: imageOpeningDelimiter, @@ -217,7 +233,8 @@ func (p *inlineParser) parseLinkOrImageDelimiter() { p.position += 2 } else { p.inlines = append(p.inlines, &Text{ - Text: "!", + Text: "!", + Range: Range{absPos, absPos + 1}, }) p.position++ } @@ -347,8 +364,10 @@ func (p *inlineParser) lookForLinkOrImage() { break } } + absPos := relativeToAbsolutePosition(p.ranges, p.position) p.inlines = append(p.inlines, &Text{ - Text: "]", + Text: "]", + Range: Range{absPos, absPos + 1}, }) p.position++ } @@ -403,19 +422,23 @@ func CharacterReference(ref string) string { } func (p *inlineParser) parseCharacterReference() { + absPos := relativeToAbsolutePosition(p.ranges, p.position) p.position++ if semicolon := strings.IndexByte(p.raw[p.position:], ';'); semicolon == -1 { p.inlines = append(p.inlines, &Text{ - Text: "&", + Text: "&", + Range: Range{absPos, 1}, }) } else if s := CharacterReference(p.raw[p.position : p.position+semicolon]); s != "" { p.position += semicolon + 1 p.inlines = append(p.inlines, &Text{ - Text: s, + Text: s, + Range: Range{absPos, absPos + len(s)}, }) } else { p.inlines = append(p.inlines, &Text{ - Text: "&", + Text: "&", + Range: Range{absPos, 1}, }) } } diff --git a/utils/markdown/text_range_test.go b/utils/markdown/text_range_test.go new file mode 100644 index 000000000..9c0efea85 --- /dev/null +++ b/utils/markdown/text_range_test.go @@ -0,0 +1,110 @@ +// Copyright (c) 2018-present Mattermost, Inc. All Rights Reserved. +// See License.txt for license information. 
+ +package markdown + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTextRanges(t *testing.T) { + for name, tc := range map[string]struct { + Markdown string + ExpectedRanges []Range + ExpectedValues []string + }{ + "simple": { + Markdown: "hello", + ExpectedRanges: []Range{{0, 5}}, + ExpectedValues: []string{"hello"}, + }, + "simple2": { + Markdown: "hello!", + ExpectedRanges: []Range{{0, 5}, {5, 6}}, + ExpectedValues: []string{"hello", "!"}, + }, + "multiline": { + Markdown: "hello world\nfoobar", + ExpectedRanges: []Range{{0, 11}, {12, 18}}, + ExpectedValues: []string{"hello world", "foobar"}, + }, + "code": { + Markdown: "hello `code` world", + ExpectedRanges: []Range{{0, 6}, {12, 18}}, + ExpectedValues: []string{"hello ", " world"}, + }, + "notcode": { + Markdown: "hello ` world", + ExpectedRanges: []Range{{0, 6}, {6, 7}, {7, 13}}, + ExpectedValues: []string{"hello ", "`", " world"}, + }, + "escape": { + Markdown: "\\*hello\\*", + ExpectedRanges: []Range{{1, 2}, {2, 7}, {8, 9}}, + ExpectedValues: []string{"*", "hello", "*"}, + }, + "escapeescape": { + Markdown: "\\\\", + ExpectedRanges: []Range{{1, 2}}, + ExpectedValues: []string{"\\"}, + }, + "notescape": { + Markdown: "foo\\x", + ExpectedRanges: []Range{{0, 3}, {3, 4}, {4, 5}}, + ExpectedValues: []string{"foo", "\\", "x"}, + }, + "notlink": { + Markdown: "[foo", + ExpectedRanges: []Range{{0, 1}, {1, 4}}, + ExpectedValues: []string{"[", "foo"}, + }, + "notlinkend": { + Markdown: "[foo]", + ExpectedRanges: []Range{{0, 1}, {1, 4}, {4, 5}}, + ExpectedValues: []string{"[", "foo", "]"}, + }, + "notimage": { + Markdown: "![foo", + ExpectedRanges: []Range{{0, 2}, {2, 5}}, + ExpectedValues: []string{"![", "foo"}, + }, + "notimage2": { + Markdown: "!foo", + ExpectedRanges: []Range{{0, 1}, {1, 4}}, + ExpectedValues: []string{"!", "foo"}, + }, + "charref": { + Markdown: "&quot;test", + ExpectedRanges: []Range{{0, 1}, {6, 10}}, + ExpectedValues: []string{"\"", "test"}, + }, + "notcharref": { + 
Markdown: "&amp test", + ExpectedRanges: []Range{{0, 1}, {1, 9}}, + ExpectedValues: []string{"&", "amp test"}, + }, + "notcharref2": { + Markdown: "&mattermost;", + ExpectedRanges: []Range{{0, 1}, {1, 12}}, + ExpectedValues: []string{"&", "mattermost;"}, + }, + } { + t.Run(name, func(t *testing.T) { + var ranges []Range + var values []string + Inspect(tc.Markdown, func(node interface{}) bool { + if textNode, ok := node.(*Text); ok { + ranges = append(ranges, textNode.Range) + values = append(values, textNode.Text) + } + return true + }) + assert.Equal(t, ranges, tc.ExpectedRanges) + assert.Equal(t, values, tc.ExpectedValues) + + }) + } + +} -- cgit v1.2.3-1-g7c22