summaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
author: Harrison Healey <harrisonmhealey@gmail.com> 2018-08-08 06:13:04 -0400
committer: Jesús Espino <jespinog@gmail.com> 2018-08-08 12:13:04 +0200
commit: 99cf15b56eb561dc30def86ad7e3cd97af4c338c (patch)
tree: 18636fea7eee2674ae70ce8adf151e9be8cf826c /utils
parent: 935f0c5ff9a7e3dea3db70f4df59d0db52543f29 (diff)
download: chat-99cf15b56eb561dc30def86ad7e3cd97af4c338c.tar.gz
chat-99cf15b56eb561dc30def86ad7e3cd97af4c338c.tar.bz2
chat-99cf15b56eb561dc30def86ad7e3cd97af4c338c.zip
Update serverside markdown parser to respect unicode and capitalized links (#9235)
Diffstat (limited to 'utils')
-rw-r--r-- utils/markdown/autolink.go 44
-rw-r--r-- utils/markdown/autolink_test.go 150
-rw-r--r-- utils/markdown/commonmark_test.go 2
-rw-r--r-- utils/markdown/html.go 2
-rw-r--r-- utils/markdown/inlines.go 53
5 files changed, 173 insertions, 78 deletions
diff --git a/utils/markdown/autolink.go b/utils/markdown/autolink.go
index 16c40e609..7f7d1117f 100644
--- a/utils/markdown/autolink.go
+++ b/utils/markdown/autolink.go
@@ -16,27 +16,27 @@ var (
DefaultUrlSchemes = []string{"http", "https", "ftp", "mailto", "tel"}
)
-// Given a string with a w at the given position, tries to parse and return a link starting with "www."
+// Given a string with a w at the given position, tries to parse and return the range of a www link
// if one exists. If the text at the given position isn't a link, returns an empty Range and false. Equivalent to
// www_match from the reference code.
-func parseWWWAutolink(data string, position int) string {
+func parseWWWAutolink(data string, position int) (Range, bool) {
// Check that this isn't part of another word
if position > 1 {
prevChar := data[position-1]
if !isWhitespaceByte(prevChar) && !isAllowedBeforeWWWLink(prevChar) {
- return ""
+ return Range{}, false
}
}
// Check that this starts with www
if len(data)-position < 4 || !regexp.MustCompile(`^www\d{0,3}\.`).MatchString(data[position:]) {
- return ""
+ return Range{}, false
}
end := checkDomain(data[position:], false)
if end == 0 {
- return ""
+ return Range{}, false
}
end += position
@@ -47,12 +47,12 @@ func parseWWWAutolink(data string, position int) string {
}
// Trim trailing punctuation
- link := trimTrailingCharactersFromLink(data[position:end])
- if link == "" {
- return ""
+ end = trimTrailingCharactersFromLink(data, position, end)
+ if position == end {
+ return Range{}, false
}
- return link
+ return Range{position, end}, true
}
func isAllowedBeforeWWWLink(c byte) bool {
@@ -64,13 +64,13 @@ func isAllowedBeforeWWWLink(c byte) bool {
}
}
-// Given a string with a : at the given position, tried to parse and return a link starting with a URL scheme
+// Given a string with a : at the given position, tries to parse and return the range of a link starting with a URL
// scheme if one exists. If the text around the given position isn't a link, returns an empty Range and false.
// Equivalent to url_match from the reference code.
-func parseURLAutolink(data string, position int) string {
+func parseURLAutolink(data string, position int) (Range, bool) {
// Check that a :// exists. This doesn't match the clients that treat the slashes as optional.
if len(data)-position < 4 || data[position+1] != '/' || data[position+2] != '/' {
- return ""
+ return Range{}, false
}
start := position - 1
@@ -81,12 +81,12 @@ func parseURLAutolink(data string, position int) string {
// Ensure that the URL scheme is allowed and that at least one character after the scheme is valid.
scheme := data[start:position]
if !isSchemeAllowed(scheme) || !isValidHostCharacter(data[position+3:]) {
- return ""
+ return Range{}, false
}
end := checkDomain(data[position+3:], true)
if end == 0 {
- return ""
+ return Range{}, false
}
end += position
@@ -97,12 +97,12 @@ func parseURLAutolink(data string, position int) string {
}
// Trim trailing punctuation
- link := trimTrailingCharactersFromLink(data[start:end])
- if link == "" {
- return ""
+ end = trimTrailingCharactersFromLink(data, start, end)
+ if start == end {
+ return Range{}, false
}
- return link
+ return Range{start, end}, true
}
func isSchemeAllowed(scheme string) bool {
@@ -166,9 +166,9 @@ func isValidHostCharacter(link string) bool {
}
// Removes any trailing characters such as punctuation or stray brackets that shouldn't be part of the link.
-// Equivalent to autolink_delim from the reference code.
-func trimTrailingCharactersFromLink(link string) string {
- runes := []rune(link)
+// Returns a new end position for the link. Equivalent to autolink_delim from the reference code.
+func trimTrailingCharactersFromLink(markdown string, start int, end int) int {
+ runes := []rune(markdown[start:end])
linkEnd := len(runes)
// Cut off the link before an open angle bracket if it contains one
@@ -240,7 +240,7 @@ func trimTrailingCharactersFromLink(link string) string {
}
}
- return string(runes[:linkEnd])
+ return start + len(string(runes[:linkEnd]))
}
func canEndAutolink(c rune) bool {
diff --git a/utils/markdown/autolink_test.go b/utils/markdown/autolink_test.go
index d0ea53fa4..997124338 100644
--- a/utils/markdown/autolink_test.go
+++ b/utils/markdown/autolink_test.go
@@ -134,7 +134,15 @@ func TestParseURLAutolink(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.Description, func(t *testing.T) {
- assert.Equal(t, testCase.Expected, parseURLAutolink(testCase.Input, testCase.Position))
+ rawRange, ok := parseURLAutolink(testCase.Input, testCase.Position)
+
+ if testCase.Expected == "" {
+ assert.False(t, ok)
+ assert.Equal(t, Range{0, 0}, rawRange)
+ } else {
+ assert.True(t, ok)
+ assert.Equal(t, testCase.Expected, testCase.Input[rawRange.Position:rawRange.End])
+ }
})
}
}
@@ -264,89 +272,153 @@ func TestParseWWWAutolink(t *testing.T) {
for _, testCase := range testCases {
t.Run(testCase.Description, func(t *testing.T) {
- assert.Equal(t, testCase.Expected, parseWWWAutolink(testCase.Input, testCase.Position))
+ rawRange, ok := parseWWWAutolink(testCase.Input, testCase.Position)
+
+ if testCase.Expected == "" {
+ assert.False(t, ok)
+ assert.Equal(t, Range{0, 0}, rawRange)
+ } else {
+ assert.True(t, ok)
+ assert.Equal(t, testCase.Expected, testCase.Input[rawRange.Position:rawRange.End])
+ }
})
}
}
func TestTrimTrailingCharactersFromLink(t *testing.T) {
testCases := []struct {
- Input string
- Expected string
+ Input string
+ Start int
+ End int
+ ExpectedEnd int
}{
{
- Input: "http://www.example.com",
- Expected: "http://www.example.com",
+ Input: "http://www.example.com",
+ ExpectedEnd: 22,
+ },
+ {
+ Input: "http://www.example.com/abcd",
+ ExpectedEnd: 27,
+ },
+ {
+ Input: "http://www.example.com/abcd/",
+ ExpectedEnd: 28,
+ },
+ {
+ Input: "http://www.example.com/1234",
+ ExpectedEnd: 27,
+ },
+ {
+ Input: "http://www.example.com/abcd?foo=bar",
+ ExpectedEnd: 35,
},
{
- Input: "http://www.example.com/abcd",
- Expected: "http://www.example.com/abcd",
+ Input: "http://www.example.com/abcd#heading",
+ ExpectedEnd: 35,
},
{
- Input: "http://www.example.com/abcd/",
- Expected: "http://www.example.com/abcd/",
+ Input: "http://www.example.com.",
+ ExpectedEnd: 22,
},
{
- Input: "http://www.example.com/1234",
- Expected: "http://www.example.com/1234",
+ Input: "http://www.example.com,",
+ ExpectedEnd: 22,
},
{
- Input: "http://www.example.com/abcd?foo=bar",
- Expected: "http://www.example.com/abcd?foo=bar",
+ Input: "http://www.example.com?",
+ ExpectedEnd: 22,
},
{
- Input: "http://www.example.com/abcd#heading",
- Expected: "http://www.example.com/abcd#heading",
+ Input: "http://www.example.com)",
+ ExpectedEnd: 22,
},
{
- Input: "http://www.example.com.",
- Expected: "http://www.example.com",
+ Input: "http://www.example.com",
+ ExpectedEnd: 22,
},
{
- Input: "http://www.example.com,",
- Expected: "http://www.example.com",
+ Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
+ ExpectedEnd: 54,
},
{
- Input: "http://www.example.com?",
- Expected: "http://www.example.com",
+ Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
+ ExpectedEnd: 53,
},
{
- Input: "http://www.example.com)",
- Expected: "http://www.example.com",
+ Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation))",
+ ExpectedEnd: 54,
},
{
- Input: "http://www.example.com",
- Expected: "http://www.example.com",
+ Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
+ ExpectedEnd: 71,
},
{
- Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
- Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
+ Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
+ ExpectedEnd: 71,
},
{
- Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
- Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
+ Input: "http://www.example.com&quot;",
+ ExpectedEnd: 22,
},
{
- Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation))",
- Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
+ Input: "this is a sentence containing http://www.example.com in it",
+ Start: 30,
+ End: 52,
+ ExpectedEnd: 52,
},
{
- Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
- Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
+ Input: "this is a sentence containing http://www.example.com???",
+ Start: 30,
+ End: 55,
+ ExpectedEnd: 52,
},
{
- Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
- Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
+ Input: "http://google.com/å",
+ ExpectedEnd: len("http://google.com/å"),
},
{
- Input: "http://www.example.com&quot;",
- Expected: "http://www.example.com",
+ Input: "http://google.com/å...",
+ ExpectedEnd: len("http://google.com/å"),
+ },
+ {
+ Input: "This is http://google.com/å, a link, and http://google.com/å",
+ Start: 8,
+ End: len("This is http://google.com/å,"),
+ ExpectedEnd: len("This is http://google.com/å"),
+ },
+ {
+ Input: "This is http://google.com/å, a link, and http://google.com/å",
+ Start: 41,
+ End: len("This is http://google.com/å, a link, and http://google.com/å"),
+ ExpectedEnd: len("This is http://google.com/å, a link, and http://google.com/å"),
+ },
+ {
+ Input: "This is http://google.com/å, a link, and http://google.com/å.",
+ Start: 41,
+ End: len("This is http://google.com/å, a link, and http://google.com/å."),
+ ExpectedEnd: len("This is http://google.com/å, a link, and http://google.com/å"),
+ },
+ {
+ Input: "http://🍄.ga/ http://x🍄.ga/",
+ Start: 0,
+ End: len("http://🍄.ga/"),
+ ExpectedEnd: len("http://🍄.ga/"),
+ },
+ {
+ Input: "http://🍄.ga/ http://x🍄.ga/",
+ Start: len("http://🍄.ga/ "),
+ End: len("http://🍄.ga/ http://x🍄.ga/"),
+ ExpectedEnd: len("http://🍄.ga/ http://x🍄.ga/"),
},
}
for _, testCase := range testCases {
t.Run(testCase.Input, func(t *testing.T) {
- assert.Equal(t, testCase.Expected, trimTrailingCharactersFromLink(testCase.Input))
+ if testCase.End == 0 {
+ testCase.End = len(testCase.Input) - testCase.Start
+ }
+
+ assert.Equal(t, testCase.ExpectedEnd, trimTrailingCharactersFromLink(testCase.Input, testCase.Start, testCase.End))
})
}
}
diff --git a/utils/markdown/commonmark_test.go b/utils/markdown/commonmark_test.go
index 13e61f52d..d1381cee5 100644
--- a/utils/markdown/commonmark_test.go
+++ b/utils/markdown/commonmark_test.go
@@ -1000,7 +1000,7 @@ func TestCommonMarkReferenceStrings(t *testing.T) {
}
}
-func TestCommonMarkRefernceAutolinks(t *testing.T) {
+func TestCommonMarkReferenceAutolinks(t *testing.T) {
// These tests are adapted from the GitHub-flavoured CommonMark extension tests located at
// https://github.com/github/cmark/blob/master/test/extensions.txt
for name, tc := range map[string]struct {
diff --git a/utils/markdown/html.go b/utils/markdown/html.go
index 1a857afed..afb72bff3 100644
--- a/utils/markdown/html.go
+++ b/utils/markdown/html.go
@@ -157,7 +157,7 @@ func RenderInlineHTML(inline Inline) (result string) {
}
result += "</a>"
case *Autolink:
- result += `<a href="` + htmlEscaper.Replace(escapeURL(v.Link)) + `">`
+ result += `<a href="` + htmlEscaper.Replace(escapeURL(v.Destination())) + `">`
for _, inline := range v.Children {
result += RenderInlineHTML(inline)
}
diff --git a/utils/markdown/inlines.go b/utils/markdown/inlines.go
index 453f4bbe5..a3abccef3 100644
--- a/utils/markdown/inlines.go
+++ b/utils/markdown/inlines.go
@@ -86,7 +86,19 @@ type Autolink struct {
Children []Inline
- Link string
+ RawDestination Range
+
+ markdown string
+}
+
+func (i *Autolink) Destination() string {
+ destination := Unescape(i.markdown[i.RawDestination.Position:i.RawDestination.End])
+
+ if strings.HasPrefix(destination, "www") {
+ destination = "http://" + destination
+ }
+
+ return destination
}
type delimiterType int
@@ -486,15 +498,18 @@ func (p *inlineParser) parseAutolink(c rune) bool {
}
}
- link := ""
- text := ""
+ var link Range
if c == ':' {
- text = parseURLAutolink(p.raw, p.position)
- link = text
+ var ok bool
+ link, ok = parseURLAutolink(p.raw, p.position)
+
+ if !ok {
+ return false
+ }
// Since the current position is at the colon, we have to rewind the parsing slightly so that
// we don't duplicate the URL scheme
- rewind := strings.Index(text, ":")
+ rewind := strings.Index(p.raw[link.Position:link.End], ":")
if rewind != -1 {
lastInline := p.inlines[len(p.inlines)-1]
lastText, ok := lastInline.(*Text)
@@ -512,22 +527,30 @@ func (p *inlineParser) parseAutolink(c rune) bool {
Range: Range{lastText.Range.Position, lastText.Range.End - rewind},
})
p.position -= rewind
+ }
+ } else if c == 'w' || c == 'W' {
+ var ok bool
+ link, ok = parseWWWAutolink(p.raw, p.position)
+ if !ok {
+ return false
}
- } else if c == 'w' {
- text = parseWWWAutolink(p.raw, p.position)
- link = "http://" + text
}
- if text == "" {
- return false
- }
+ linkMarkdownPosition := relativeToAbsolutePosition(p.ranges, link.Position)
+ linkRange := Range{linkMarkdownPosition, linkMarkdownPosition + link.End - link.Position}
p.inlines = append(p.inlines, &Autolink{
- Link: link,
- Children: []Inline{&Text{Text: text}},
+ Children: []Inline{
+ &Text{
+ Text: p.raw[link.Position:link.End],
+ Range: linkRange,
+ },
+ },
+ RawDestination: linkRange,
+ markdown: p.markdown,
})
- p.position += len(text)
+ p.position += (link.End - link.Position)
return true
}