MM-11451 Added autolinking to Markdown parser (#9151)

* MM-11451 Added autolinking to Markdown parser * Added missing headers * Added mailto and tel links
author: Harrison Healey <harrisonmhealey@gmail.com> 2018-07-24 10:40:01 -0400
committer: Christopher Speller <crspeller@gmail.com> 2018-07-24 07:40:01 -0700
commit: c8d3e421391520af45d8e0f60a884fee139652a0 (patch)
tree: 963d24f65d75cd413ba539779935223b165ae938 /utils
parent: bfb2640451c95b242ae7b8bb8528538b706412ef (diff)
download: chat-c8d3e421391520af45d8e0f60a884fee139652a0.tar.gz
chat-c8d3e421391520af45d8e0f60a884fee139652a0.tar.bz2
chat-c8d3e421391520af45d8e0f60a884fee139652a0.zip
6 files changed, 1002 insertions, 1 deletions
diff --git a/utils/markdown/autolink.go b/utils/markdown/autolink.go
new file mode 100644
index 000000000..16c40e609
--- /dev/null
+++ b/utils/markdown/autolink.go
@@ -0,0 +1,253 @@
+// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved.
+// See License.txt for license information.
+
+package markdown
+
+import (
+	"regexp"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// Based off of extensions/autolink.c from https://github.com/github/cmark
+
+var (
+	// DefaultUrlSchemes lists the URL schemes that isSchemeAllowed accepts when deciding whether text of
+	// the form "scheme://..." may be turned into an autolink. Schemes are compared case-insensitively.
+	DefaultUrlSchemes = []string{"http", "https", "ftp", "mailto", "tel"}
+)
+
+// Given a string with a w at the given position, tries to parse and return a link starting with "www."
+// if one exists. If the text at the given position isn't a link, returns an empty string. Equivalent to
+// www_match from the reference code.
+func parseWWWAutolink(data string, position int) string {
+	// Check that this isn't part of another word
+	if position > 1 {
+		prevChar := data[position-1]
+
+		if !isWhitespaceByte(prevChar) && !isAllowedBeforeWWWLink(prevChar) {
+			return ""
+		}
+	}
+
+	// Check that this starts with www
+	if len(data)-position < 4 || !regexp.MustCompile(`^www\d{0,3}\.`).MatchString(data[position:]) {
+		return ""
+	}
+
+	end := checkDomain(data[position:], false)
+	if end == 0 {
+		return ""
+	}
+
+	end += position
+
+	// Grab all text until the end of the string or the next whitespace character
+	for end < len(data) && !isWhitespaceByte(data[end]) {
+		end += 1
+	}
+
+	// Trim trailing punctuation
+	link := trimTrailingCharactersFromLink(data[position:end])
+	if link == "" {
+		return ""
+	}
+
+	return link
+}
+
+func isAllowedBeforeWWWLink(c byte) bool {
+	switch c {
+	case '*', '_', '~', ')':
+		return true
+	default:
+		return false
+	}
+}
+
+// Given a string with a : at the given position, tries to parse and return a link starting with a URL scheme
+// if one exists. If the text around the given position isn't a link, returns an empty string. Equivalent to
+// url_match from the reference code.
+func parseURLAutolink(data string, position int) string {
+	// A scheme needs at least one character before the colon. Without this check, a colon at the very
+	// start of the text (such as "://example.com") would yield a negative start index and panic below.
+	if position < 1 {
+		return ""
+	}
+
+	// Check that a :// exists. This doesn't match the clients that treat the slashes as optional.
+	if len(data)-position < 4 || data[position+1] != '/' || data[position+2] != '/' {
+		return ""
+	}
+
+	// Walk backwards over alphanumeric bytes to find where the scheme starts
+	start := position - 1
+	for start > 0 && isAlphanumericByte(data[start-1]) {
+		start -= 1
+	}
+
+	// Ensure that the URL scheme is allowed and that at least one character after the scheme is valid.
+	scheme := data[start:position]
+	if !isSchemeAllowed(scheme) || !isValidHostCharacter(data[position+3:]) {
+		return ""
+	}
+
+	end := checkDomain(data[position+3:], true)
+	if end == 0 {
+		return ""
+	}
+
+	// end counts bytes from the start of the domain (position+3), so adding only position leaves it a
+	// few bytes short of the end of the domain. It is only used as the starting point of the scan below.
+	end += position
+
+	// Grab all text until the end of the string or the next whitespace character
+	for end < len(data) && !isWhitespaceByte(data[end]) {
+		end += 1
+	}
+
+	// Trim trailing punctuation. The result is empty if nothing is left after trimming.
+	return trimTrailingCharactersFromLink(data[start:end])
+}
+
+// Returns true if the given scheme matches one of DefaultUrlSchemes, ignoring case.
+func isSchemeAllowed(scheme string) bool {
+	// Note that this doesn't support the custom URL schemes implemented by the client
+	schemes := DefaultUrlSchemes
+
+	for i := 0; i < len(schemes); i++ {
+		if strings.EqualFold(schemes[i], scheme) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// Given a string starting with a URL, returns how many leading bytes were accepted as the URL's domain,
+// or 0 if the string doesn't start with a valid domain. A domain containing an underscore is never valid.
+// When allowShort is false, the domain must also contain a period. Equivalent to check_domain from the
+// reference code.
+func checkDomain(data string, allowShort bool) int {
+	sawPeriod := false
+
+	// The scan starts at the second byte and stops before the last one, so neither the first nor the
+	// last byte of data is inspected here
+	pos := 1
+	for ; pos < len(data)-1; pos++ {
+		b := data[pos]
+
+		if b == '_' {
+			// Underscores aren't allowed anywhere in the domain
+			return 0
+		}
+
+		if b == '.' {
+			sawPeriod = true
+			continue
+		}
+
+		if b != '-' && !isValidHostCharacter(data[pos:]) {
+			// Reached the first character that can't be part of a domain
+			break
+		}
+	}
+
+	// With allowShort set, any run of valid domain characters is accepted. Otherwise at least one period
+	// is required, although parseWWWAutolink has already seen "www." by the time it calls this.
+	if allowShort || sawPeriod {
+		return pos
+	}
+
+	return 0
+}
+
+// Returns true if the first character of the provided link may appear in a domain name, meaning that it
+// decodes to something other than utf8.RuneError and is neither whitespace nor punctuation. Equivalent
+// to is_valid_hostchar from the reference code.
+func isValidHostCharacter(link string) bool {
+	first, _ := utf8.DecodeRuneInString(link)
+
+	switch {
+	case first == utf8.RuneError:
+		// Empty input or invalid UTF-8
+		return false
+	case unicode.IsSpace(first), unicode.IsPunct(first):
+		return false
+	default:
+		return true
+	}
+}
+
+// Strips characters from the end of a candidate link that shouldn't be treated as part of it, such as
+// sentence punctuation, a trailing HTML entity or an unbalanced closing bracket. Everything from the first
+// open angle bracket onwards is discarded as well. Equivalent to autolink_delim from the reference code.
+func trimTrailingCharactersFromLink(link string) string {
+	chars := []rune(link)
+	end := len(chars)
+
+	// An open angle bracket always terminates the link
+	for idx := 0; idx < len(chars); idx++ {
+		if chars[idx] == '<' {
+			end = idx
+			break
+		}
+	}
+
+	for end > 0 {
+		last := chars[end-1]
+
+		switch {
+		case !canEndAutolink(last):
+			// Quotes, periods and similar punctuation are never the last character of a link
+			end--
+
+		case last == ';':
+			// This may be the end of an HTML entity such as "&quot;", so walk back over its letters to
+			// look for the ampersand that would start it
+			entityStart := end - 2
+			for entityStart > 0 {
+				r := chars[entityStart]
+				if !(r >= 'a' && r <= 'z') && !(r >= 'A' && r <= 'Z') {
+					break
+				}
+				entityStart--
+			}
+
+			if entityStart < end-2 && chars[entityStart] == '&' {
+				// Drop the whole entity
+				end = entityStart
+			} else {
+				// This isn't an HTML entity after all, so only the semicolon is removed
+				end--
+			}
+
+		case last == ')':
+			// A closing bracket only stays if the link has at least as many opening brackets as closing
+			// ones. For example:
+			//
+			//  http://www.pokemon.com/Pikachu_(Electric)   is kept as is
+			//  http://www.pokemon.com/Pikachu_((Electric)  is kept as is
+			//  http://www.pokemon.com/Pikachu_(Electric))  loses its final bracket
+			opened, closed := 0, 0
+			for _, r := range chars[:end] {
+				switch r {
+				case '(':
+					opened++
+				case ')':
+					closed++
+				}
+			}
+
+			if closed <= opened {
+				// The brackets are balanced (or there are spare opening ones), so the link ends here
+				return string(chars[:end])
+			}
+
+			end--
+
+		default:
+			// Any other character is a legitimate end of the link
+			return string(chars[:end])
+		}
+	}
+
+	return string(chars[:end])
+}
+
+func canEndAutolink(c rune) bool {
+	switch c {
+	case '?', '!', '.', ',', ':', '*', '_', '~', '\'', '"':
+		return false
+	default:
+		return true
+	}
+}
diff --git a/utils/markdown/autolink_test.go b/utils/markdown/autolink_test.go
new file mode 100644
index 000000000..d0ea53fa4
--- /dev/null
+++ b/utils/markdown/autolink_test.go
@@ -0,0 +1,617 @@
+// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved.
+// See License.txt for license information.
+
+package markdown
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestParseURLAutolink calls parseURLAutolink directly with an input string and the byte index of a
+// colon in that string, and checks the returned link text. An empty Expected value means that no link
+// should be found at that position.
+func TestParseURLAutolink(t *testing.T) {
+	testCases := []struct {
+		Description string
+		Input       string
+		Position    int
+		Expected    string
+	}{
+		{
+			Description: "no link",
+			Input:       "This is an :emoji:",
+			Position:    11,
+			Expected:    "",
+		},
+		{
+			Description: "no link 2",
+			Input:       "These are two things: apple and orange",
+			Position:    20,
+			Expected:    "",
+		},
+		{
+			Description: "link with http",
+			Input:       "http://example.com and some text",
+			Position:    4,
+			Expected:    "http://example.com",
+		},
+		{
+			Description: "link with https",
+			Input:       "https://example.com and some text",
+			Position:    5,
+			Expected:    "https://example.com",
+		},
+		{
+			Description: "link with ftp",
+			Input:       "ftp://example.com and some text",
+			Position:    3,
+			Expected:    "ftp://example.com",
+		},
+		{
+			Description: "link with a path",
+			Input:       "https://example.com/abcd and some text",
+			Position:    5,
+			Expected:    "https://example.com/abcd",
+		},
+		{
+			Description: "link with parameters",
+			Input:       "ftp://example.com/abcd?foo=bar and some text",
+			Position:    3,
+			Expected:    "ftp://example.com/abcd?foo=bar",
+		},
+		{
+			Description: "link, not at start",
+			Input:       "This is https://example.com and some text",
+			Position:    13,
+			Expected:    "https://example.com",
+		},
+		{
+			Description: "link with a path, not at start",
+			Input:       "This is also http://www.example.com/abcd and some text",
+			Position:    17,
+			Expected:    "http://www.example.com/abcd",
+		},
+		{
+			Description: "link with parameters, not at start",
+			Input:       "These are https://www.example.com/abcd?foo=bar and some text",
+			Position:    15,
+			Expected:    "https://www.example.com/abcd?foo=bar",
+		},
+		{
+			Description: "link with trailing characters",
+			Input:       "This is ftp://www.example.com??",
+			Position:    11,
+			Expected:    "ftp://www.example.com",
+		},
+		{
+			Description: "multiple links",
+			Input:       "This is https://example.com/abcd and ftp://www.example.com/1234",
+			Position:    13,
+			Expected:    "https://example.com/abcd",
+		},
+		{
+			Description: "second of multiple links",
+			Input:       "This is https://example.com/abcd and ftp://www.example.com/1234",
+			Position:    40,
+			Expected:    "ftp://www.example.com/1234",
+		},
+		{
+			Description: "link with brackets",
+			Input:       "Go to ftp://www.example.com/my/page_(disambiguation) and some text",
+			Position:    9,
+			Expected:    "ftp://www.example.com/my/page_(disambiguation)",
+		},
+		{
+			Description: "link in brackets",
+			Input:       "(https://www.example.com/foo/bar)",
+			Position:    6,
+			Expected:    "https://www.example.com/foo/bar",
+		},
+		{
+			Description: "link in underscores",
+			Input:       "_http://www.example.com_",
+			Position:    5,
+			Expected:    "http://www.example.com",
+		},
+		{
+			Description: "link in asterisks",
+			Input:       "This is **ftp://example.com**",
+			Position:    13,
+			Expected:    "ftp://example.com",
+		},
+		{
+			Description: "link in strikethrough",
+			Input:       "Those were ~~https://example.com~~",
+			Position:    18,
+			Expected:    "https://example.com",
+		},
+		{
+			Description: "link with angle brackets",
+			Input:       "<b>We use http://example.com</b>",
+			Position:    14,
+			Expected:    "http://example.com",
+		},
+	}
+
+	// Each case runs as its own subtest, named after its description
+	for _, testCase := range testCases {
+		t.Run(testCase.Description, func(t *testing.T) {
+			assert.Equal(t, testCase.Expected, parseURLAutolink(testCase.Input, testCase.Position))
+		})
+	}
+}
+
+// TestParseWWWAutolink calls parseWWWAutolink directly with an input string and the byte index at which
+// a link should be looked for, and checks the returned link text. An empty Expected value means that no
+// link should be found at that position.
+func TestParseWWWAutolink(t *testing.T) {
+	testCases := []struct {
+		Description string
+		Input       string
+		Position    int
+		Expected    string
+	}{
+		{
+			Description: "no link",
+			Input:       "This is some text",
+			Position:    0,
+			Expected:    "",
+		},
+		{
+			Description: "link",
+			Input:       "www.example.com and some text",
+			Position:    0,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "link with a path",
+			Input:       "www.example.com/abcd and some text",
+			Position:    0,
+			Expected:    "www.example.com/abcd",
+		},
+		{
+			Description: "link with parameters",
+			Input:       "www.example.com/abcd?foo=bar and some text",
+			Position:    0,
+			Expected:    "www.example.com/abcd?foo=bar",
+		},
+		{
+			Description: "link, not at start",
+			Input:       "This is www.example.com and some text",
+			Position:    8,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "link with a path, not at start",
+			Input:       "This is also www.example.com/abcd and some text",
+			Position:    13,
+			Expected:    "www.example.com/abcd",
+		},
+		{
+			Description: "link with parameters, not at start",
+			Input:       "These are www.example.com/abcd?foo=bar and some text",
+			Position:    10,
+			Expected:    "www.example.com/abcd?foo=bar",
+		},
+		{
+			Description: "link with trailing characters",
+			Input:       "This is www.example.com??",
+			Position:    8,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "link after current position",
+			Input:       "This is some text and www.example.com",
+			Position:    0,
+			Expected:    "",
+		},
+		{
+			Description: "multiple links",
+			Input:       "This is www.example.com/abcd and www.example.com/1234",
+			Position:    8,
+			Expected:    "www.example.com/abcd",
+		},
+		{
+			Description: "multiple links 2",
+			Input:       "This is www.example.com/abcd and www.example.com/1234",
+			Position:    33,
+			Expected:    "www.example.com/1234",
+		},
+		{
+			Description: "link with brackets",
+			Input:       "Go to www.example.com/my/page_(disambiguation) and some text",
+			Position:    6,
+			Expected:    "www.example.com/my/page_(disambiguation)",
+		},
+		{
+			Description: "link following other letters",
+			Input:       "aaawww.example.com and some text",
+			Position:    3,
+			Expected:    "",
+		},
+		{
+			Description: "link in brackets",
+			Input:       "(www.example.com)",
+			Position:    1,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "link in underscores",
+			Input:       "_www.example.com_",
+			Position:    1,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "link in asterisks",
+			Input:       "This is **www.example.com**",
+			Position:    10,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "link in strikethrough",
+			Input:       "Those were ~~www.example.com~~",
+			Position:    13,
+			Expected:    "www.example.com",
+		},
+		{
+			Description: "using www1",
+			Input:       "Our backup site is at www1.example.com/foo",
+			Position:    22,
+			Expected:    "www1.example.com/foo",
+		},
+		{
+			Description: "link with angle brackets",
+			Input:       "<b>We use www2.example.com</b>",
+			Position:    10,
+			Expected:    "www2.example.com",
+		},
+	}
+
+	// Each case runs as its own subtest, named after its description
+	for _, testCase := range testCases {
+		t.Run(testCase.Description, func(t *testing.T) {
+			assert.Equal(t, testCase.Expected, parseWWWAutolink(testCase.Input, testCase.Position))
+		})
+	}
+}
+
+// TestTrimTrailingCharactersFromLink checks which trailing characters trimTrailingCharactersFromLink
+// removes from a candidate link: sentence punctuation, unbalanced closing brackets and a trailing HTML
+// entity. Links without such trailing characters must come back unchanged.
+func TestTrimTrailingCharactersFromLink(t *testing.T) {
+	testCases := []struct {
+		Input    string
+		Expected string
+	}{
+		{
+			Input:    "http://www.example.com",
+			Expected: "http://www.example.com",
+		},
+		{
+			Input:    "http://www.example.com/abcd",
+			Expected: "http://www.example.com/abcd",
+		},
+		{
+			Input:    "http://www.example.com/abcd/",
+			Expected: "http://www.example.com/abcd/",
+		},
+		{
+			Input:    "http://www.example.com/1234",
+			Expected: "http://www.example.com/1234",
+		},
+		{
+			Input:    "http://www.example.com/abcd?foo=bar",
+			Expected: "http://www.example.com/abcd?foo=bar",
+		},
+		{
+			Input:    "http://www.example.com/abcd#heading",
+			Expected: "http://www.example.com/abcd#heading",
+		},
+		{
+			Input:    "http://www.example.com.",
+			Expected: "http://www.example.com",
+		},
+		{
+			Input:    "http://www.example.com,",
+			Expected: "http://www.example.com",
+		},
+		{
+			Input:    "http://www.example.com?",
+			Expected: "http://www.example.com",
+		},
+		{
+			Input:    "http://www.example.com)",
+			Expected: "http://www.example.com",
+		},
+		{
+			Input:    "http://www.example.com",
+			Expected: "http://www.example.com",
+		},
+		{
+			Input:    "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
+			Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
+		},
+		{
+			Input:    "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
+			Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
+		},
+		{
+			Input:    "https://en.wikipedia.org/wiki/Dolphin_(disambiguation))",
+			Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
+		},
+		{
+			Input:    "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
+			Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
+		},
+		{
+			Input:    "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
+			Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
+		},
+		{
+			Input:    "http://www.example.com&quot;",
+			Expected: "http://www.example.com",
+		},
+	}
+
+	// Each case runs as its own subtest, named after its input
+	for _, testCase := range testCases {
+		t.Run(testCase.Input, func(t *testing.T) {
+			assert.Equal(t, testCase.Expected, trimTrailingCharactersFromLink(testCase.Input))
+		})
+	}
+}
+
+// TestAutolinking renders complete Markdown snippets with RenderHTML and compares the output against the
+// expected HTML, covering autolinks on their own, inside sentences, next to punctuation and inside
+// regular Markdown links.
+func TestAutolinking(t *testing.T) {
+	// These tests are adapted from https://github.com/mattermost/commonmark.js/test/mattermost.txt.
+	// It is missing tests for:
+	// 1. Links surrounded by emphasis (emphasis not implemented on the server)
+	// 2. IPv6 addresses (not implemented on the server or by GitHub)
+	// 3. Custom URL schemes (not implemented)
+
+	for name, tc := range map[string]struct {
+		Markdown     string
+		ExpectedHTML string
+	}{
+		"valid-link-1": {
+			Markdown:     `http://example.com`,
+			ExpectedHTML: `<p><a href="http://example.com">http://example.com</a></p>`,
+		},
+		"valid-link-2": {
+			Markdown:     `https://example.com`,
+			ExpectedHTML: `<p><a href="https://example.com">https://example.com</a></p>`,
+		},
+		"valid-link-3": {
+			Markdown:     `ftp://example.com`,
+			ExpectedHTML: `<p><a href="ftp://example.com">ftp://example.com</a></p>`,
+		},
+		// "valid-link-4": {
+		// 	Markdown:     `ts3server://example.com?port=9001`,
+		// 	ExpectedHTML: `<p><a href="ts3server://example.com?port=9001">ts3server://example.com?port=9001</a></p>`,
+		// },
+		"valid-link-5": {
+			Markdown:     `www.example.com`,
+			ExpectedHTML: `<p><a href="http://www.example.com">www.example.com</a></p>`,
+		},
+		"valid-link-6": {
+			Markdown:     `www.example.com/index`,
+			ExpectedHTML: `<p><a href="http://www.example.com/index">www.example.com/index</a></p>`,
+		},
+		"valid-link-7": {
+			Markdown:     `www.example.com/index.html`,
+			ExpectedHTML: `<p><a href="http://www.example.com/index.html">www.example.com/index.html</a></p>`,
+		},
+		"valid-link-8": {
+			Markdown:     `http://example.com/index/sub`,
+			ExpectedHTML: `<p><a href="http://example.com/index/sub">http://example.com/index/sub</a></p>`,
+		},
+		"valid-link-9": {
+			Markdown:     `www1.example.com`,
+			ExpectedHTML: `<p><a href="http://www1.example.com">www1.example.com</a></p>`,
+		},
+		"valid-link-10": {
+			Markdown:     `https://en.wikipedia.org/wiki/URLs#Syntax`,
+			ExpectedHTML: `<p><a href="https://en.wikipedia.org/wiki/URLs#Syntax">https://en.wikipedia.org/wiki/URLs#Syntax</a></p>`,
+		},
+		"valid-link-11": {
+			Markdown:     `https://groups.google.com/forum/#!msg`,
+			ExpectedHTML: `<p><a href="https://groups.google.com/forum/#!msg">https://groups.google.com/forum/#!msg</a></p>`,
+		},
+		"valid-link-12": {
+			Markdown:     `www.example.com/index?params=1`,
+			ExpectedHTML: `<p><a href="http://www.example.com/index?params=1">www.example.com/index?params=1</a></p>`,
+		},
+		"valid-link-13": {
+			Markdown:     `www.example.com/index?params=1&other=2`,
+			ExpectedHTML: `<p><a href="http://www.example.com/index?params=1&amp;other=2">www.example.com/index?params=1&amp;other=2</a></p>`,
+		},
+		"valid-link-14": {
+			Markdown:     `www.example.com/index?params=1;other=2`,
+			ExpectedHTML: `<p><a href="http://www.example.com/index?params=1;other=2">www.example.com/index?params=1;other=2</a></p>`,
+		},
+		"valid-link-15": {
+			Markdown:     `http://www.example.com/_/page`,
+			ExpectedHTML: `<p><a href="http://www.example.com/_/page">http://www.example.com/_/page</a></p>`,
+		},
+		"valid-link-16": {
+			Markdown:     `https://en.wikipedia.org/wiki/🐬`,
+			ExpectedHTML: `<p><a href="https://en.wikipedia.org/wiki/%F0%9F%90%AC">https://en.wikipedia.org/wiki/🐬</a></p>`,
+		},
+		"valid-link-17": {
+			Markdown:     `http://✪df.ws/1234`,
+			ExpectedHTML: `<p><a href="http://%E2%9C%AAdf.ws/1234">http://✪df.ws/1234</a></p>`,
+		},
+		"valid-link-18": {
+			Markdown:     `https://groups.google.com/forum/#!msg`,
+			ExpectedHTML: `<p><a href="https://groups.google.com/forum/#!msg">https://groups.google.com/forum/#!msg</a></p>`,
+		},
+		"valid-link-19": {
+			Markdown:     `https://пример.срб/пример-26/`,
+			ExpectedHTML: `<p><a href="https://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D1%81%D1%80%D0%B1/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80-26/">https://пример.срб/пример-26/</a></p>`,
+		},
+		"valid-link-20": {
+			Markdown:     `mailto://test@example.com`,
+			ExpectedHTML: `<p><a href="mailto://test@example.com">mailto://test@example.com</a></p>`,
+		},
+		"valid-link-21": {
+			Markdown:     `tel://555-123-4567`,
+			ExpectedHTML: `<p><a href="tel://555-123-4567">tel://555-123-4567</a></p>`,
+		},
+
+		"ip-address-1": {
+			Markdown:     `http://127.0.0.1`,
+			ExpectedHTML: `<p><a href="http://127.0.0.1">http://127.0.0.1</a></p>`,
+		},
+		"ip-address-2": {
+			Markdown:     `http://192.168.1.1:4040`,
+			ExpectedHTML: `<p><a href="http://192.168.1.1:4040">http://192.168.1.1:4040</a></p>`,
+		},
+		"ip-address-3": {
+			Markdown:     `http://username:password@127.0.0.1`,
+			ExpectedHTML: `<p><a href="http://username:password@127.0.0.1">http://username:password@127.0.0.1</a></p>`,
+		},
+		"ip-address-4": {
+			Markdown:     `http://username:password@[2001:0:5ef5:79fb:303a:62d5:3312:ff42]:80`,
+			ExpectedHTML: `<p><a href="http://username:password@%5B2001:0:5ef5:79fb:303a:62d5:3312:ff42%5D:80">http://username:password@[2001:0:5ef5:79fb:303a:62d5:3312:ff42]:80</a></p>`,
+		},
+
+		"link-with-brackets-1": {
+			Markdown:     `https://en.wikipedia.org/wiki/Rendering_(computer_graphics)`,
+			ExpectedHTML: `<p><a href="https://en.wikipedia.org/wiki/Rendering_(computer_graphics)">https://en.wikipedia.org/wiki/Rendering_(computer_graphics)</a></p>`,
+		},
+		"link-with-brackets-2": {
+			Markdown:     `http://example.com/more_(than)_one_(parens)`,
+			ExpectedHTML: `<p><a href="http://example.com/more_(than)_one_(parens)">http://example.com/more_(than)_one_(parens)</a></p>`,
+		},
+		"link-with-brackets-3": {
+			Markdown:     `http://example.com/(something)?after=parens`,
+			ExpectedHTML: `<p><a href="http://example.com/(something)?after=parens">http://example.com/(something)?after=parens</a></p>`,
+		},
+		"link-with-brackets-4": {
+			Markdown:     `http://foo.com/unicode_(✪)_in_parens`,
+			ExpectedHTML: `<p><a href="http://foo.com/unicode_(%E2%9C%AA)_in_parens">http://foo.com/unicode_(✪)_in_parens</a></p>`,
+		},
+
+		"inside-another-link-1": {
+			Markdown:     `[www.example.com](https://example.com)`,
+			ExpectedHTML: `<p><a href="https://example.com">www.example.com</a></p>`,
+		},
+		"inside-another-link-2": {
+			Markdown:     `[http://www.example.com](https://example.com)`,
+			ExpectedHTML: `<p><a href="https://example.com">http://www.example.com</a></p>`,
+		},
+
+		"link-in-sentence-1": {
+			Markdown:     `(http://example.com)`,
+			ExpectedHTML: `<p>(<a href="http://example.com">http://example.com</a>)</p>`,
+		},
+		"link-in-sentence-2": {
+			Markdown:     `(see http://example.com)`,
+			ExpectedHTML: `<p>(see <a href="http://example.com">http://example.com</a>)</p>`,
+		},
+		"link-in-sentence-3": {
+			Markdown:     `(http://example.com watch this)`,
+			ExpectedHTML: `<p>(<a href="http://example.com">http://example.com</a> watch this)</p>`,
+		},
+		"link-in-sentence-4": {
+			Markdown:     `This is a sentence with a http://example.com in it.`,
+			ExpectedHTML: `<p>This is a sentence with a <a href="http://example.com">http://example.com</a> in it.</p>`,
+		},
+		"link-in-sentence-5": {
+			Markdown:     `This is a sentence with a [link](http://example.com) in it.`,
+			ExpectedHTML: `<p>This is a sentence with a <a href="http://example.com">link</a> in it.</p>`,
+		},
+		"link-in-sentence-6": {
+			Markdown:     `This is a sentence with a http://example.com/_/underscore in it.`,
+			ExpectedHTML: `<p>This is a sentence with a <a href="http://example.com/_/underscore">http://example.com/_/underscore</a> in it.</p>`,
+		},
+		"link-in-sentence-7": {
+			Markdown:     `This is a sentence with a link (http://example.com) in it.`,
+			ExpectedHTML: `<p>This is a sentence with a link (<a href="http://example.com">http://example.com</a>) in it.</p>`,
+		},
+		"link-in-sentence-8": {
+			Markdown:     `This is a sentence with a (https://en.wikipedia.org/wiki/Rendering_(computer_graphics)) in it.`,
+			ExpectedHTML: `<p>This is a sentence with a (<a href="https://en.wikipedia.org/wiki/Rendering_(computer_graphics)">https://en.wikipedia.org/wiki/Rendering_(computer_graphics)</a>) in it.</p>`,
+		},
+		"link-in-sentence-9": {
+			Markdown:     `This is a sentence with a http://192.168.1.1:4040 in it.`,
+			ExpectedHTML: `<p>This is a sentence with a <a href="http://192.168.1.1:4040">http://192.168.1.1:4040</a> in it.</p>`,
+		},
+		"link-in-sentence-10": {
+			Markdown:     `This is a link to http://example.com.`,
+			ExpectedHTML: `<p>This is a link to <a href="http://example.com">http://example.com</a>.</p>`,
+		},
+		"link-in-sentence-11": {
+			Markdown:     `This is a link to http://example.com*`,
+			ExpectedHTML: `<p>This is a link to <a href="http://example.com">http://example.com</a>*</p>`,
+		},
+		"link-in-sentence-12": {
+			Markdown:     `This is a link to http://example.com_`,
+			ExpectedHTML: `<p>This is a link to <a href="http://example.com">http://example.com</a>_</p>`,
+		},
+		"link-in-sentence-13": {
+			Markdown:     `This is a link containing http://example.com/something?with,commas,in,url, but not at the end`,
+			ExpectedHTML: `<p>This is a link containing <a href="http://example.com/something?with,commas,in,url">http://example.com/something?with,commas,in,url</a>, but not at the end</p>`,
+		},
+		"link-in-sentence-14": {
+			Markdown:     `This is a question about a link http://example.com?`,
+			ExpectedHTML: `<p>This is a question about a link <a href="http://example.com">http://example.com</a>?</p>`,
+		},
+
+		"plt-7250-link-with-trailing-periods-1": {
+			Markdown:     `http://example.com.`,
+			ExpectedHTML: `<p><a href="http://example.com">http://example.com</a>.</p>`,
+		},
+		"plt-7250-link-with-trailing-periods-2": {
+			Markdown:     `http://example.com...`,
+			ExpectedHTML: `<p><a href="http://example.com">http://example.com</a>...</p>`,
+		},
+		"plt-7250-link-with-trailing-periods-3": {
+			Markdown:     `http://example.com/foo.`,
+			ExpectedHTML: `<p><a href="http://example.com/foo">http://example.com/foo</a>.</p>`,
+		},
+		"plt-7250-link-with-trailing-periods-4": {
+			Markdown:     `http://example.com/foo...`,
+			ExpectedHTML: `<p><a href="http://example.com/foo">http://example.com/foo</a>...</p>`,
+		},
+		"plt-7250-link-with-trailing-periods-5": {
+			Markdown:     `http://example.com/foo.bar`,
+			ExpectedHTML: `<p><a href="http://example.com/foo.bar">http://example.com/foo.bar</a></p>`,
+		},
+		"plt-7250-link-with-trailing-periods-6": {
+			Markdown:     `http://example.com/foo...bar`,
+			ExpectedHTML: `<p><a href="http://example.com/foo...bar">http://example.com/foo...bar</a></p>`,
+		},
+
+		"rn-319-www-link-as-part-of-word-1": {
+			Markdown:     `testwww.example.com`,
+			ExpectedHTML: `<p>testwww.example.com</p>`,
+		},
+
+		"mm-10180-link-containing-period-followed-by-non-letter-1": {
+			Markdown:     `https://example.com/123.+Pagetitle`,
+			ExpectedHTML: `<p><a href="https://example.com/123.+Pagetitle">https://example.com/123.+Pagetitle</a></p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-2": {
+			Markdown:     `https://example.com/123.?Pagetitle`,
+			ExpectedHTML: `<p><a href="https://example.com/123.?Pagetitle">https://example.com/123.?Pagetitle</a></p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-3": {
+			Markdown:     `https://example.com/123.-Pagetitle`,
+			ExpectedHTML: `<p><a href="https://example.com/123.-Pagetitle">https://example.com/123.-Pagetitle</a></p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-4": {
+			Markdown:     `https://example.com/123._Pagetitle`,
+			ExpectedHTML: `<p><a href="https://example.com/123._Pagetitle">https://example.com/123._Pagetitle</a></p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-5": {
+			Markdown:     `https://example.com/123.+`,
+			ExpectedHTML: `<p><a href="https://example.com/123.+">https://example.com/123.+</a></p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-6": {
+			Markdown:     `https://example.com/123.?`,
+			ExpectedHTML: `<p><a href="https://example.com/123">https://example.com/123</a>.?</p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-7": {
+			Markdown:     `https://example.com/123.-`,
+			ExpectedHTML: `<p><a href="https://example.com/123.-">https://example.com/123.-</a></p>`,
+		},
+		"mm-10180-link-containing-period-followed-by-non-letter-8": {
+			Markdown:     `https://example.com/123._`,
+			ExpectedHTML: `<p><a href="https://example.com/123">https://example.com/123</a>._</p>`,
+		},
+	} {
+		// Each case runs as its own subtest, named after its map key
+		t.Run(name, func(t *testing.T) {
+			assert.Equal(t, tc.ExpectedHTML, RenderHTML(tc.Markdown))
+		})
+	}
+}
diff --git a/utils/markdown/commonmark_test.go b/utils/markdown/commonmark_test.go
index 0a0959030..13e61f52d 100644
--- a/utils/markdown/commonmark_test.go
+++ b/utils/markdown/commonmark_test.go
@@ -999,3 +999,46 @@ func TestCommonMarkReferenceStrings(t *testing.T) {
 		})
 	}
 }
+
+// TestCommonMarkRefernceAutolinks renders multi-paragraph Markdown documents with RenderHTML and compares
+// the output against the expected HTML, covering both text that should be autolinked and text that
+// should be left alone.
+func TestCommonMarkRefernceAutolinks(t *testing.T) {
+	// These tests are adapted from the GitHub-flavoured CommonMark extension tests located at
+	// https://github.com/github/cmark/blob/master/test/extensions.txt
+	for name, tc := range map[string]struct {
+		Markdown     string
+		ExpectedHTML string
+	}{
+		"autolinks-1": {
+			Markdown: `: http://google.com https://google.com
+
+http://google.com/å
+
+www.github.com www.github.com/á
+
+www.google.com/a_b
+
+![http://inline.com/image](http://inline.com/image)
+
+Full stop outside parens shouldn't be included http://google.com/ok.
+
+(Full stop inside parens shouldn't be included http://google.com/ok.)
+
+"http://google.com"
+
+'http://google.com'
+
+http://🍄.ga/ http://x🍄.ga/`,
+			ExpectedHTML: `<p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p><p><a href="http://google.com/%C3%A5">http://google.com/å</a></p><p><a href="http://www.github.com">www.github.com</a> <a href="http://www.github.com/%C3%A1">www.github.com/á</a></p><p><a href="http://www.google.com/a_b">www.google.com/a_b</a></p><p><img src="http://inline.com/image" alt="http://inline.com/image" /></p><p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p><p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p><p>&quot;<a href="http://google.com">http://google.com</a>&quot;</p><p>'<a href="http://google.com">http://google.com</a>'</p><p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p>`,
+		},
+		"autolinks-2": {
+			Markdown: `These should not link:
+
+* @a.b.c@. x
+* n@.  b`,
+			ExpectedHTML: `<p>These should not link:</p><ul><li>@a.b.c@. x</li><li>n@.  b</li></ul>`,
+		},
+	} {
+		// Each case runs as its own subtest, named after its map key
+		t.Run(name, func(t *testing.T) {
+			assert.Equal(t, tc.ExpectedHTML, RenderHTML(tc.Markdown))
+		})
+	}
+}
diff --git a/utils/markdown/html.go b/utils/markdown/html.go
index 8d8e02c55..1a857afed 100644
--- a/utils/markdown/html.go
+++ b/utils/markdown/html.go
@@ -156,6 +156,12 @@ func RenderInlineHTML(inline Inline) (result string) {
 			result += RenderInlineHTML(inline)
 		}
 		result += "</a>"
+	case *Autolink:
+		result += `<a href="` + htmlEscaper.Replace(escapeURL(v.Link)) + `">`
+		for _, inline := range v.Children {
+			result += RenderInlineHTML(inline)
+		}
+		result += "</a>"
 	default:
 		panic(fmt.Sprintf("missing case for type %T", v))
 	}
diff --git a/utils/markdown/inlines.go b/utils/markdown/inlines.go
index 9198435ee..e6943a57d 100644
--- a/utils/markdown/inlines.go
+++ b/utils/markdown/inlines.go
@@ -81,6 +81,14 @@ type ReferenceImage struct {
 	ReferenceLinkOrImage
 }
 
+type Autolink struct { // Autolink is the inline node emitted for a bare URL or "www." link found in text.
+	inlineBase
+
+	Children []Inline // inlines rendered as the link's visible content
+
+	Link string // link destination; used to build the href attribute when rendering
+}
+
 type delimiterType int
 
 const (
@@ -182,7 +190,7 @@ func (p *inlineParser) parseEscapeCharacter() {
 }
 
 func (p *inlineParser) parseText() {
-	if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]"); next == -1 {
+	if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 {
 		absPos := relativeToAbsolutePosition(p.ranges, p.position)
 		p.inlines = append(p.inlines, &Text{
 			Text:  strings.TrimRightFunc(p.raw[p.position:], isWhitespace),
@@ -198,6 +206,12 @@ func (p *inlineParser) parseText() {
 				Range: Range{absPos, absPos + len(s)},
 			})
 		} else {
+			if next == 0 {
+				// Always read at least one character since 'w', 'W', and ':' may not actually match another
+				// type of node
+				next = 1
+			}
+
 			p.inlines = append(p.inlines, &Text{
 				Text:  p.raw[p.position : p.position+next],
 				Range: Range{absPos, absPos + next},
@@ -443,6 +457,60 @@ func (p *inlineParser) parseCharacterReference() {
 	}
 }
 
+func (p *inlineParser) parseAutolink(c rune) bool { // Tries to parse an autolink at p.position, where c is the character there; reports whether one was appended to p.inlines.
+	for element := p.delimiterStack.Back(); element != nil; element = element.Prev() { // give up unless every delimiter on the stack is inactive
+		d := element.Value.(*delimiter)
+		if !d.IsInactive {
+			return false
+		}
+	}
+
+	link := ""
+	text := ""
+	if c == ':' {
+		text = parseURLAutolink(p.raw, p.position)
+		link = text
+
+		// Since the current position is at the colon, the URL scheme has already been emitted as part
+		// of the previous node, so we have to rewind the parsing slightly to avoid duplicating it
+		rewind := strings.Index(text, ":") // length of the scheme before the colon, or -1 when text has no colon (e.g. it is empty)
+		if rewind != -1 {
+			lastInline := p.inlines[len(p.inlines)-1]
+			lastText, ok := lastInline.(*Text)
+
+			if !ok {
+				// This should never occur since parseURLAutolink will only return a non-empty value
+				// when the text before the colon ends in a valid URL scheme, which would mean that
+				// the previous node is a Text node
+				return false
+			}
+
+			p.inlines = p.inlines[0 : len(p.inlines)-1]
+			p.inlines = append(p.inlines, &Text{ // re-add the previous text with the scheme trimmed off its end
+				Text:  lastText.Text[:len(lastText.Text)-rewind],
+				Range: Range{lastText.Range.Position, lastText.Range.End - rewind},
+			})
+			p.position -= rewind // step back to the start of the scheme
+
+		}
+	} else if c == 'w' { // NOTE(review): the caller also dispatches 'W' here, but only lowercase 'w' is handled, so 'W' always returns false — confirm intended
+		text = parseWWWAutolink(p.raw, p.position)
+		link = "http://" + text // prefix a scheme so the destination is a full URL
+	}
+
+	if text == "" { // nothing matched at this position
+		return false
+	}
+
+	p.inlines = append(p.inlines, &Autolink{
+		Link:     link,
+		Children: []Inline{&Text{Text: text}}, // the visible text is the link exactly as written
+	})
+	p.position += len(text) // advance past the matched link
+
+	return true
+}
+
 func (p *inlineParser) Parse() []Inline {
 	for _, r := range p.ranges {
 		p.raw += p.markdown[r.Position:r.End]
@@ -464,6 +532,12 @@ func (p *inlineParser) Parse() []Inline {
 			p.parseLinkOrImageDelimiter()
 		case ']':
 			p.lookForLinkOrImage()
+		case 'w', 'W', ':':
+			matched := p.parseAutolink(c)
+
+			if !matched {
+				p.parseText()
+			}
 		default:
 			p.parseText()
 		}
diff --git a/utils/markdown/markdown.go b/utils/markdown/markdown.go
index 3061ba4bb..e0788d906 100644
--- a/utils/markdown/markdown.go
+++ b/utils/markdown/markdown.go
@@ -40,6 +40,14 @@ func isHexByte(c byte) bool {
 	return isHex(rune(c))
 }
 
+func isAlphanumeric(c rune) bool { // Reports whether c is an ASCII digit (0-9) or an ASCII letter of either case; other runes yield false.
+	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+}
+
+func isAlphanumericByte(c byte) bool { // Byte-typed variant of isAlphanumeric: converts c to a rune and applies the same check.
+	return isAlphanumeric(rune(c))
+}
+
 func nextNonWhitespace(markdown string, position int) int {
 	for offset, c := range []byte(markdown[position:]) {
 		if !isWhitespaceByte(c) {
author	Harrison Healey <harrisonmhealey@gmail.com>	2018-07-24 10:40:01 -0400
committer	Christopher Speller <crspeller@gmail.com>	2018-07-24 07:40:01 -0700
commit	c8d3e421391520af45d8e0f60a884fee139652a0 (patch)
tree	963d24f65d75cd413ba539779935223b165ae938 /utils
parent	bfb2640451c95b242ae7b8bb8528538b706412ef (diff)
download	chat-c8d3e421391520af45d8e0f60a884fee139652a0.tar.gz chat-c8d3e421391520af45d8e0f60a884fee139652a0.tar.bz2 chat-c8d3e421391520af45d8e0f60a884fee139652a0.zip