From e73f1d73143ebba9c7e80d21c45bba9b61f2611c Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Tue, 1 May 2018 01:54:11 +0900 Subject: MM-9072/MM-10185 Force-convert the encoding of OpenGraph metadata to UTF-8 (#8631) * Force-convert non-UTF8 HTML to UTF8 before opengraph processing * Split the force-encoding function * Add benchmark Test for the forceHTMLEncodingToUTF8() ``` Running tool: /home/comet/go-v1.9.2/bin/go test -benchmem -run=^$ github.com/mattermost/mattermost-server/app -bench ^BenchmarkForceHTMLEncodingToUTF8$ [03:32:58 KST 2018/04/21] [INFO] (github.com/mattermost/mattermost-server/app.TestMain:28) -test.run used, not creating temporary containers goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost-server/app BenchmarkForceHTMLEncodingToUTF8/with_converting-4 100000 11201 ns/op 18704 B/op 32 allocs/op BenchmarkForceHTMLEncodingToUTF8/without_converting-4 300000 3931 ns/op 4632 B/op 13 allocs/op PASS ok github.com/mattermost/mattermost-server/app 2.703s Success: Benchmarks passed. ``` * Remove an unnecessary constraint * Add pre-check if content-type header is already utf-8 * Move the checking for utf-8 into forceHTMLEncodingToUTF8() for testing * Revert df3f347213faa0d023c26d201fa6531f46391086..HEAD, without Gopkg.lock --- app/post.go | 16 +++++++++++++++- app/post_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'app') diff --git a/app/post.go b/app/post.go index 41139da63..bc31aee44 100644 --- a/app/post.go +++ b/app/post.go @@ -9,6 +9,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io" "net/http" "net/url" "regexp" @@ -19,6 +20,7 @@ import ( "github.com/mattermost/mattermost-server/model" "github.com/mattermost/mattermost-server/store" "github.com/mattermost/mattermost-server/utils" + "golang.org/x/net/html/charset" ) var linkWithTextRegex = regexp.MustCompile(`<([^<\|]+)\|([^>]+)>`) @@ -726,7 +728,10 @@ func (a *App) GetOpenGraphMetadata(requestURL string) *opengraph.OpenGraph { } defer consumeAndClose(res) - if err := og.ProcessHTML(res.Body); err != nil { + contentType := res.Header.Get("Content-Type") + body := forceHTMLEncodingToUTF8(res.Body, contentType) + + if err := og.ProcessHTML(body); err != nil { mlog.Error(fmt.Sprintf("GetOpenGraphMetadata processing failed for url=%v with err=%v", requestURL, err.Error())) } @@ -735,6 +740,15 @@ func (a *App) GetOpenGraphMetadata(requestURL string) *opengraph.OpenGraph { return og } +func forceHTMLEncodingToUTF8(body io.Reader, contentType string) io.Reader { + r, err := charset.NewReader(body, contentType) + if err != nil { + mlog.Error(fmt.Sprintf("forceHTMLEncodingToUTF8 failed to convert for contentType=%v with err=%v", contentType, err.Error())) + return body + } + return r +} + func makeOpenGraphURLsAbsolute(og *opengraph.OpenGraph, requestURL string) { parsedRequestURL, err := url.Parse(requestURL) if err != nil { diff --git a/app/post_test.go b/app/post_test.go index 10b957751..aefc0ea35 100644 --- a/app/post_test.go +++ b/app/post_test.go @@ -297,6 +297,34 @@ func TestImageProxy(t *testing.T) { } } +func BenchmarkForceHTMLEncodingToUTF8(b *testing.B) { + HTML := ` + + + + + + + ` + ContentType := "text/html; utf-8" + + b.Run("with converting", func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := forceHTMLEncodingToUTF8(strings.NewReader(HTML), ContentType) + + og := opengraph.NewOpenGraph() + og.ProcessHTML(r) + } + }) + + b.Run("without converting", func(b *testing.B) { + for i := 0; i < b.N; i++ { + og := opengraph.NewOpenGraph() + og.ProcessHTML(strings.NewReader(HTML)) + } + }) +} + func TestMakeOpenGraphURLsAbsolute(t *testing.T) { for name, tc := range map[string]struct { HTML string -- cgit v1.2.3-1-g7c22