From e73f1d73143ebba9c7e80d21c45bba9b61f2611c Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Tue, 1 May 2018 01:54:11 +0900 Subject: MM-9072/MM-10185 Force-convert the encoding of OpenGraph metadata to UTF-8 (#8631) * Force-convert non-UTF8 HTML to UTF8 before opengraph processing * Split the force-encoding function * Add benchmark Test for the forceHTMLEncodingToUTF8() ``` Running tool: /home/comet/go-v1.9.2/bin/go test -benchmem -run=^$ github.com/mattermost/mattermost-server/app -bench ^BenchmarkForceHTMLEncodingToUTF8$ [03:32:58 KST 2018/04/21] [INFO] (github.com/mattermost/mattermost-server/app.TestMain:28) -test.run used, not creating temporary containers goos: linux goarch: amd64 pkg: github.com/mattermost/mattermost-server/app BenchmarkForceHTMLEncodingToUTF8/with_converting-4 100000 11201 ns/op 18704 B/op 32 allocs/op BenchmarkForceHTMLEncodingToUTF8/without_converting-4 300000 3931 ns/op 4632 B/op 13 allocs/op PASS ok github.com/mattermost/mattermost-server/app 2.703s Success: Benchmarks passed. ``` * Remove an unnecessary constraint * Add pre-check if content-type header is already utf-8 * Move the checking for utf-8 into forceHTMLEncodingToUTF8() for testing * Revert df3f347213faa0d023c26d201fa6531f46391086..HEAD, without Gopkg.lock --- app/post.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'app/post.go') diff --git a/app/post.go b/app/post.go index 41139da63..bc31aee44 100644 --- a/app/post.go +++ b/app/post.go @@ -9,6 +9,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io" "net/http" "net/url" "regexp" @@ -19,6 +20,7 @@ import ( "github.com/mattermost/mattermost-server/model" "github.com/mattermost/mattermost-server/store" "github.com/mattermost/mattermost-server/utils" + "golang.org/x/net/html/charset" ) var linkWithTextRegex = regexp.MustCompile(`<([^<\|]+)\|([^>]+)>`) @@ -726,7 +728,10 @@ func (a *App) GetOpenGraphMetadata(requestURL string) *opengraph.OpenGraph { } defer consumeAndClose(res) - if err := og.ProcessHTML(res.Body); err != nil { + contentType := res.Header.Get("Content-Type") + body := forceHTMLEncodingToUTF8(res.Body, contentType) + + if err := og.ProcessHTML(body); err != nil { mlog.Error(fmt.Sprintf("GetOpenGraphMetadata processing failed for url=%v with err=%v", requestURL, err.Error())) } @@ -735,6 +740,15 @@ func (a *App) GetOpenGraphMetadata(requestURL string) *opengraph.OpenGraph { return og } +func forceHTMLEncodingToUTF8(body io.Reader, contentType string) io.Reader { + r, err := charset.NewReader(body, contentType) + if err != nil { + mlog.Error(fmt.Sprintf("forceHTMLEncodingToUTF8 failed to convert for contentType=%v with err=%v", contentType, err.Error())) + return body + } + return r +} + func makeOpenGraphURLsAbsolute(og *opengraph.OpenGraph, requestURL string) { parsedRequestURL, err := url.Parse(requestURL) if err != nil { -- cgit v1.2.3-1-g7c22