From 2abcc25173ef40b29cdde28856a3f5c9234056d3 Mon Sep 17 00:00:00 2001 From: Hyeseong Kim Date: Thu, 24 Nov 2016 23:38:56 +0900 Subject: PLT-2077 Support CJK hashtags (#4555) * Add Korean character ranges to the existing CJK pattern * Add constant for CJK hashtags Because most keywords in CJK are two characters * Add CJK ranges to hashtag pattern to handle it. * Fixes hashtag pattern to apply numbers at last * Remove a wrong test case `test_` shouldn't be a hashtag * Fix hashtag regex to support standard Unicode * Remove wrong escapes from regex --- model/utils.go | 2 +- webapp/tests/formatting_hashtags.test.jsx | 6 ------ webapp/utils/text_formatting.jsx | 4 ++-- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/model/utils.go b/model/utils.go index 457b64c09..0ce243fe7 100644 --- a/model/utils.go +++ b/model/utils.go @@ -304,7 +304,7 @@ func Etag(parts ...interface{}) string { return etag } -var validHashtag = regexp.MustCompile(`^(#[A-Za-zäöüÄÖÜß]+[A-Za-z0-9äöüÄÖÜß_\-]*[A-Za-z0-9äöüÄÖÜß])$`) +var validHashtag = regexp.MustCompile(`^(#\pL[\pL\d\-_.]*[\pL\d])$`) var puncStart = regexp.MustCompile(`^[^\pL\d\s#]+`) var hashtagStart = regexp.MustCompile(`^#{2,}`) var puncEnd = regexp.MustCompile(`[^\pL\d\s]+$`) diff --git a/webapp/tests/formatting_hashtags.test.jsx b/webapp/tests/formatting_hashtags.test.jsx index 37b84a4a8..1c7de1541 100644 --- a/webapp/tests/formatting_hashtags.test.jsx +++ b/webapp/tests/formatting_hashtags.test.jsx @@ -73,12 +73,6 @@ describe('TextFormatting.Hashtags', function() { "

#test.

" ); - // Known issue, trailing underscore is captured by the clientside regex but not the serverside one - assert.equal( - TextFormatting.formatText('#test_').trim(), - "

#test_

" - ); - assert.equal( TextFormatting.formatText('This is a sentence #test containing a hashtag').trim(), "

This is a sentence #test containing a hashtag

" diff --git a/webapp/utils/text_formatting.jsx b/webapp/utils/text_formatting.jsx index 9c2edf954..9f983a1ee 100644 --- a/webapp/utils/text_formatting.jsx +++ b/webapp/utils/text_formatting.jsx @@ -11,7 +11,7 @@ import XRegExp from 'xregexp'; // pattern to detect the existance of a Chinese, Japanese, or Korean character in a string // http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi -const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]/; +const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf\uac00-\ud7a3]/; // Performs formatting of user posts including highlighting mentions and search terms and converting urls, hashtags, // @mentions and ~channels to links by taking a user's message and returning a string of formatted html. Also takes @@ -342,7 +342,7 @@ function autolinkHashtags(text, tokens) { return prefix + alias; } - return output.replace(/(^|\W)(#[a-zA-ZäöüÄÖÜß][a-zA-Z0-9äöüÄÖÜß.\-_]*)\b/g, replaceHashtagWithToken); + return output.replace(XRegExp.cache('(^|\\W)(#\\pL[\\pL\\d\\-_.]*[\\pL\\d])', 'g'), replaceHashtagWithToken); } const puncStart = XRegExp.cache('^[^\\pL\\d\\s#]+'); -- cgit v1.2.3-1-g7c22