summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHyeseong Kim <cometkim.kr@gmail.com>2016-11-24 23:38:56 +0900
committerJoram Wilander <jwawilander@gmail.com>2016-11-24 09:38:56 -0500
commit2abcc25173ef40b29cdde28856a3f5c9234056d3 (patch)
tree1a94ee38e5f3827e9fed61c4e70e2f0be57f7708
parent981ea33b8e10456bc279f36235c814305d01b243 (diff)
downloadchat-2abcc25173ef40b29cdde28856a3f5c9234056d3.tar.gz
chat-2abcc25173ef40b29cdde28856a3f5c9234056d3.tar.bz2
chat-2abcc25173ef40b29cdde28856a3f5c9234056d3.zip
PLT-2077 Support CJK hashtags (#4555)
* Add Korean character ranges to the existing CJK pattern * Add constant for CJK hashtags, because most keywords in CJK are two characters * Add CJK ranges to hashtag pattern to handle it. * Fix hashtag pattern to allow numbers in the last position * Remove a wrong test case: `test_` shouldn't be a hashtag * Fix hashtag regex to support standard Unicode letters * Remove wrong escapes from regex
-rw-r--r--model/utils.go2
-rw-r--r--webapp/tests/formatting_hashtags.test.jsx6
-rw-r--r--webapp/utils/text_formatting.jsx4
3 files changed, 3 insertions, 9 deletions
diff --git a/model/utils.go b/model/utils.go
index 457b64c09..0ce243fe7 100644
--- a/model/utils.go
+++ b/model/utils.go
@@ -304,7 +304,7 @@ func Etag(parts ...interface{}) string {
return etag
}
-var validHashtag = regexp.MustCompile(`^(#[A-Za-zäöüÄÖÜß]+[A-Za-z0-9äöüÄÖÜß_\-]*[A-Za-z0-9äöüÄÖÜß])$`)
+var validHashtag = regexp.MustCompile(`^(#\pL[\pL\d\-_.]*[\pL\d])$`)
var puncStart = regexp.MustCompile(`^[^\pL\d\s#]+`)
var hashtagStart = regexp.MustCompile(`^#{2,}`)
var puncEnd = regexp.MustCompile(`[^\pL\d\s]+$`)
diff --git a/webapp/tests/formatting_hashtags.test.jsx b/webapp/tests/formatting_hashtags.test.jsx
index 37b84a4a8..1c7de1541 100644
--- a/webapp/tests/formatting_hashtags.test.jsx
+++ b/webapp/tests/formatting_hashtags.test.jsx
@@ -73,12 +73,6 @@ describe('TextFormatting.Hashtags', function() {
"<p><a class='mention-link' href='#' data-hashtag='#test'>#test</a>.</p>"
);
- // Known issue, trailing underscore is captured by the clientside regex but not the serverside one
- assert.equal(
- TextFormatting.formatText('#test_').trim(),
- "<p><a class='mention-link' href='#' data-hashtag='#test_'>#test_</a></p>"
- );
-
assert.equal(
TextFormatting.formatText('This is a sentence #test containing a hashtag').trim(),
"<p>This is a sentence <a class='mention-link' href='#' data-hashtag='#test'>#test</a> containing a hashtag</p>"
diff --git a/webapp/utils/text_formatting.jsx b/webapp/utils/text_formatting.jsx
index 9c2edf954..9f983a1ee 100644
--- a/webapp/utils/text_formatting.jsx
+++ b/webapp/utils/text_formatting.jsx
@@ -11,7 +11,7 @@ import XRegExp from 'xregexp';
// pattern to detect the existance of a Chinese, Japanese, or Korean character in a string
// http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi
-const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]/;
+const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf\uac00-\ud7a3]/;
// Performs formatting of user posts including highlighting mentions and search terms and converting urls, hashtags,
// @mentions and ~channels to links by taking a user's message and returning a string of formatted html. Also takes
@@ -342,7 +342,7 @@ function autolinkHashtags(text, tokens) {
return prefix + alias;
}
- return output.replace(/(^|\W)(#[a-zA-ZäöüÄÖÜß][a-zA-Z0-9äöüÄÖÜß.\-_]*)\b/g, replaceHashtagWithToken);
+ return output.replace(XRegExp.cache('(^|\\W)(#\\pL[\\pL\\d\\-_.]*[\\pL\\d])', 'g'), replaceHashtagWithToken);
}
const puncStart = XRegExp.cache('^[^\\pL\\d\\s#]+');