diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2012-07-22 22:20:52 -0400 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2012-07-22 22:20:52 -0400 |
commit | b459613a068f73afd6a07384fd326e93ebb1ed4e (patch) | |
tree | 4b59a7b50409c0e587af899d695ed4636122eb7f /askbot/utils | |
parent | 54150a2c7c8609ad6f39d31dfc69b03da523eba7 (diff) | |
download | askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.gz askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.bz2 askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.zip |
updated documentation for the ALLOW_UNICODE_SLUGS setting
Diffstat (limited to 'askbot/utils')
-rw-r--r-- | askbot/utils/forms.py | 2 |
-rw-r--r-- | askbot/utils/slug.py | 71 |
2 files changed, 53 insertions, 20 deletions
diff --git a/askbot/utils/forms.py b/askbot/utils/forms.py index b8ed253b..ee7adf7e 100644 --- a/askbot/utils/forms.py +++ b/askbot/utils/forms.py @@ -108,7 +108,7 @@ class UserNameField(StrippedNonEmptyCharField): raise forms.ValidationError(self.error_messages['invalid']) if username in self.RESERVED_NAMES: raise forms.ValidationError(self.error_messages['forbidden']) - if slugify(username, force_unidecode = True) == '': + if slugify(username) == '': raise forms.ValidationError(self.error_messages['meaningless']) try: user = self.db_model.objects.get( diff --git a/askbot/utils/slug.py b/askbot/utils/slug.py index 58f228da..f9e30cbf 100644 --- a/askbot/utils/slug.py +++ b/askbot/utils/slug.py @@ -5,30 +5,63 @@ the setting was added just in case - if people actually want to see unicode characters in the slug. If this is the choice slug will be simply equal to the input text """ +import re +import unicodedata from unidecode import unidecode -from django.template import defaultfilters + from django.conf import settings -import re +from django.template import defaultfilters +from django.utils.encoding import smart_unicode + + +# Extra characters outside of alphanumerics that we'll allow. +SLUG_OK = '-_~' + -def slugify(input_text, max_length=50, force_unidecode = False): +def unicode_slugify(s, ok=SLUG_OK, lower=True, spaces=False): + """Function copied from https://github.com/mozilla/unicode-slugify + because the author of the package never published it on pypi. + + Copyright notice below applies just to this function + Copyright (c) 2011, Mozilla Foundation + All rights reserved. + + L and N signify letter/number. 
+ http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table + """ + rv = [] + for c in unicodedata.normalize('NFKC', smart_unicode(s)): + cat = unicodedata.category(c)[0] + if cat in 'LN' or c in ok: + rv.append(c) + if cat == 'Z': # space + rv.append(' ') + new = ''.join(rv).strip() + if not spaces: + new = re.sub('[-\s]+', '-', new) + return new.lower() if lower else new + + +def slugify(input_text, max_length=150): """custom slugify function that removes diacritic modifiers from the characters """ + if input_text == '': + return input_text + allow_unicode_slugs = getattr(settings, 'ALLOW_UNICODE_SLUGS', False) - if allow_unicode_slugs == False or force_unidecode == True: - if input_text == '': - return input_text - slug = defaultfilters.slugify(unidecode(input_text)) - while len(slug) > max_length: - # try to shorten word by word until len(slug) <= max_length - temp = slug[:slug.rfind('-')] - if len(temp) > 0: - slug = temp - else: - #we have nothing left, do not apply the last crop, - #apply the cut-off directly - slug = slug[:max_length] - break - return slug + if allow_unicode_slugs: + slug = unicode_slugify(input_text) else: - return re.sub(r'\s+', '-', input_text.strip().lower()) + slug = defaultfilters.slugify(unidecode(input_text)) + while len(slug) > max_length: + # try to shorten word by word until len(slug) <= max_length + temp = slug[:slug.rfind('-')] + if len(temp) > 0: + slug = temp + else: + #we have nothing left, do not apply the last crop, + #apply the cut-off directly + slug = slug[:max_length] + break + return slug |