summary | refs | log | tree | commit | diff | stats
path: root/askbot/utils
diff options
context:
space:
mode:
author Evgeny Fadeev <evgeny.fadeev@gmail.com> 2012-07-22 22:20:52 -0400
committer Evgeny Fadeev <evgeny.fadeev@gmail.com> 2012-07-22 22:20:52 -0400
commit b459613a068f73afd6a07384fd326e93ebb1ed4e (patch)
tree 4b59a7b50409c0e587af899d695ed4636122eb7f /askbot/utils
parent 54150a2c7c8609ad6f39d31dfc69b03da523eba7 (diff)
download askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.gz
askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.bz2
askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.zip
updated documentation for the ALLOW_UNICODE_SLUGS setting
Diffstat (limited to 'askbot/utils')
-rw-r--r-- askbot/utils/forms.py 2
-rw-r--r-- askbot/utils/slug.py 71
2 files changed, 53 insertions, 20 deletions
diff --git a/askbot/utils/forms.py b/askbot/utils/forms.py
index b8ed253b..ee7adf7e 100644
--- a/askbot/utils/forms.py
+++ b/askbot/utils/forms.py
@@ -108,7 +108,7 @@ class UserNameField(StrippedNonEmptyCharField):
raise forms.ValidationError(self.error_messages['invalid'])
if username in self.RESERVED_NAMES:
raise forms.ValidationError(self.error_messages['forbidden'])
- if slugify(username, force_unidecode = True) == '':
+ if slugify(username) == '':
raise forms.ValidationError(self.error_messages['meaningless'])
try:
user = self.db_model.objects.get(
diff --git a/askbot/utils/slug.py b/askbot/utils/slug.py
index 58f228da..f9e30cbf 100644
--- a/askbot/utils/slug.py
+++ b/askbot/utils/slug.py
@@ -5,30 +5,63 @@ the setting was added just in case - if people actually
want to see unicode characters in the slug. If this is the choice
slug will be simply equal to the input text
"""
+import re
+import unicodedata
from unidecode import unidecode
-from django.template import defaultfilters
+
from django.conf import settings
-import re
+from django.template import defaultfilters
+from django.utils.encoding import smart_unicode
+
+
+# Extra characters outside of alphanumerics that we'll allow.
+SLUG_OK = '-_~'
+
-def slugify(input_text, max_length=50, force_unidecode = False):
+def unicode_slugify(s, ok=SLUG_OK, lower=True, spaces=False):
+ """Function copied from https://github.com/mozilla/unicode-slugify
+ because the author of the package never published it on pypi.
+
+ Copyright notice below applies just to this function
+ Copyright (c) 2011, Mozilla Foundation
+ All rights reserved.
+
+ L and N signify letter/number.
+ http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
+ """
+ rv = []
+ for c in unicodedata.normalize('NFKC', smart_unicode(s)):
+ cat = unicodedata.category(c)[0]
+ if cat in 'LN' or c in ok:
+ rv.append(c)
+ if cat == 'Z': # space
+ rv.append(' ')
+ new = ''.join(rv).strip()
+ if not spaces:
+ new = re.sub('[-\s]+', '-', new)
+ return new.lower() if lower else new
+
+
+def slugify(input_text, max_length=150):
"""custom slugify function that
removes diacritic modifiers from the characters
"""
+ if input_text == '':
+ return input_text
+
allow_unicode_slugs = getattr(settings, 'ALLOW_UNICODE_SLUGS', False)
- if allow_unicode_slugs == False or force_unidecode == True:
- if input_text == '':
- return input_text
- slug = defaultfilters.slugify(unidecode(input_text))
- while len(slug) > max_length:
- # try to shorten word by word until len(slug) <= max_length
- temp = slug[:slug.rfind('-')]
- if len(temp) > 0:
- slug = temp
- else:
- #we have nothing left, do not apply the last crop,
- #apply the cut-off directly
- slug = slug[:max_length]
- break
- return slug
+ if allow_unicode_slugs:
+ slug = unicode_slugify(input_text)
else:
- return re.sub(r'\s+', '-', input_text.strip().lower())
+ slug = defaultfilters.slugify(unidecode(input_text))
+ while len(slug) > max_length:
+ # try to shorten word by word until len(slug) <= max_length
+ temp = slug[:slug.rfind('-')]
+ if len(temp) > 0:
+ slug = temp
+ else:
+ #we have nothing left, do not apply the last crop,
+ #apply the cut-off directly
+ slug = slug[:max_length]
+ break
+ return slug