updated documentation for the ALLOW_UNICODE_SLUGS setting

author: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2012-07-22 22:20:52 -0400
committer: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2012-07-22 22:20:52 -0400
commit: b459613a068f73afd6a07384fd326e93ebb1ed4e (patch)
tree: 4b59a7b50409c0e587af899d695ed4636122eb7f /askbot/utils
parent: 54150a2c7c8609ad6f39d31dfc69b03da523eba7 (diff)
download: askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.gz
askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.bz2
askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.zip
2 files changed, 53 insertions, 20 deletions
diff --git a/askbot/utils/forms.py b/askbot/utils/forms.py
index b8ed253b..ee7adf7e 100644
--- a/askbot/utils/forms.py
+++ b/askbot/utils/forms.py
@@ -108,7 +108,7 @@ class UserNameField(StrippedNonEmptyCharField):
             raise forms.ValidationError(self.error_messages['invalid'])
         if username in self.RESERVED_NAMES:
             raise forms.ValidationError(self.error_messages['forbidden'])
-        if slugify(username, force_unidecode = True) == '':
+        if slugify(username) == '':
             raise forms.ValidationError(self.error_messages['meaningless'])
         try:
             user = self.db_model.objects.get(
diff --git a/askbot/utils/slug.py b/askbot/utils/slug.py
index 58f228da..f9e30cbf 100644
--- a/askbot/utils/slug.py
+++ b/askbot/utils/slug.py
@@ -5,30 +5,63 @@ the setting was added just in case - if people actually
 want to see unicode characters in the slug. If this is the choice
 slug will be simply equal to the input text
 """
+import re
+import unicodedata
 from unidecode import unidecode
-from django.template import defaultfilters
+
 from django.conf import settings
-import re
+from django.template import defaultfilters
+from django.utils.encoding import smart_unicode
+
+
+# Extra characters outside of alphanumerics that we'll allow.
+SLUG_OK = '-_~'
+
 
-def slugify(input_text, max_length=50, force_unidecode = False):
+def unicode_slugify(s, ok=SLUG_OK, lower=True, spaces=False):
+    """Function copied from https://github.com/mozilla/unicode-slugify
+    because the author of the package never published it on pypi.
+
+    Copyright notice below applies just to this function
+    Copyright (c) 2011, Mozilla Foundation
+    All rights reserved.
+
+    L and N signify letter/number.
+    http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
+    """
+    rv = []
+    for c in unicodedata.normalize('NFKC', smart_unicode(s)):
+        cat = unicodedata.category(c)[0]
+        if cat in 'LN' or c in ok:
+            rv.append(c)
+        if cat == 'Z':  # space
+            rv.append(' ')
+    new = ''.join(rv).strip()
+    if not spaces:
+        new = re.sub('[-\s]+', '-', new)
+    return new.lower() if lower else new
+
+
+def slugify(input_text, max_length=150):
     """custom slugify function that
     removes diacritic modifiers from the characters
     """
+    if input_text == '':
+        return input_text
+
     allow_unicode_slugs = getattr(settings, 'ALLOW_UNICODE_SLUGS', False)
-    if allow_unicode_slugs == False or force_unidecode == True:
-        if input_text == '':
-            return input_text
-        slug = defaultfilters.slugify(unidecode(input_text))
-        while len(slug) > max_length:
-            # try to shorten word by word until len(slug) <= max_length
-            temp = slug[:slug.rfind('-')]
-            if len(temp) > 0:
-                slug = temp
-            else:
-                #we have nothing left, do not apply the last crop,
-                #apply the cut-off directly
-                slug = slug[:max_length]
-                break
-        return slug
+    if allow_unicode_slugs:
+        slug = unicode_slugify(input_text)
     else:
-        return re.sub(r'\s+', '-', input_text.strip().lower())
+        slug = defaultfilters.slugify(unidecode(input_text))
+    while len(slug) > max_length:
+        # try to shorten word by word until len(slug) <= max_length
+        temp = slug[:slug.rfind('-')]
+        if len(temp) > 0:
+            slug = temp
+        else:
+            #we have nothing left, do not apply the last crop,
+            #apply the cut-off directly
+            slug = slug[:max_length]
+            break
+    return slug
author	Evgeny Fadeev <evgeny.fadeev@gmail.com>	2012-07-22 22:20:52 -0400
committer	Evgeny Fadeev <evgeny.fadeev@gmail.com>	2012-07-22 22:20:52 -0400
commit	b459613a068f73afd6a07384fd326e93ebb1ed4e (patch)
tree	4b59a7b50409c0e587af899d695ed4636122eb7f /askbot/utils
parent	54150a2c7c8609ad6f39d31dfc69b03da523eba7 (diff)
download	askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.gz askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.tar.bz2 askbot-b459613a068f73afd6a07384fd326e93ebb1ed4e.zip