user import from zendesk kinda works

author: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2011-06-14 02:06:45 -0400
committer: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2011-06-14 02:06:45 -0400
commit: 41cd8bd6165669eacef20215b9ae671d55111d5d (patch)
tree: ba8ce48880bf92440bab27b1a0c0530071aebefd
parent: 6cb91750ff7eb7fc2e9234a1d027e0ebe047f3e0 (diff)
download: askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.gz
askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.bz2
askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.zip
3 files changed, 182 insertions, 7 deletions
diff --git a/askbot/management/commands/import_zendesk.py b/askbot/management/commands/import_zendesk.py
new file mode 100644
index 00000000..c4916f72
--- /dev/null
+++ b/askbot/management/commands/import_zendesk.py
@@ -0,0 +1,133 @@
+"""importer from zendesk, please note that you need an exported data dump
+in the first place to use this command.
+If you are interested in using it - please ask Evgeny <evgeny.fadeev@gmail.com>
+"""
+import os
+import re
+import sys
+import tarfile
+import tempfile
+from datetime import datetime, date
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from lxml import etree
+from askbot import models
+from askbot.utils import console
+from askbot.utils.html import unescape
+
+#a hack, did not know how to parse timezone offset
+#the value is the datetime that strptime builds for '00:00'; subtracting it
+#from another value parsed with the same '%H:%M' format leaves just the
+#hours and minutes as a timedelta
+ZERO_TIME = datetime.strptime('00:00', '%H:%M')
+
+def get_unique_username(name_seed):
+    """returns a user name that is not yet taken in the database
+
+    ``name_seed`` is tried first, then ``name_seed`` with
+    1, 2, 3, ... appended, until no user with the candidate
+    name is found
+    """
+    candidate = name_seed
+    suffix = 0
+    while True:
+        try:
+            models.User.objects.get(username = candidate)
+        except models.User.DoesNotExist:
+            #nobody has this name yet
+            return candidate
+        suffix += 1
+        candidate = name_seed + str(suffix)
+
+def get_val(elem, field_name):
+    """returns value of the child element ``field_name`` of ``elem``,
+    converted according to the "type" attribute of that child:
+
+    * "boolean" - True or False
+    * type ending with "integer" - int, or None if the element is empty
+    * "datetime" - datetime, or None if the element is empty
+    * any other or no type - the text as is, or '' if the element is empty
+
+    raises ValueError if the child element does not exist, or if
+    a boolean value is neither "true" nor "false"
+    """
+    field = elem.find(field_name)
+    if field is None:
+        #find() returns None for a missing child, fail with a readable message
+        raise ValueError('field "%s" not found' % field_name)
+    field_type = field.attrib.get('type', '')
+    raw_val = field.text
+    if field_type == 'boolean':
+        if raw_val == 'true':
+            return True
+        elif raw_val == 'false':
+            return False
+        else:
+            raise ValueError('"true" or "false" expected, found "%s"' % raw_val)
+    elif field_type.endswith('integer'):
+        #an empty element has no text to convert
+        if not raw_val:
+            return None
+        return int(raw_val)
+    elif field_type == 'datetime':
+        #emptiness must be tested before the value is sliced
+        if not raw_val:
+            return None
+        #first 19 characters are YYYY-MM-DDTHH:MM:SS, the rest is the offset
+        dt = datetime.strptime(raw_val[:19], '%Y-%m-%dT%H:%M:%S')
+        raw_tz = raw_val[19:]
+        if raw_tz in ('', 'Z'):
+            #no offset at all, or explicit zero offset
+            return dt
+        tzoffset_sign = raw_tz[0]
+        tzoffset_amt = datetime.strptime(raw_tz[1:], '%H:%M')
+        tzoffset = tzoffset_amt - ZERO_TIME
+        #NOTE(review): a value with "-04:00" offset is 4 hours behind UTC,
+        #so conversion to UTC would add the offset, while this code
+        #subtracts it; the original direction is kept - please confirm
+        #which one is intended
+        if tzoffset_sign == '-':
+            return dt - tzoffset
+        return dt + tzoffset
+    else:
+        if raw_val:
+            return raw_val
+        return ''
+
+class Command(BaseCommand):
+    """management command importing data from a zendesk dump;
+    takes one argument - path to the tarred and gzipped dump,
+    at the moment only the user accounts are imported
+    """
+    def handle(self, *args, **kwargs):
+        """opens the archive given as the only positional argument
+        and runs the import steps; raises CommandError if the number
+        of arguments is not exactly one
+        """
+        if len(args) != 1:
+            raise CommandError('please provide path to tarred and gzipped zendesk dump')
+
+        self.tar = tarfile.open(args[0], 'r:gz')
+        try:
+            sys.stdout.write("Importing user accounts: ")
+            self.import_users()
+            #self.import_openid_associations()
+            #self.import_email_settings()
+
+            #self.import_question_edits()
+            #self.import_answer_edits()
+
+            #self.import_question_data()
+            #self.import_answer_data()
+
+            #self.import_comments()
+
+            #self.import_question_views()
+            #self.import_favorite_questions()
+            #self.import_marked_tags()
+
+            #self.import_votes()
+        finally:
+            #release the file handle even when an import step fails
+            self.tar.close()
+
+    def get_file(self, file_name):
+        """returns parsed xml tree of the archive member ``file_name``
+
+        if the first entry of the archive is not an .xml file, it
+        is taken to be the directory holding the data files
+        raises CommandError if the archive has no entries
+        """
+        names = self.tar.getnames()
+        if not names:
+            raise CommandError('the archive is empty')
+        first_item = names[0]
+        file_path = file_name
+        if not first_item.endswith('.xml'):
+            #member names inside of a tar archive are always separated
+            #with "/", os.path.join would use "\" on windows
+            file_path = first_item.rstrip('/') + '/' + file_name
+
+        file_info = self.tar.getmember(file_path)
+        xml_file = self.tar.extractfile(file_info)
+        return etree.parse(xml_file)
+
+    @transaction.commit_manually
+    def import_users(self):
+        """creates a user account for each <user> element of users.xml,
+        committing after every saved account
+        """
+        added_users = 0
+        try:
+            xml = self.get_file('users.xml')
+            for user in xml.findall('user'):
+                #a whole bunch of fields are actually dropped now
+                #see what's available in users.xml meanings of some
+                #values there is not clear
+
+                #special treatment for the user name
+                username = unescape(get_val(user, 'name'))#unescape html entities
+                username = get_unique_username(username)
+
+                ab_user = models.User(
+                    email = get_val(user, 'email'),
+                    email_isvalid = get_val(user, 'is-verified'),
+                    date_joined = get_val(user, 'created-at'),
+                    username = username,
+                    is_active = get_val(user, 'is-active'),
+                )
+                ab_user.save()
+                added_users += 1
+                console.print_action(ab_user.username)
+                transaction.commit()
+        except:
+            #with manual commits a transaction left pending on exit
+            #raises an error of its own, hiding the actual failure -
+            #roll back and let the original exception through
+            transaction.rollback()
+            raise
+        console.print_action('%d users added' % added_users, nowipe = True)
+        transaction.commit()
diff --git a/askbot/utils/console.py b/askbot/utils/console.py
index 041fc839..470856b5 100644
--- a/askbot/utils/console.py
+++ b/askbot/utils/console.py
@@ -49,13 +49,28 @@ def open_new_file(prompt_phrase, extension = '', hint = None):
 
     return file_object
 
-def print_progress(format_string, progress):
+def print_action(action_text, nowipe = False):
+    """writes ``action_text`` to the standard output; when
+    ``nowipe`` is False the text is erased again so that the
+    next message can take its place, otherwise a newline
+    is written after the text
+    """
+    #print statement is used instead of sys.stdout.write, because
+    #the latter was found not to work here with unicode text
+    print action_text,
+    sys.stdout.flush()
+    if nowipe == False:
+        width = len(action_text)
+        step_back = '\b' * width
+        #go back over the text, blank it out, go back once more
+        sys.stdout.write(step_back + ' ' * width + step_back)
+    else:
+        sys.stdout.write('\n')
+
+def print_progress(elapsed, total, nowipe = False):
     """print dynamic output of progress of some
-    operation to the console and clear the output with
+    operation, in percent, to the console and clear the output with
     a backspace character to have the number increment
     in-place"""
-    output = format_string % progress
-    sys.stdout.write(output)
-    sys.stdout.flush()
-    sys.stdout.write('\b' * len(output))
-    
+    #elapsed and total are converted with float(), total must not be zero
+    #parentheses are required: "%" and "*" have the same precedence and
+    #group left to right, without them the formatted string itself
+    #would be multiplied by a float, which is a TypeError
+    output = '%6.2f%%' % (100 * float(elapsed)/float(total))
+    print_action(output, nowipe)
diff --git a/askbot/utils/html.py b/askbot/utils/html.py
index aa8e24d8..f6c168fb 100644
--- a/askbot/utils/html.py
+++ b/askbot/utils/html.py
@@ -1,6 +1,7 @@
 """Utilities for working with HTML."""
 import html5lib
 from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
+import re, htmlentitydefs
 
 class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin):
     acceptable_elements = ('a', 'abbr', 'acronym', 'address', 'b', 'big',
@@ -49,3 +50,29 @@ def sanitize_html(html):
                                   quote_attr_values=True)
     output_generator = s.serialize(stream)
     return u''.join(output_generator)
+
+def unescape(text):
+    """source: http://effbot.org/zone/re-sub.htm#unescape-html
+    Replaces HTML or XML character references and entities in a text
+    string with the characters they stand for; references that cannot
+    be resolved are left as they are.
+    @param text The HTML (or XML) source text.
+    @return The plain text, as a Unicode string, if necessary.
+    """
+    def fixup(m):
+        text = m.group(0)
+        if text[:2] == "&#":
+            # character reference
+            try:
+                if text[:3] == "&#x":
+                    return unichr(int(text[3:-1], 16))
+                else:
+                    return unichr(int(text[2:-1]))
+            except (ValueError, OverflowError):
+                # ValueError - not a number, or outside of the range of unichr
+                # OverflowError - the number is too large to be passed to unichr
+                pass
+        else:
+            # named entity
+            try:
+                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+            except KeyError:
+                pass
+        return text # leave as is
+    return re.sub("&#?\w+;", fixup, text)
author	Evgeny Fadeev <evgeny.fadeev@gmail.com>	2011-06-14 02:06:45 -0400
committer	Evgeny Fadeev <evgeny.fadeev@gmail.com>	2011-06-14 02:06:45 -0400
commit	41cd8bd6165669eacef20215b9ae671d55111d5d (patch)
tree	ba8ce48880bf92440bab27b1a0c0530071aebefd
parent	6cb91750ff7eb7fc2e9234a1d027e0ebe047f3e0 (diff)
download	askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.gz askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.bz2 askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.zip