diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2011-06-14 02:06:45 -0400 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2011-06-14 02:06:45 -0400 |
commit | 41cd8bd6165669eacef20215b9ae671d55111d5d (patch) | |
tree | ba8ce48880bf92440bab27b1a0c0530071aebefd | |
parent | 6cb91750ff7eb7fc2e9234a1d027e0ebe047f3e0 (diff) | |
download | askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.gz askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.bz2 askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.zip |
user import from zendesk kinda works
-rw-r--r-- | askbot/management/commands/import_zendesk.py | 133 | ||||
-rw-r--r-- | askbot/utils/console.py | 29 | ||||
-rw-r--r-- | askbot/utils/html.py | 27 |
3 files changed, 182 insertions, 7 deletions
# diff --git a/askbot/management/commands/import_zendesk.py b/askbot/management/commands/import_zendesk.py
# new file mode 100644 index 00000000..c4916f72
# --- /dev/null +++ b/askbot/management/commands/import_zendesk.py @@ -0,0 +1,133 @@
"""Importer of user accounts from a Zendesk dump.

Please note that you need an exporter in the first place to use this command.
If you are interested to use it - please ask Evgeny <evgeny.fadeev@gmail.com>
"""
import os
import posixpath
import re
import sys
import tarfile
import tempfile
from datetime import datetime, date
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from lxml import etree
from askbot import models
from askbot.utils import console
from askbot.utils.html import unescape

#a hack, did not know how to parse timezone offset
ZERO_TIME = datetime.strptime('00:00', '%H:%M')


def get_unique_username(name_seed):
    """Return a user name that is not yet taken in the database.

    Starts with ``name_seed``; while a user with the candidate name
    exists, tries ``name_seed`` + "1", ``name_seed`` + "2", and so on.
    """
    original_name = name_seed
    attempt_no = 1
    while True:
        try:
            models.User.objects.get(username = name_seed)
        except models.User.DoesNotExist:
            return name_seed
        # the name is taken - derive the next candidate from the original seed
        name_seed = original_name + str(attempt_no)
        attempt_no += 1


def _parse_datetime(raw_val):
    """Parse 'YYYY-MM-DDTHH:MM:SS' optionally followed by 'Z' or a
    '+HH:MM' / '-HH:MM' offset and return a naive ``datetime``.
    """
    dt = datetime.strptime(raw_val[:19], '%Y-%m-%dT%H:%M:%S')
    suffix = raw_val[19:]
    if suffix in ('', 'Z'):
        # no offset to apply
        return dt
    tzoffset_sign = suffix[0]
    # subtracting ZERO_TIME turns the parsed 'HH:MM' into a timedelta
    tzoffset = datetime.strptime(suffix[1:], '%H:%M') - ZERO_TIME
    # NOTE(review): the offset is applied with its own sign, which moves the
    # value away from UTC rather than toward it (ISO 8601: UTC = local - offset).
    # Kept exactly as in the original - confirm which timezone is intended.
    if tzoffset_sign == '-':
        return dt - tzoffset
    return dt + tzoffset


def get_val(elem, field_name):
    """Return the value of child element ``field_name`` of ``elem``,
    converted according to the child's ``type`` attribute.

    * ``boolean`` - ``True`` / ``False``; raises ``ValueError`` when the
      text is neither "true" nor "false"
    * types ending with ``integer`` - ``int``
    * ``datetime`` - ``datetime``, or ``None`` when the element is empty
    * anything else - the element text, or ``''`` when it is empty

    A missing child element is not handled: ``elem.find`` then yields
    ``None`` and the attribute access fails.
    """
    field = elem.find(field_name)
    field_type = field.attrib.get('type', '')
    raw_val = field.text
    if field_type == 'boolean':
        if raw_val == 'true':
            return True
        if raw_val == 'false':
            return False
        raise ValueError('"true" or "false" expected, found "%s"' % raw_val)
    if field_type.endswith('integer'):
        return int(raw_val)
    if field_type == 'datetime':
        # test for emptiness BEFORE slicing: the text of an empty element
        # is None and slicing it would raise TypeError
        if not raw_val:
            return None
        return _parse_datetime(raw_val)
    return raw_val or ''


class Command(BaseCommand):
    """Management command that imports user accounts from a tarred and
    gzipped Zendesk dump; takes the path to the archive as its only
    argument.
    """

    def handle(self, *args, **kwargs):
        """Open the archive given in ``args[0]`` and run the import.

        Raises ``CommandError`` unless exactly one argument is given.
        """
        if len(args) != 1:
            raise CommandError('please provide path to tarred and gzipped zendesk dump')

        self.tar = tarfile.open(args[0], 'r:gz')
        try:
            sys.stdout.write("Importing user accounts: ")
            self.import_users()
            # Remaining import stages are not implemented yet:
            # openid associations, email settings, question/answer edits,
            # question/answer data, comments, question views,
            # favorite questions, marked tags, votes.
        finally:
            # release the archive even when the import fails
            self.tar.close()

    def get_file(self, file_name):
        """Return ``file_name`` from the archive parsed as an lxml tree.

        When the first archive member is not an ``.xml`` file it is taken
        to be the enclosing directory and is prepended to the path.
        Raises ``CommandError`` when the archive is empty.
        """
        member_names = self.tar.getnames()
        if not member_names:
            raise CommandError('the archive is empty')
        first_item = member_names[0]
        file_path = file_name
        if not first_item.endswith('.xml'):
            # tar member names always use "/" regardless of the platform
            file_path = posixpath.join(first_item, file_path)

        file_info = self.tar.getmember(file_path)
        xml_file = self.tar.extractfile(file_info)
        return etree.parse(xml_file)

    @transaction.commit_manually
    def import_users(self):
        """Create an askbot user for every ``user`` element of
        ``users.xml``, committing after each saved user.
        """
        xml = self.get_file('users.xml')
        added_users = 0
        try:
            for user in xml.findall('user'):
                #a whole bunch of fields are actually dropped now
                #see what's available in users.xml meanings of some
                #values there is not clear

                #special treatment for the user name
                username = unescape(get_val(user, 'name'))#unescape html entities
                username = get_unique_username(username)

                ab_user = models.User(
                    email = get_val(user, 'email'),
                    email_isvalid = get_val(user, 'is-verified'),
                    date_joined = get_val(user, 'created-at'),
                    username = username,
                    is_active = get_val(user, 'is-active'),
                )
                ab_user.save()
                added_users += 1
                console.print_action(ab_user.username)
                transaction.commit()
            console.print_action('%d users added' % added_users, nowipe = True)
            transaction.commit()
        except BaseException:
            # under commit_manually a pending transaction must be closed
            # explicitly, otherwise Django raises TransactionManagementError
            # and hides the real failure
            transaction.rollback()
            raise

# diff --git a/askbot/utils/console.py b/askbot/utils/console.py index
# 041fc839..470856b5 100644 --- a/askbot/utils/console.py +++ b/askbot/utils/console.py
# @@ -49,13 +49,28 @@ def open_new_file(prompt_phrase, extension = '', hint = None):
#     return file_object    (tail of a definition that starts outside this patch)


def print_action(action_text, nowipe = False):
    """Print ``action_text`` to the standard output.

    With ``nowipe`` false the text is then erased: the cursor is moved
    back with backspaces, the text is overwritten with spaces and the
    cursor is moved back again, so that the next output appears in the
    same place. With ``nowipe`` true a newline is written instead.
    """
    width = len(action_text)
    output = action_text
    unicode_type = type(u'')
    if unicode_type is not str and isinstance(output, unicode_type):
        # Python 2: sys.stdout.write() would encode unicode text as ascii,
        # so encode explicitly with the encoding of the stream
        encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        output = output.encode(encoding, 'replace')
    sys.stdout.write(output)
    sys.stdout.flush()
    if nowipe:
        sys.stdout.write('\n')
    else:
        #return to the beginning of the word
        sys.stdout.write('\b' * width)
        #white out the printed text
        sys.stdout.write(' ' * width)
        #return again
        sys.stdout.write('\b' * width)


def print_progress(elapsed, total, nowipe = False):
    """Print dynamic output of progress of some operation, in percent,
    to the console and clear the output with backspace characters to
    have the number increment in-place.

    Raises ``ZeroDivisionError`` when ``total`` is zero.
    """
    # the parentheses are required: "%" and "*" have equal precedence and
    # group left to right, so without them the formatted string itself
    # would be multiplied by a float
    percent = 100 * float(elapsed) / float(total)
    print_action('%6.2f%%' % percent, nowipe)


# diff --git a/askbot/utils/html.py b/askbot/utils/html.py index aa8e24d8..f6c168fb 100644
# --- a/askbot/utils/html.py +++ b/askbot/utils/html.py @@ -1,6 +1,7 @@
# Unchanged context shown by the patch (definitions are truncated in this view):
#   """Utilities for working with HTML."""
#   import html5lib
#   from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
#   class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin): acceptable_elements = ('a', 'abbr', 'acronym', 'address', 'b', 'big',
# @@ -49,3 +50,29 @@ def sanitize_html(html):
#   quote_attr_values=True) output_generator = s.serialize(stream) return u''.join(output_generator)
import re
try:
    import htmlentitydefs
except ImportError:
    # Python 3 name of the same module
    import html.entities as htmlentitydefs

try:
    _unichr = unichr
except NameError:
    # Python 3: chr() covers the whole unicode range
    _unichr = chr

_ENTITY_RE = re.compile(r"&#?\w+;")


def unescape(text):
    """source: http://effbot.org/zone/re-sub.htm#unescape-html
    Removes HTML or XML character references and entities from a text string.
    @param text The HTML (or XML) source text.
    @return The plain text, as a Unicode string, if necessary.

    References that cannot be resolved (unknown entity name, malformed or
    out-of-range number) are left in the text as they are.
    """
    def fixup(match):
        entity = match.group(0)
        if entity[:2] == "&#":
            # character reference
            try:
                if entity[:3].lower() == "&#x":
                    return _unichr(int(entity[3:-1], 16))
                return _unichr(int(entity[2:-1]))
            except (ValueError, OverflowError):
                # malformed digits or a code point out of range
                return entity # leave as is
        # named entity
        try:
            return _unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
        except KeyError:
            return entity # leave as is
    return _ENTITY_RE.sub(fixup, text)