From 41cd8bd6165669eacef20215b9ae671d55111d5d Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Tue, 14 Jun 2011 02:06:45 -0400 Subject: user import from zendesk kinda works --- askbot/management/commands/import_zendesk.py | 133 +++++++++++++++++++++++++++ askbot/utils/console.py | 29 ++++-- askbot/utils/html.py | 27 ++++++ 3 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 askbot/management/commands/import_zendesk.py diff --git a/askbot/management/commands/import_zendesk.py b/askbot/management/commands/import_zendesk.py new file mode 100644 index 00000000..c4916f72 --- /dev/null +++ b/askbot/management/commands/import_zendesk.py @@ -0,0 +1,133 @@ +"""importer from cnprog, please note, that you need an exporter in the first place +to use this command. +If you are interested to use it - please ask Evgeny +""" +import os +import re +import sys +import tarfile +import tempfile +from datetime import datetime, date +from django.core.management.base import BaseCommand, CommandError +from django.db import transaction +from lxml import etree +from askbot import models +from askbot.utils import console +from askbot.utils.html import unescape + +#a hack, did not know how to parse timezone offset +ZERO_TIME = datetime.strptime('00:00', '%H:%M') + +def get_unique_username(name_seed): + """returns unique user name, by modifying the + name if the same name exists in the database + until the modified name is unique + """ + original_name = name_seed + attempt_no = 1 + while True: + try: + models.User.objects.get(username = name_seed) + name_seed = original_name + str(attempt_no) + attempt_no += 1 + except models.User.DoesNotExist: + return name_seed + +def get_val(elem, field_name): + field = elem.find(field_name) + try: + field_type = field.attrib['type'] + except KeyError: + field_type = '' + raw_val = field.text + if field_type == 'boolean': + if raw_val == 'true': + return True + elif raw_val == 'false': + return False + else: + raise ValueError('"true" or 
"false" expected, found "%s"' % raw_val) + elif field_type.endswith('integer'): + return int(raw_val) + elif field_type == 'datetime': + raw_datetime = raw_val[:19] + tzoffset_sign = raw_val[19] + raw_tzoffset = raw_val[20:] + if raw_val: + dt = datetime.strptime(raw_datetime, '%Y-%m-%dT%H:%M:%S') + tzoffset_amt = datetime.strptime(raw_tzoffset, '%H:%M') + tzoffset = tzoffset_amt - ZERO_TIME + if tzoffset_sign == '-': + return dt - tzoffset + else: + return dt + tzoffset + else: + return None + else: + if raw_val: + return raw_val + else: + return '' + +class Command(BaseCommand): + def handle(self, *args, **kwargs): + if len(args) != 1: + raise CommandError('please provide path to tarred and gzipped cnprog dump') + + self.tar = tarfile.open(args[0], 'r:gz') + + sys.stdout.write("Importing user accounts: ") + self.import_users() + #self.import_openid_associations() + #self.import_email_settings() + + #self.import_question_edits() + #self.import_answer_edits() + + #self.import_question_data() + #self.import_answer_data() + + #self.import_comments() + + #self.import_question_views() + #self.import_favorite_questions() + #self.import_marked_tags() + + #self.import_votes() + + def get_file(self, file_name): + first_item = self.tar.getnames()[0] + file_path = file_name + if not first_item.endswith('.xml'): + file_path = os.path.join(first_item, file_path) + + file_info = self.tar.getmember(file_path) + xml_file = self.tar.extractfile(file_info) + return etree.parse(xml_file) + + @transaction.commit_manually + def import_users(self): + xml = self.get_file('users.xml') + added_users = 0 + for user in xml.findall('user'): + #a whole bunch of fields are actually dropped now + #see what's available in users.xml meanings of some + #values there is not clear + + #special treatment for the user name + username = unescape(get_val(user, 'name'))#unescape html entities + username = get_unique_username(username) + + ab_user = models.User( + email = get_val(user, 'email'), + 
email_isvalid = get_val(user, 'is-verified'), + date_joined = get_val(user, 'created-at'), + username = username, + is_active = get_val(user, 'is-active'), + ) + ab_user.save() + added_users += 1 + console.print_action(ab_user.username) + transaction.commit() + console.print_action('%d users added' % added_users, nowipe = True) + transaction.commit() diff --git a/askbot/utils/console.py b/askbot/utils/console.py index 041fc839..470856b5 100644 --- a/askbot/utils/console.py +++ b/askbot/utils/console.py @@ -49,13 +49,28 @@ def open_new_file(prompt_phrase, extension = '', hint = None): return file_object -def print_progress(format_string, progress): +def print_action(action_text, nowipe = False): + """print the string to the standard output + then wipe it out to clear space + """ + #for some reason sys.stdout.write does not work here + #when action text is unicode + print action_text, + sys.stdout.flush() + if nowipe == False: + #return to the beginning of the word + sys.stdout.write('\b' * len(action_text)) + #white out the printed text + sys.stdout.write(' ' * len(action_text)) + #return again + sys.stdout.write('\b' * len(action_text)) + else: + sys.stdout.write('\n') + +def print_progress(elapsed, total, nowipe = False): """print dynamic output of progress of some - operation to the console and clear the output with + operation, in percent, to the console and clear the output with a backspace character to have the number increment in-place""" - output = format_string % progress - sys.stdout.write(output) - sys.stdout.flush() - sys.stdout.write('\b' * len(output)) - + output = '%6.2f%%' % 100 * float(elapsed)/float(total) + print_action(output, nowipe) diff --git a/askbot/utils/html.py b/askbot/utils/html.py index aa8e24d8..f6c168fb 100644 --- a/askbot/utils/html.py +++ b/askbot/utils/html.py @@ -1,6 +1,7 @@ """Utilities for working with HTML.""" import html5lib from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers +import re, 
htmlentitydefs class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin): acceptable_elements = ('a', 'abbr', 'acronym', 'address', 'b', 'big', @@ -49,3 +50,29 @@ def sanitize_html(html): quote_attr_values=True) output_generator = s.serialize(stream) return u''.join(output_generator) + +def unescape(text): + """source: http://effbot.org/zone/re-sub.htm#unescape-html + Removes HTML or XML character references and entities from a text string. + @param text The HTML (or XML) source text. + @return The plain text, as a Unicode string, if necessary. + """ + def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + except KeyError: + pass + return text # leave as is + return re.sub("&#?\w+;", fixup, text) -- cgit v1.2.3-1-g7c22 From e36fa8e7364a2d86f6751b8679470bf849a7e428 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Wed, 15 Jun 2011 02:45:56 -0400 Subject: all zendesk data read, still need to post questions and answers --- askbot/importers/zendesk/__init__.py | 0 askbot/importers/zendesk/management/__init__.py | 0 .../zendesk/management/commands/__init__.py | 0 .../zendesk/management/commands/import_zendesk.py | 238 +++++++++++++++++++++ askbot/importers/zendesk/models.py | 45 ++++ askbot/management/commands/import_zendesk.py | 133 ------------ 6 files changed, 283 insertions(+), 133 deletions(-) create mode 100644 askbot/importers/zendesk/__init__.py create mode 100644 askbot/importers/zendesk/management/__init__.py create mode 100644 askbot/importers/zendesk/management/commands/__init__.py create mode 100644 askbot/importers/zendesk/management/commands/import_zendesk.py create mode 100644 askbot/importers/zendesk/models.py delete mode 100644 askbot/management/commands/import_zendesk.py diff --git 
a/askbot/importers/zendesk/__init__.py b/askbot/importers/zendesk/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/askbot/importers/zendesk/management/__init__.py b/askbot/importers/zendesk/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/askbot/importers/zendesk/management/commands/__init__.py b/askbot/importers/zendesk/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/askbot/importers/zendesk/management/commands/import_zendesk.py b/askbot/importers/zendesk/management/commands/import_zendesk.py new file mode 100644 index 00000000..7549efe6 --- /dev/null +++ b/askbot/importers/zendesk/management/commands/import_zendesk.py @@ -0,0 +1,238 @@ +"""importer from cnprog, please note, that you need an exporter in the first place +to use this command. +If you are interested to use it - please ask Evgeny +""" +import os +import re +import sys +import tarfile +import tempfile +from datetime import datetime, date +from django.core.management.base import BaseCommand, CommandError +from django.db import transaction +from lxml import etree +from askbot import models as askbot_models +from askbot.utils import console +from askbot.utils.html import unescape + +from askbot.importers.zendesk import models as zendesk_models + +#a hack, did not know how to parse timezone offset +ZERO_TIME = datetime.strptime('00:00', '%H:%M') + +def get_unique_username(name_seed): + """returns unique user name, by modifying the + name if the same name exists in the database + until the modified name is unique + """ + original_name = name_seed + attempt_no = 1 + while True: + try: + askbot_models.User.objects.get(username = name_seed) + name_seed = original_name + str(attempt_no) + attempt_no += 1 + except askbot_models.User.DoesNotExist: + return name_seed + +def clean_username(name_seed): + """makes sure that the name is unique + and is no longer than 30 characters""" + username = 
get_unique_username(name_seed) + if len(username) > 30: + username = get_unique_username(username[:28]) + if len(username) > 30: + #will allow about a million extra possible unique names + username = get_unique_username(username[:24]) + return username + +def get_val(elem, field_name): + field = elem.find(field_name) + if field is None: + return None + try: + field_type = field.attrib['type'] + except KeyError: + field_type = '' + raw_val = field.text + if raw_val is None: + return None + + if field_type == 'boolean': + if raw_val == 'true': + return True + elif raw_val == 'false': + return False + else: + raise ValueError('"true" or "false" expected, found "%s"' % raw_val) + elif field_type.endswith('integer'): + return int(raw_val) + elif field_type == 'datetime': + if raw_val is None: + return None + raw_datetime = raw_val[:19] + tzoffset_sign = raw_val[19] + raw_tzoffset = raw_val[20:] + if raw_val: + dt = datetime.strptime(raw_datetime, '%Y-%m-%dT%H:%M:%S') + tzoffset_amt = datetime.strptime(raw_tzoffset, '%H:%M') + tzoffset = tzoffset_amt - ZERO_TIME + if tzoffset_sign == '-': + return dt - tzoffset + else: + return dt + tzoffset + else: + return None + else: + return raw_val + +class Command(BaseCommand): + def handle(self, *args, **kwargs): + if len(args) != 1: + raise CommandError('please provide path to tarred and gzipped cnprog dump') + + self.tar = tarfile.open(args[0], 'r:gz') + + sys.stdout.write('Reading users.xml: ') + self.read_users() + sys.stdout.write('Reading posts.xml: ') + self.read_posts() + sys.stdout.write('Reading forums.xml: ') + self.read_forums() + + sys.stdout.write("Importing user accounts: ") + self.import_users() + #self.import_openid_associations() + #self.import_content() + + def get_file(self, file_name): + first_item = self.tar.getnames()[0] + file_path = file_name + if not first_item.endswith('.xml'): + file_path = os.path.join(first_item, file_path) + + file_info = self.tar.getmember(file_path) + xml_file = 
self.tar.extractfile(file_info) + return etree.parse(xml_file) + + @transaction.commit_manually + def read_xml_file(self, + file_name = None, + entry_name = None, + model = None, + fields = None, + extra_field_mappings = None + ): + """ + * file_name - is name of xml file, + * entry_name - name of entries to read from the xml file + * model - model, which is to receive data + * fields - list of field names in xml that will be translated to model fields + by simple substitiution of '-' with '_' + * extra field mappings - list of two tuples where xml field names are + translated to model fields in a special way + """ + xml = self.get_file(file_name) + items_saved = 0 + for xml_entry in xml.findall(entry_name): + instance = model() + for field in fields: + value = get_val(xml_entry, field) + model_field_name = field.replace('-', '_') + setattr(instance, model_field_name, value) + if extra_field_mappings: + for (field, model_field_name) in extra_field_mappings: + value = get_val(xml_entry, field) + setattr(instance, model_field_name, value) + instance.save() + transaction.commit() + items_saved += 1 + console.print_action('%d items' % items_saved) + console.print_action('%d items' % items_saved, nowipe = True) + + + def read_users(self): + self.read_xml_file( + file_name = 'users.xml', + entry_name = 'user', + model = zendesk_models.User, + fields = ( + 'created-at', 'is-active', 'last-login', 'name', + 'openid-url', 'organization-id', 'phone', 'restriction-id', + 'roles', 'time-zone', 'updated-at', 'uses-12-hour-clock', + 'email', 'is-verified', 'photo-url' + ), + extra_field_mappings = (('id', 'user_id'),) + ) + + def read_posts(self): + self.read_xml_file( + file_name = 'posts.xml', + entry_name = 'post', + model = zendesk_models.Post, + fields = ( + 'body', 'created-at', 'updated-at', 'entry-id', + 'forum-id', 'user-id', 'is-informative' + ), + ) + + def read_forums(self): + self.read_xml_file( + file_name = 'forums.xml', + entry_name = 'forum', + model = 
zendesk_models.Forum, + fields = ( + 'description', 'display-type-id', + 'entries-count', 'is-locked', + 'name', 'organization-id', + 'position', 'updated-at', + 'translation-locale-id', + 'use-for-suggestions', + 'visibility-restriction-id', + 'is-public' + ), + extra_field_mappings = (('id', 'forum_id'),) + ) + + @transaction.commit_manually + def import_users(self): + added_users = 0 + for zd_user in zendesk_models.User.objects.all(): + #a whole bunch of fields are actually dropped now + #see what's available in users.xml meanings of some + #values there is not clear + try: + ab_user = askbot_models.User.objects.get(email = zd_user.email) + except askbot_models.User.DoesNotExist: + #special treatment for the user name + raw_username = unescape(zd_user.name) + username = clean_username(raw_username) + if len(username) > 30:#nearly impossible skip such user + print "Warning: could not import user %s" % raw_username + continue + + if zd_user.email is None: + email = '' + else: + email = zd_user.email + + ab_user = askbot_models.User( + email = email, + email_isvalid = zd_user.is_verified, + date_joined = zd_user.created_at, + last_seen = zd_user.created_at,#add initial date for now + username = username, + is_active = zd_user.is_active + ) + ab_user.save() + added_users += 1 + console.print_action(ab_user.username) + zd_user.askbot_user_id = ab_user.id + zd_user.save() + transaction.commit() + console.print_action('%d users added' % added_users, nowipe = True) + + def import_content(self): + for zd_post in zendesk_models.Post.objects.all(): + if zd_post.is_processed: + continue diff --git a/askbot/importers/zendesk/models.py b/askbot/importers/zendesk/models.py new file mode 100644 index 00000000..9ef42eac --- /dev/null +++ b/askbot/importers/zendesk/models.py @@ -0,0 +1,45 @@ +from django.db import models + +class Post(models.Model): + body = models.TextField() + created_at = models.DateTimeField() + updated_at = models.DateTimeField() + entry_id = 
models.IntegerField() + forum_id = models.IntegerField() + user_id = models.IntegerField() + is_informative = models.BooleanField() + is_processed = models.BooleanField(default = False) + +class User(models.Model): + user_id = models.IntegerField() + askbot_user_id = models.IntegerField(null = True) + created_at = models.DateTimeField() + is_active = models.BooleanField() + last_login = models.DateTimeField(null = True) + name = models.CharField(max_length = 255) + openid_url = models.URLField(null = True) + organization_id = models.IntegerField(null = True) + phone = models.CharField(max_length = 32, null = True) + restriction_id = models.IntegerField() + roles = models.IntegerField() + time_zone = models.CharField(max_length = 255) + updated_at = models.DateTimeField() + uses_12_hour_clock = models.BooleanField() + email = models.EmailField(null = True) + is_verified = models.BooleanField() + photo_url = models.URLField() + +class Forum(models.Model): + description = models.CharField(max_length = 255, null = True) + display_type_id = models.IntegerField() + entries_count = models.IntegerField() + forum_id = models.IntegerField() + is_locked = models.BooleanField() + name = models.CharField(max_length = 255) + organization_id = models.IntegerField(null = True) + position = models.IntegerField(null = True) + updated_at = models.DateTimeField() + translation_locale_id = models.IntegerField(null = True) + use_for_suggestions = models.BooleanField() + visibility_restriction_id = models.IntegerField() + is_public = models.BooleanField() diff --git a/askbot/management/commands/import_zendesk.py b/askbot/management/commands/import_zendesk.py deleted file mode 100644 index c4916f72..00000000 --- a/askbot/management/commands/import_zendesk.py +++ /dev/null @@ -1,133 +0,0 @@ -"""importer from cnprog, please note, that you need an exporter in the first place -to use this command. 
-If you are interested to use it - please ask Evgeny -""" -import os -import re -import sys -import tarfile -import tempfile -from datetime import datetime, date -from django.core.management.base import BaseCommand, CommandError -from django.db import transaction -from lxml import etree -from askbot import models -from askbot.utils import console -from askbot.utils.html import unescape - -#a hack, did not know how to parse timezone offset -ZERO_TIME = datetime.strptime('00:00', '%H:%M') - -def get_unique_username(name_seed): - """returns unique user name, by modifying the - name if the same name exists in the database - until the modified name is unique - """ - original_name = name_seed - attempt_no = 1 - while True: - try: - models.User.objects.get(username = name_seed) - name_seed = original_name + str(attempt_no) - attempt_no += 1 - except models.User.DoesNotExist: - return name_seed - -def get_val(elem, field_name): - field = elem.find(field_name) - try: - field_type = field.attrib['type'] - except KeyError: - field_type = '' - raw_val = field.text - if field_type == 'boolean': - if raw_val == 'true': - return True - elif raw_val == 'false': - return False - else: - raise ValueError('"true" or "false" expected, found "%s"' % raw_val) - elif field_type.endswith('integer'): - return int(raw_val) - elif field_type == 'datetime': - raw_datetime = raw_val[:19] - tzoffset_sign = raw_val[19] - raw_tzoffset = raw_val[20:] - if raw_val: - dt = datetime.strptime(raw_datetime, '%Y-%m-%dT%H:%M:%S') - tzoffset_amt = datetime.strptime(raw_tzoffset, '%H:%M') - tzoffset = tzoffset_amt - ZERO_TIME - if tzoffset_sign == '-': - return dt - tzoffset - else: - return dt + tzoffset - else: - return None - else: - if raw_val: - return raw_val - else: - return '' - -class Command(BaseCommand): - def handle(self, *args, **kwargs): - if len(args) != 1: - raise CommandError('please provide path to tarred and gzipped cnprog dump') - - self.tar = tarfile.open(args[0], 'r:gz') - - 
sys.stdout.write("Importing user accounts: ") - self.import_users() - #self.import_openid_associations() - #self.import_email_settings() - - #self.import_question_edits() - #self.import_answer_edits() - - #self.import_question_data() - #self.import_answer_data() - - #self.import_comments() - - #self.import_question_views() - #self.import_favorite_questions() - #self.import_marked_tags() - - #self.import_votes() - - def get_file(self, file_name): - first_item = self.tar.getnames()[0] - file_path = file_name - if not first_item.endswith('.xml'): - file_path = os.path.join(first_item, file_path) - - file_info = self.tar.getmember(file_path) - xml_file = self.tar.extractfile(file_info) - return etree.parse(xml_file) - - @transaction.commit_manually - def import_users(self): - xml = self.get_file('users.xml') - added_users = 0 - for user in xml.findall('user'): - #a whole bunch of fields are actually dropped now - #see what's available in users.xml meanings of some - #values there is not clear - - #special treatment for the user name - username = unescape(get_val(user, 'name'))#unescape html entities - username = get_unique_username(username) - - ab_user = models.User( - email = get_val(user, 'email'), - email_isvalid = get_val(user, 'is-verified'), - date_joined = get_val(user, 'created-at'), - username = username, - is_active = get_val(user, 'is-active'), - ) - ab_user.save() - added_users += 1 - console.print_action(ab_user.username) - transaction.commit() - console.print_action('%d users added' % added_users, nowipe = True) - transaction.commit() -- cgit v1.2.3-1-g7c22 From ed06369aa64ecb8a48348a37781711e4263ba71f Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Wed, 15 Jun 2011 21:39:38 -0400 Subject: zendesk import works, but need a tool to swap answer with questions --- .../zendesk/management/commands/import_zendesk.py | 169 ++++++++++++++++----- askbot/importers/zendesk/models.py | 33 ++++ 2 files changed, 163 insertions(+), 39 deletions(-) diff --git 
a/askbot/importers/zendesk/management/commands/import_zendesk.py b/askbot/importers/zendesk/management/commands/import_zendesk.py index 7549efe6..4229cbde 100644 --- a/askbot/importers/zendesk/management/commands/import_zendesk.py +++ b/askbot/importers/zendesk/management/commands/import_zendesk.py @@ -1,6 +1,10 @@ -"""importer from cnprog, please note, that you need an exporter in the first place -to use this command. -If you are interested to use it - please ask Evgeny +"""importer from zendesk data dump +the dump must be a tar/gzipped file, containing one directory +with all the .xml files. + +Run this command as:: + + python manage.py import_zendesk path/to/dump.tgz """ import os import re @@ -9,6 +13,7 @@ import tarfile import tempfile from datetime import datetime, date from django.core.management.base import BaseCommand, CommandError +from django.conf import settings from django.db import transaction from lxml import etree from askbot import models as askbot_models @@ -46,6 +51,57 @@ def clean_username(name_seed): username = get_unique_username(username[:24]) return username +def create_askbot_user(zd_user): + """create askbot user from zendesk user record + return askbot user or None, if there is error + """ + #special treatment for the user name + raw_username = unescape(zd_user.name) + username = clean_username(raw_username) + if len(username) > 30:#nearly impossible skip such user + print "Warning: could not import user %s" % raw_username + return None + + if zd_user.email is None: + email = '' + else: + email = zd_user.email + + ab_user = askbot_models.User( + email = email, + email_isvalid = zd_user.is_verified, + date_joined = zd_user.created_at, + last_seen = zd_user.created_at,#add initial date for now + username = username, + is_active = zd_user.is_active + ) + ab_user.save() + return ab_user + +def post_question(zendesk_post): + """posts question to askbot, using zendesk post item""" + try: + return zendesk_post.get_author().post_question( + title 
= zendesk_post.get_fake_title(), + body_text = zendesk_post.get_body_text(), + tags = zendesk_post.get_tag_name(), + timestamp = zendesk_post.created_at + ) + except Exception, e: + msg = unicode(e) + print "Warning: post %d dropped: %s" % (zendesk_post.post_id, msg) + +def post_answer(zendesk_post, question = None): + try: + zendesk_post.get_author().post_answer( + question = question, + body_text = zendesk_post.get_body_text(), + timestamp = zendesk_post.created_at + ) + except Exception, e: + msg = unicode(e) + print "Warning: post %d dropped: %s" % (zendesk_post.post_id, msg) + def get_val(elem, field_name): field = elem.find(field_name) if field is None: @@ -93,17 +149,17 @@ class Command(BaseCommand): self.tar = tarfile.open(args[0], 'r:gz') - sys.stdout.write('Reading users.xml: ') - self.read_users() - sys.stdout.write('Reading posts.xml: ') - self.read_posts() - sys.stdout.write('Reading forums.xml: ') - self.read_forums() - + #sys.stdout.write('Reading users.xml: ') + #self.read_users() + #sys.stdout.write('Reading posts.xml: ') + #self.read_posts() + #sys.stdout.write('Reading forums.xml: ') + #self.read_forums() + sys.stdout.write("Importing user accounts: ") self.import_users() - #self.import_openid_associations() - #self.import_content() + sys.stdout.write("Loading threads: ") + self.import_content() def get_file(self, file_name): first_item = self.tar.getnames()[0] @@ -174,6 +230,9 @@ class Command(BaseCommand): 'body', 'created-at', 'updated-at', 'entry-id', 'forum-id', 'user-id', 'is-informative' ), + extra_field_mappings = ( + ('id', 'post_id'), + ) ) def read_forums(self): @@ -201,38 +260,70 @@ class Command(BaseCommand): #a whole bunch of fields are actually dropped now #see what's available in users.xml meanings of some #values there is not clear - try: - ab_user = askbot_models.User.objects.get(email = zd_user.email) - except askbot_models.User.DoesNotExist: - #special treatment for the user name - raw_username = unescape(zd_user.name) - 
username = clean_username(raw_username) - if len(username) > 30:#nearly impossible skip such user - print "Warning: could not import user %s" % raw_username - continue - if zd_user.email is None: - email = '' - else: - email = zd_user.email - - ab_user = askbot_models.User( - email = email, - email_isvalid = zd_user.is_verified, - date_joined = zd_user.created_at, - last_seen = zd_user.created_at,#add initial date for now - username = username, - is_active = zd_user.is_active - ) - ab_user.save() - added_users += 1 + #if email is blank, just create a new user + if zd_user.email == '': + ab_user = create_askbot_user(zd_user) + if ab_user in None: + print 'Warning: could not create user %s ' % zd_user.name + continue console.print_action(ab_user.username) + else: + #else see if user with the same email already exists + #and only create new askbot user if email is not yet in the + #database + try: + ab_user = askbot_models.User.objects.get(email = zd_user.email) + except askbot_models.User.DoesNotExist: + ab_user = create_askbot_user(zd_user) + if ab_user is None: + continue + console.print_action(ab_user.username, nowipe = True) + added_users += 1 zd_user.askbot_user_id = ab_user.id zd_user.save() + + if zd_user.openid_url != None and \ + 'askbot.deps.django_authopenid' in settings.INSTALLED_APPS: + from askbot.deps.django_authopenid.models import UserAssociation + from askbot.deps.django_authopenid.util import get_provider_name + try: + assoc = UserAssociation( + user = ab_user, + openid_url = zd_user.openid_url, + provider_name = get_provider_name(zd_user.openid_url) + ) + assoc.save() + except: + #drop user association + pass + transaction.commit() console.print_action('%d users added' % added_users, nowipe = True) + @transaction.commit_manually def import_content(self): - for zd_post in zendesk_models.Post.objects.all(): - if zd_post.is_processed: - continue + thread_ids = zendesk_models.Post.objects.values_list( + 'entry_id', + flat = True + ).distinct() + 
threads_posted = 0 + for thread_id in thread_ids: + thread_entries = zendesk_models.Post.objects.filter( + entry_id = thread_id + ).order_by('created_at') + question_post = thread_entries[0] + question = post_question(question_post) + question_post.is_processed = True + question_post.save() + transaction.commit() + entry_count = thread_entries.count() + threads_posted += 1 + console.print_action(str(threads_posted)) + if entry_count > 1: + for answer_post in thread_entries[1:]: + post_answer(answer_post, question = question) + answer_post.is_processed = True + answer_post.save() + transaction.commit() + console.print_action(str(threads_posted), nowipe = True) diff --git a/askbot/importers/zendesk/models.py b/askbot/importers/zendesk/models.py index 9ef42eac..6a321915 100644 --- a/askbot/importers/zendesk/models.py +++ b/askbot/importers/zendesk/models.py @@ -1,15 +1,48 @@ +import re from django.db import models +from django.contrib.auth.models import User as DjangoUser +from django.utils.html import strip_tags +from askbot.utils.html import unescape + +TAGS = {}#internal cache for mappings forum id -> forum name class Post(models.Model): body = models.TextField() created_at = models.DateTimeField() updated_at = models.DateTimeField() entry_id = models.IntegerField() + post_id = models.IntegerField() forum_id = models.IntegerField() user_id = models.IntegerField() is_informative = models.BooleanField() is_processed = models.BooleanField(default = False) + def get_author(self): + """returns author of the post, from the Django user table""" + zendesk_user = User.objects.get(user_id = self.user_id) + return DjangoUser.objects.get(id = zendesk_user.askbot_user_id) + + def get_body_text(self): + """unescapes html entities in the body text, + saves in the internal cache and returns the value""" + if not hasattr(self, '_body_text'): + self._body_text = unescape(self.body) + return self._body_text + + def get_fake_title(self): + """extract first 10 words from the body text 
and strip tags""" + words = re.split(r'\s+', self.get_body_text()) + if len(words) > 10: + words = words[:10] + return strip_tags(' '.join(words)) + + def get_tag_name(self): + if self.forum_id not in TAGS: + forum = Forum.objects.get(forum_id = self.forum_id) + tag_name = re.sub(r'\s+', '-', forum.name.lower()) + TAGS[self.forum_id] = tag_name + return TAGS[self.forum_id] + class User(models.Model): user_id = models.IntegerField() askbot_user_id = models.IntegerField(null = True) -- cgit v1.2.3-1-g7c22 From a199515ba60f8a74faabff5753714d211a7a13b0 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Thu, 16 Jun 2011 19:04:57 -0400 Subject: added option to allow swapping answer with question --- askbot/conf/forum_data_rules.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/askbot/conf/forum_data_rules.py b/askbot/conf/forum_data_rules.py index 3392278b..6ce169fa 100644 --- a/askbot/conf/forum_data_rules.py +++ b/askbot/conf/forum_data_rules.py @@ -63,6 +63,20 @@ settings.register( ) ) +settings.register( + livesettings.BooleanValue( + FORUM_DATA_RULES, + 'ALLOW_SWAPPING_ANSWER_WITH_QUESTION', + default = False, + description = _('Allow swapping answer with question'), + help_text = _( + 'This setting will help import data from other forums ' + 'such as zendesk, when automatic ' + 'data import fails to detect the original question correctly.' 
+ ) + ) +) + settings.register( livesettings.IntegerValue( FORUM_DATA_RULES, -- cgit v1.2.3-1-g7c22 From d3c84e5fe9751434f4d3ad4afb0eace8d9219a44 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Fri, 17 Jun 2011 04:01:58 -0400 Subject: added swap answer with question feature --- askbot/conf/forum_data_rules.py | 2 +- askbot/models/answer.py | 46 +++++++++++++++++++++++++ askbot/models/question.py | 36 +++++++++++++++++--- askbot/skins/default/media/js/post.js | 51 ++++++++++++++++++++++++++++ askbot/skins/default/templates/question.html | 6 ++++ askbot/urls.py | 5 +++ askbot/views/commands.py | 20 ++++++++++- 7 files changed, 160 insertions(+), 6 deletions(-) diff --git a/askbot/conf/forum_data_rules.py b/askbot/conf/forum_data_rules.py index 6ce169fa..dc801d41 100644 --- a/askbot/conf/forum_data_rules.py +++ b/askbot/conf/forum_data_rules.py @@ -66,7 +66,7 @@ settings.register( settings.register( livesettings.BooleanValue( FORUM_DATA_RULES, - 'ALLOW_SWAPPING_ANSWER_WITH_QUESTION', + 'ALLOW_SWAPPING_QUESTION_WITH_ANSWER', default = False, description = _('Allow swapping answer with question'), help_text = _( diff --git a/askbot/models/answer.py b/askbot/models/answer.py index c268551d..09b5a444 100644 --- a/askbot/models/answer.py +++ b/askbot/models/answer.py @@ -161,6 +161,52 @@ class Answer(content.Content, DeletableContent): self.question.last_activity_by = edited_by self.question.save() + def repost_as_question(self, new_title = None): + """posts answer as question, together with all the comments + while preserving time stamps and authors + does not delete the answer itself though + """ + revisions = self.revisions.all().order_by('revised_at') + rev0 = revisions[0] + new_question = rev0.author.post_question( + title = new_title, + body_text = rev0.text, + tags = self.question.tagnames, + wiki = self.question.wiki, + is_anonymous = self.question.is_anonymous, + timestamp = rev0.revised_at + ) + if len(revisions) > 1: + for rev in revisions[1:]: + 
rev.author.edit_question( + question = new_question, + body_text = rev.text, + revision_comment = rev.summary, + timestamp = rev.revised_at + ) + for comment in self.comments.all(): + comment.content_object = new_question + comment.save() + return new_question + + def swap_with_question(self, new_title = None): + """swaps answer with the question it belongs to and + sets the title of question to ``new_title`` + """ + #1) make new question by using new title, tags of old question + # and the answer body, as well as the authors of all revisions + # and repost all the comments + new_question = self.repost_as_question(new_title = new_title) + + #2) post question (all revisions and comments) as answer + new_answer = self.question.repost_as_answer(question = new_question) + + #3) assign all remaining answers to the new question + self.question.answers.update(question = new_question) + self.question.delete() + self.delete() + return new_question + def add_revision(self, author=None, revised_at=None, text=None, comment=None): #todo: this may be identical to Question.add_revision if None in (author, revised_at, text): diff --git a/askbot/models/question.py b/askbot/models/question.py index 41579c11..7a2be48c 100644 --- a/askbot/models/question.py +++ b/askbot/models/question.py @@ -457,15 +457,13 @@ class Question(content.Content, DeletableContent): self.save() def update_favorite_count(self): - """ - update favourite_count for given question + """update favourite_count for given question """ self.favourite_count = FavoriteQuestion.objects.filter( question=self ).count() self.save() - def get_similar_questions(self): """ Get 10 similar questions for given one. 
@@ -602,6 +600,31 @@ class Question(content.Content, DeletableContent): return False + def repost_as_answer(self, question = None): + """posts question as answer to another question, + but does not delete the question, + but moves all the comments to the new answer""" + revisions = self.revisions.all().order_by('revised_at') + rev0 = revisions[0] + new_answer = rev0.author.post_answer( + question = question, + body_text = rev0.text, + wiki = self.wiki, + timestamp = rev0.revised_at + ) + if len(revisions) > 1: + for rev in revisions: + rev.author.edit_answer( + answer = new_answer, + body_text = rev.text, + revision_comment = rev.summary, + timestamp = rev.revised_at + ) + for comment in self.comments.all(): + comment.content_object = new_answer + comment.save() + return new_answer + def delete(self): super(Question, self).delete() try: @@ -782,7 +805,12 @@ class Question(content.Content, DeletableContent): if no_slug == True: return url else: - return url + django_urlquote(slugify(self.title)) + return url + django_urlquote(self.slug) + + def _get_slug(self): + return slugify(self.title) + + slug = property(_get_slug) def has_favorite_by_user(self, user): if not user.is_authenticated(): diff --git a/askbot/skins/default/media/js/post.js b/askbot/skins/default/media/js/post.js index 0d84508e..a228e9ce 100644 --- a/askbot/skins/default/media/js/post.js +++ b/askbot/skins/default/media/js/post.js @@ -1394,11 +1394,62 @@ var socialSharing = function(){ } }(); +/** + * @constructor + * @extends {SimpleControl} + */ +var QASwapper = function(){ + SimpleControl.call(this); + this._ans_id = null; +}; +inherits(QASwapper, SimpleControl); + +QASwapper.prototype.decorate = function(element){ + this._element = element; + this._ans_id = parseInt(element.attr('id').split('-').pop()); + var me = this; + this.setHandler(function(){ + me.startSwapping(); + }); +}; + +QASwapper.prototype.startSwapping = function(){ + while (true){ + var title = prompt(gettext('Please enter question 
title (>10 characters)')); + if (title.length >= 10){ + var data = {new_title: title, answer_id: this._ans_id}; + $.ajax({ + type: "POST", + cache: false, + dataType: "json", + url: askbot['urls']['swap_question_with_answer'], + data: data, + success: function(data){ + var url_template = askbot['urls']['question_url_template']; + new_question_url = url_template.replace( + '{{QuestionID}}', + data['id'] + ).replace( + '{{questionSlug}}', + data['slug'] + ); + window.location.href = new_question_url; + } + }); + break; + } + } +}; + $(document).ready(function() { $('[id^="comments-for-"]').each(function(index, element){ var comments = new PostCommentsWidget(); comments.decorate(element); }); + $('[id^="swap-question-with-answer-"]').each(function(idx, element){ + var swapper = new QASwapper(); + swapper.decorate($(element)); + }); questionRetagger.init(); socialSharing.init(); }); diff --git a/askbot/skins/default/templates/question.html b/askbot/skins/default/templates/question.html index 6b51c756..b9f3edc5 100644 --- a/askbot/skins/default/templates/question.html +++ b/askbot/skins/default/templates/question.html @@ -264,6 +264,11 @@ {% endspaceless %} {% endif %} + {% if settings.ALLOW_SWAPPING_QUESTION_WITH_ANSWER %}{{ pipe() }} + + {% trans %}swap with question{% endtrans %} + + {% endif %}
{{ @@ -441,6 +446,7 @@ askbot['urls']['question_url_template'] = scriptUrl + '{% trans %}question/{% endtrans %}{{ "{{QuestionID}}/{{questionSlug}}" }}';{# yes it needs to be that whacky #} askbot['urls']['user_signin'] = '{{ settings.LOGIN_URL }}'; askbot['urls']['vote_url_template'] = scriptUrl + '{% trans %}questions/{% endtrans %}{{ "{{QuestionID}}/" }}{% trans %}vote/{% endtrans %}'; + askbot['urls']['swap_question_with_answer'] = '{% url swap_question_with_answer %}'; askbot['messages']['addComment'] = '{% trans %}add comment{% endtrans %}'; {% if settings.SAVE_COMMENT_ON_ENTER %} askbot['settings']['saveCommentOnEnter'] = true; diff --git a/askbot/urls.py b/askbot/urls.py index 99ff6300..714ce4ca 100644 --- a/askbot/urls.py +++ b/askbot/urls.py @@ -170,6 +170,11 @@ urlpatterns = patterns('', views.commands.get_tag_list, name = 'get_tag_list' ), + url( + r'^swap-question-with-answer/', + views.commands.swap_question_with_answer, + name = 'swap_question_with_answer' + ), url( r'^%s$' % _('subscribe-for-tags/'), views.commands.subscribe_for_tags, diff --git a/askbot/views/commands.py b/askbot/views/commands.py index 9b9e7af5..5b7e8f18 100644 --- a/askbot/views/commands.py +++ b/askbot/views/commands.py @@ -9,7 +9,7 @@ from django.conf import settings as django_settings from django.core import exceptions from django.core.urlresolvers import reverse from django.contrib.auth.decorators import login_required -from django.http import HttpResponse, HttpResponseRedirect +from django.http import HttpResponse, HttpResponseRedirect, Http404 from django.forms import ValidationError from django.shortcuts import get_object_or_404 from django.views.decorators import csrf @@ -518,6 +518,24 @@ def reopen(request, id):#re-open question request.user.message_set.create(message = unicode(e)) return HttpResponseRedirect(question.get_absolute_url()) + +@decorators.ajax_only +def swap_question_with_answer(request): + """receives two json parameters - answer id + and new question 
title + the view is made to be used only by the site administrator + or moderators + """ + if request.user.is_authenticated(): + if request.user.is_administrator() or request.user.is_moderator(): + answer = models.Answer.objects.get(id = request.POST['answer_id']) + new_question = answer.swap_with_question(new_title = request.POST['new_title']) + return { + 'id': new_question.id, + 'slug': new_question.slug + } + raise Http404 + #askbot-user communication system def read_message(request):#marks message a read if request.method == "POST": -- cgit v1.2.3-1-g7c22 From 90b0d11c5c2f2cde507bbfc204e49befdecf4540 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Fri, 17 Jun 2011 18:08:42 -0400 Subject: added documentation for zendesk import --- askbot/doc/source/import-data.rst | 32 ++++++++++++++++++++++++++------ askbot/doc/source/index.rst | 2 +- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/askbot/doc/source/import-data.rst b/askbot/doc/source/import-data.rst index c2cfc1b3..e0158d81 100644 --- a/askbot/doc/source/import-data.rst +++ b/askbot/doc/source/import-data.rst @@ -4,17 +4,37 @@ Import other forums into Askbot =============================== -At this time only StackExchange import is supported. +Askbot supports importing of data from StackExchange and Zendesk. -There are two ways to import your StackExchange dump into Askbot: +.. warning:: + If your database contains any data prior to importing, please back it up before proceeding. 
+ +StackExchange +============= + +Add `askbot.importers.stackexchange` to the list of `INSTALLED_APPS` list in your `settings.py`, then run:: + python manage.py syncdb + +Then there will be two ways to import your StackExchange dump: * via the web at url `/import-data/`, relative to your forum installation * using a management command:: python manage.py load_stackexchange /path/to/your-se-data.zip -Before importing the data, an entry `askbot.importers.stackexchange` must be added to -the `INSTALLED_APPS` list in your `settings.py` file and a command `python manage.py syncdb` run -to initialize the stackexchange tables. - In the case your database is not empty at the beginning of the process - **please do back it up**. + +Zendesk +======= +Add `askbot.importers.zendesk` to the list of `INSTALLED_APPS` in the `settings.py`, +run `python manage.py syncdb`. + +Prepare your zendesk files: put all your .xml files into one directory and tar-zip it:: + + mkdir somedir + mv *.xml somedir #select the zendesk xml files and move them to the directory + tar cvfz zendesk.tgz somedir #name of the tgz file is not important + +Then run the import script:: + + python manage.py import_zendesk zendesk.tgz #file name is the parameter diff --git a/askbot/doc/source/index.rst b/askbot/doc/source/index.rst index c8580a46..2653b2df 100644 --- a/askbot/doc/source/index.rst +++ b/askbot/doc/source/index.rst @@ -19,7 +19,7 @@ at the forum_ or by email at admin@askbot.org Create and configure the site files Initialize the database tables Deploy on a webserver - Import data (StackExchange) + Import data (StackExchange & ZenDesk) Appendix A: Maintenance procedures Appendix B: Sending email to askbot Appendix C: Optional modules -- cgit v1.2.3-1-g7c22 From f721f09f9a5c69177d761019aab1700e7dd7497a Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Fri, 17 Jun 2011 19:25:21 -0400 Subject: added dummy transaction module to ease debugging of code under transaction control --- 
askbot/utils/dummy_transaction.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 askbot/utils/dummy_transaction.py diff --git a/askbot/utils/dummy_transaction.py b/askbot/utils/dummy_transaction.py new file mode 100644 index 00000000..87ba38e7 --- /dev/null +++ b/askbot/utils/dummy_transaction.py @@ -0,0 +1,24 @@ +"""Dummy transaction module, use instead of :mod:`django.db.transaction` +when you want to debug code that would normally run under transaction management. +Usage:: + + from askbot.utils import dummy_transaction as transaction + + @transaction.commit_manually + def do_something(): + #your code making changes to the database + transaction.commit() + return +""" +import functools + +def commit_manually(func): + """fake ``commit_manually`` decorator""" + @functools.wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + return wrapper + +def commit(): + """fake transaction commit""" + pass -- cgit v1.2.3-1-g7c22 From 19f18a8f599eae03bf73e404ab2ed7687ac9ff24 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Fri, 17 Jun 2011 19:32:05 -0400 Subject: striptagged and escaped the meta keywords injection from the question template --- askbot/skins/default/templates/question.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/askbot/skins/default/templates/question.html b/askbot/skins/default/templates/question.html index b9f3edc5..4114865b 100644 --- a/askbot/skins/default/templates/question.html +++ b/askbot/skins/default/templates/question.html @@ -3,7 +3,7 @@ {% block title %}{% spaceless %}{{ question.get_question_title() }}{% endspaceless %}{% endblock %} {% block meta_description %} - + {% endblock %} {% block keywords %}{{question.tagname_meta_generator()}}{% endblock %} {% block forestyle %} -- cgit v1.2.3-1-g7c22 From 845f5960a312ae21319b7eda2e584304bb48045f Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Fri, 17 Jun 2011 22:28:30 -0400 Subject: show swap question with answer 
link only to admins and moderators --- askbot/doc/source/import-data.rst | 10 +++++++++- askbot/models/__init__.py | 4 ++++ askbot/skins/default/templates/question.html | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/askbot/doc/source/import-data.rst b/askbot/doc/source/import-data.rst index e0158d81..44b902b5 100644 --- a/askbot/doc/source/import-data.rst +++ b/askbot/doc/source/import-data.rst @@ -13,6 +13,7 @@ StackExchange ============= Add `askbot.importers.stackexchange` to the list of `INSTALLED_APPS` list in your `settings.py`, then run:: + python manage.py syncdb Then there will be two ways to import your StackExchange dump: @@ -22,7 +23,6 @@ Then there will be two ways to import your StackExchange dump: python manage.py load_stackexchange /path/to/your-se-data.zip -In the case your database is not empty at the beginning of the process - **please do back it up**. Zendesk ======= @@ -38,3 +38,11 @@ Prepare your zendesk files: put all your .xml files into one directory and tar-z Then run the import script:: python manage.py import_zendesk zendesk.tgz #file name is the parameter + +.. note:: + The import script may misidentify which post in a group is the + question, because of some specifics of the Zendesk data format. + If that happens, please enable the feature + "Forum data rules"->"Allow swapping answer with question" + in :ref:`live settings ` and use it from an administrator or + moderator account. 
diff --git a/askbot/models/__init__.py b/askbot/models/__init__.py index 74211c23..8fa01fd2 100644 --- a/askbot/models/__init__.py +++ b/askbot/models/__init__.py @@ -1373,6 +1373,9 @@ def user_add_missing_askbot_subscriptions(self): def user_is_moderator(self): return (self.status == 'm' and self.is_administrator() == False) +def user_is_administrator_or_moderator(self): + return (self.is_administrator() or self.is_moderator()) + def user_is_suspended(self): return (self.status == 's') @@ -1950,6 +1953,7 @@ User.add_to_class('mark_tags', user_mark_tags) User.add_to_class('update_response_counts', user_update_response_counts) User.add_to_class('can_have_strong_url', user_can_have_strong_url) User.add_to_class('is_administrator', user_is_administrator) +User.add_to_class('is_administrator_or_moderator', user_is_administrator_or_moderator) User.add_to_class('set_admin_status', user_set_admin_status) User.add_to_class('remove_admin_status', user_remove_admin_status) User.add_to_class('is_moderator', user_is_moderator) diff --git a/askbot/skins/default/templates/question.html b/askbot/skins/default/templates/question.html index 4114865b..d0eadaef 100644 --- a/askbot/skins/default/templates/question.html +++ b/askbot/skins/default/templates/question.html @@ -264,7 +264,7 @@ {% endspaceless %} {% endif %} - {% if settings.ALLOW_SWAPPING_QUESTION_WITH_ANSWER %}{{ pipe() }} + {% if settings.ALLOW_SWAPPING_QUESTION_WITH_ANSWER and request.user.is_authenticated() and request.user.is_administrator_or_moderator() %}{{ pipe() }} {% trans %}swap with question{% endtrans %} -- cgit v1.2.3-1-g7c22