diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2011-06-15 21:39:38 -0400 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2011-06-15 21:39:38 -0400 |
commit | ed06369aa64ecb8a48348a37781711e4263ba71f (patch) | |
tree | a493ac8060b0fe91c5ddc4b1e5b93a10c3486474 | |
parent | e36fa8e7364a2d86f6751b8679470bf849a7e428 (diff) | |
download | askbot-ed06369aa64ecb8a48348a37781711e4263ba71f.tar.gz askbot-ed06369aa64ecb8a48348a37781711e4263ba71f.tar.bz2 askbot-ed06369aa64ecb8a48348a37781711e4263ba71f.zip |
zendesk import works, but need a tool to swap answer with questions
-rw-r--r-- | askbot/importers/zendesk/management/commands/import_zendesk.py | 169 | ||||
-rw-r--r-- | askbot/importers/zendesk/models.py | 33 |
2 files changed, 163 insertions, 39 deletions
diff --git a/askbot/importers/zendesk/management/commands/import_zendesk.py b/askbot/importers/zendesk/management/commands/import_zendesk.py index 7549efe6..4229cbde 100644 --- a/askbot/importers/zendesk/management/commands/import_zendesk.py +++ b/askbot/importers/zendesk/management/commands/import_zendesk.py @@ -1,6 +1,10 @@ -"""importer from cnprog, please note, that you need an exporter in the first place -to use this command. -If you are interested to use it - please ask Evgeny <evgeny.fadeev@gmail.com> +"""importer from zendesk data dump +the dump must be a tar/gzipped file, containing one directory +with all the .xml files. + +Run this command as:: + + python manage.py import_zendesk path/to/dump.tgz """ import os import re @@ -9,6 +13,7 @@ import tarfile import tempfile from datetime import datetime, date from django.core.management.base import BaseCommand, CommandError +from django.conf import settings from django.db import transaction from lxml import etree from askbot import models as askbot_models @@ -46,6 +51,57 @@ def clean_username(name_seed): username = get_unique_username(username[:24]) return username +def create_askbot_user(zd_user): + """create askbot user from zendesk user record + return askbot user or None, if there is error + """ + #special treatment for the user name + raw_username = unescape(zd_user.name) + username = clean_username(raw_username) + if len(username) > 30:#nearly impossible skip such user + print "Warning: could not import user %s" % raw_username + return None + + if zd_user.email is None: + email = '' + else: + email = zd_user.email + + ab_user = askbot_models.User( + email = email, + email_isvalid = zd_user.is_verified, + date_joined = zd_user.created_at, + last_seen = zd_user.created_at,#add initial date for now + username = username, + is_active = zd_user.is_active + ) + ab_user.save() + return ab_user + +def post_question(zendesk_post): + """posts question to askbot, using zendesk post item""" + try: + return zendesk_post.get_author().post_question( + title = zendesk_post.get_fake_title(), + body_text = zendesk_post.get_body_text(), + tags = zendesk_post.get_tag_name(), + timestamp = zendesk_post.created_at + ) + except Exception, e: + msg = unicode(e) + print "Warning: post %d dropped: %s" % (zendesk_post.post_id, msg) + +def post_answer(zendesk_post, question = None): + try: + zendesk_post.get_author().post_answer( + question = question, + body_text = zendesk_post.get_body_text(), + timestamp = zendesk_post.created_at + ) + except Exception, e: + msg = unicode(e) + print "Warning: post %d dropped: %s" % (zendesk_post.post_id, msg) + def get_val(elem, field_name): field = elem.find(field_name) if field is None: @@ -93,17 +149,17 @@ class Command(BaseCommand): self.tar = tarfile.open(args[0], 'r:gz') - sys.stdout.write('Reading users.xml: ') - self.read_users() - sys.stdout.write('Reading posts.xml: ') - self.read_posts() - sys.stdout.write('Reading forums.xml: ') - self.read_forums() - + #sys.stdout.write('Reading users.xml: ') + #self.read_users() + #sys.stdout.write('Reading posts.xml: ') + #self.read_posts() + #sys.stdout.write('Reading forums.xml: ') + #self.read_forums() + sys.stdout.write("Importing user accounts: ") self.import_users() - #self.import_openid_associations() - #self.import_content() + sys.stdout.write("Loading threads: ") + self.import_content() def get_file(self, file_name): first_item = self.tar.getnames()[0] @@ -174,6 +230,9 @@ class Command(BaseCommand): 'body', 'created-at', 'updated-at', 'entry-id', 'forum-id', 'user-id', 'is-informative' ), + extra_field_mappings = ( + ('id', 'post_id'), + ) ) def read_forums(self): @@ -201,38 +260,70 @@ class Command(BaseCommand): #a whole bunch of fields are actually dropped now #see what's available in users.xml meanings of some #values there is not clear - try: - ab_user = askbot_models.User.objects.get(email = zd_user.email) - except askbot_models.User.DoesNotExist: - #special treatment for the user name - raw_username = unescape(zd_user.name) - username = clean_username(raw_username) - if len(username) > 30:#nearly impossible skip such user - print "Warning: could not import user %s" % raw_username - continue - if zd_user.email is None: - email = '' - else: - email = zd_user.email - - ab_user = askbot_models.User( - email = email, - email_isvalid = zd_user.is_verified, - date_joined = zd_user.created_at, - last_seen = zd_user.created_at,#add initial date for now - username = username, - is_active = zd_user.is_active - ) - ab_user.save() - added_users += 1 + #if email is blank, just create a new user + if zd_user.email == '': + ab_user = create_askbot_user(zd_user) + if ab_user in None: + print 'Warning: could not create user %s ' % zd_user.name + continue console.print_action(ab_user.username) + else: + #else see if user with the same email already exists + #and only create new askbot user if email is not yet in the + #database + try: + ab_user = askbot_models.User.objects.get(email = zd_user.email) + except askbot_models.User.DoesNotExist: + ab_user = create_askbot_user(zd_user) + if ab_user is None: + continue + console.print_action(ab_user.username, nowipe = True) + added_users += 1 zd_user.askbot_user_id = ab_user.id zd_user.save() + + if zd_user.openid_url != None and \ + 'askbot.deps.django_authopenid' in settings.INSTALLED_APPS: + from askbot.deps.django_authopenid.models import UserAssociation + from askbot.deps.django_authopenid.util import get_provider_name + try: + assoc = UserAssociation( + user = ab_user, + openid_url = zd_user.openid_url, + provider_name = get_provider_name(zd_user.openid_url) + ) + assoc.save() + except: + #drop user association + pass + transaction.commit() console.print_action('%d users added' % added_users, nowipe = True) + @transaction.commit_manually def import_content(self): - for zd_post in zendesk_models.Post.objects.all(): - if zd_post.is_processed: - continue + thread_ids = zendesk_models.Post.objects.values_list( + 'entry_id', + flat = True + ).distinct() + threads_posted = 0 + for thread_id in thread_ids: + thread_entries = zendesk_models.Post.objects.filter( + entry_id = thread_id + ).order_by('created_at') + question_post = thread_entries[0] + question = post_question(question_post) + question_post.is_processed = True + question_post.save() + transaction.commit() + entry_count = thread_entries.count() + threads_posted += 1 + console.print_action(str(threads_posted)) + if entry_count > 1: + for answer_post in thread_entries[1:]: + post_answer(answer_post, question = question) + answer_post.is_processed = True + answer_post.save() + transaction.commit() + console.print_action(str(threads_posted), nowipe = True) diff --git a/askbot/importers/zendesk/models.py b/askbot/importers/zendesk/models.py index 9ef42eac..6a321915 100644 --- a/askbot/importers/zendesk/models.py +++ b/askbot/importers/zendesk/models.py @@ -1,15 +1,48 @@ +import re from django.db import models +from django.contrib.auth.models import User as DjangoUser +from django.utils.html import strip_tags +from askbot.utils.html import unescape + +TAGS = {}#internal cache for mappings forum id -> forum name class Post(models.Model): body = models.TextField() created_at = models.DateTimeField() updated_at = models.DateTimeField() entry_id = models.IntegerField() + post_id = models.IntegerField() forum_id = models.IntegerField() user_id = models.IntegerField() is_informative = models.BooleanField() is_processed = models.BooleanField(default = False) + def get_author(self): + """returns author of the post, from the Django user table""" + zendesk_user = User.objects.get(user_id = self.user_id) + return DjangoUser.objects.get(id = zendesk_user.askbot_user_id) + + def get_body_text(self): + """unescapes html entities in the body text, + saves in the internal cache and returns the value""" + if not hasattr(self, '_body_text'): + self._body_text = unescape(self.body) + return self._body_text + + def get_fake_title(self): + """extract first 10 words from the body text and strip tags""" + words = re.split(r'\s+', self.get_body_text()) + if len(words) > 10: + words = words[:10] + return strip_tags(' '.join(words)) + + def get_tag_name(self): + if self.forum_id not in TAGS: + forum = Forum.objects.get(forum_id = self.forum_id) + tag_name = re.sub(r'\s+', '-', forum.name.lower()) + TAGS[self.forum_id] = tag_name + return TAGS[self.forum_id] + class User(models.Model): user_id = models.IntegerField() askbot_user_id = models.IntegerField(null = True) |