summary refs log tree commit diff stats
path: root/askbot/management
diff options
context:
space:
mode:
author: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2013-04-08 06:28:37 -0400
committer: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2013-04-08 06:28:37 -0400
commit 0906b8d3e2e0c4097cb4ad5f1d21c2bf411c179e (patch)
tree 0165ba4a25668fb8daa216134c4f38f492819b5c /askbot/management
parent c9b07025ceb5ff50dcae2d845bcdc63efb000d7d (diff)
download askbot-0906b8d3e2e0c4097cb4ad5f1d21c2bf411c179e.tar.gz
askbot-0906b8d3e2e0c4097cb4ad5f1d21c2bf411c179e.tar.bz2
askbot-0906b8d3e2e0c4097cb4ad5f1d21c2bf411c179e.zip
added import from Jive forums to Askbot
Diffstat (limited to 'askbot/management')
-rw-r--r-- askbot/management/commands/askbot_import_jive.py 139
1 files changed, 139 insertions, 0 deletions
diff --git a/askbot/management/commands/askbot_import_jive.py b/askbot/management/commands/askbot_import_jive.py
new file mode 100644
index 00000000..90bbfd98
--- /dev/null
+++ b/askbot/management/commands/askbot_import_jive.py
@@ -0,0 +1,139 @@
+from askbot import models
+from askbot.conf import settings as askbot_settings
+from askbot.utils.console import ProgressBar
+from askbot.utils.slug import slugify
+from bs4 import BeautifulSoup
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from django.forms import EmailField, ValidationError
+from datetime import datetime
+
+"""
+Jive --> CategoryList --> Category --> ForumList --> Forum
+ <Name>ouaou</Name>
+ <CreationDate>2008-05-06-0249</CreationDate>
+ <ModifiedDate/>
+ <ThreadList>
+ <Thread id="4046">
+ <CreationDate>2013/03/08 01:50:42.54 CST</CreationDate>
+ <ModifiedDate>2013/03/12 23:44:45.528 CDT</ModifiedDate>
+ <Message id="16809">
+ <Subject>Need help setting up mirror space</Subject>
+ <Body>Body text</Body>
+ <Username>jfawcett</Username>
+ <CreationDate>2013/03/08 01:50:42.54 CST</CreationDate>
+ <ModifiedDate>2013/03/08 01:50:42.54 CST</ModifiedDate>
+ <MessageList>
+ </MessageList>
+ </Message>
+ </Thread>
+"""
+
def parse_date(date_str):
    """Convert a Jive timestamp string to a naive ``datetime``.

    Jive writes timestamps such as ``2013/03/08 01:50:42.54 CST``: a date,
    a time whose fractional-second part varies in length, and a time zone
    abbreviation.  Only the date and the whole seconds are used; the
    fraction and the zone are dropped, so the result carries no tzinfo.

    Raises ``ValueError`` when the leading part of ``date_str`` is not in
    ``%Y/%m/%d %H:%M:%S`` form.
    """
    # Cutting a fixed number of trailing characters breaks as soon as the
    # fraction is not exactly three digits long (a two-digit fraction makes
    # the cut eat into the seconds), so pick the date and time tokens
    # explicitly and cut the time at the decimal point instead.
    tokens = date_str.split()
    stamp = ' '.join(tokens[:2]).split('.', 1)[0]
    return datetime.strptime(stamp, '%Y/%m/%d %H:%M:%S')
+
class Command(BaseCommand):
    """Management command importing a Jive forum XML dump into Askbot.

    Users found in the dump are created first.  Then, for every forum,
    each thread is imported with the forum name as its tag: the first
    message of the thread becomes a question, the messages directly inside
    its message list become answers, and every message nested below an
    answer becomes a comment on that answer.
    """
    args = '<jive-dump.xml>'

    def __init__(self, *args, **kwargs):
        """Set up the command and loosen settings that would get in the
        way of a bulk import."""
        super(Command, self).__init__(*args, **kwargs)
        # relax certain settings
        askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False)
        askbot_settings.update('MAX_COMMENT_LENGTH', 1000000)
        askbot_settings.update('MIN_REP_TO_LEAVE_COMMENTS', 1)
        # counter used to build unique placeholder email addresses
        self.bad_email_count = 0

    def handle(self, *args, **kwargs):
        """Read the dump file named by the single positional argument
        and import its users and forums.

        Raises ``CommandError`` unless exactly one argument is given.
        """
        # An assert would be stripped under ``python -O``; CommandError is
        # what the management framework expects for bad invocations.
        if len(args) != 1:
            raise CommandError('Dump file name is required')

        # Close the dump file as soon as it has been read.
        with open(args[0], 'r') as dump_file:
            xml = dump_file.read()

        try:
            # imported only to find out whether the lxml parser is available
            import lxml
            soup = BeautifulSoup(xml, 'lxml')
        except ImportError:
            soup = BeautifulSoup(xml)

        self.import_users(soup.find_all('user'))
        self.import_forums(soup.find_all('forum'))

    @transaction.commit_manually
    def import_users(self, user_soup):
        """import users from jive to askbot

        ``user_soup`` is the list of ``user`` elements from the dump.
        Each user is saved and committed individually.  An email that
        fails validation is replaced with a numbered placeholder address.
        """
        message = 'Importing users:'
        for jive_user in ProgressBar(iter(user_soup), len(user_soup), message):
            try:
                username = jive_user.find('username').text
                real_name = jive_user.find('name').text
                try:
                    email = EmailField().clean(jive_user.find('email').text)
                except ValidationError:
                    email = 'unknown%d@example.com' % self.bad_email_count
                    self.bad_email_count += 1

                joined_timestamp = parse_date(
                    jive_user.find('creationdate').text
                )
                user = models.User(
                    username=username,
                    email=email,
                    real_name=real_name,
                    date_joined=joined_timestamp
                )
                user.save()
            except BaseException:
                # Under manual transaction management a pending change must
                # be rolled back before leaving, otherwise the transaction
                # machinery raises its own error and hides this one.
                transaction.rollback()
                raise
            transaction.commit()

    def import_forums(self, forum_soup):
        """import forums by associating each with a special tag,
        and then importing all threads for the tag

        The tag name is the text of the forum's ``name`` element.
        """
        for forum in forum_soup:
            threads_soup = forum.find_all('thread')
            self.import_threads(threads_soup, forum.find('name').text)

    @transaction.commit_manually
    def import_threads(self, threads, tag_name):
        """Import every thread in ``threads`` under the tag ``tag_name``,
        committing after each thread."""
        message = 'Importing threads for %s' % tag_name
        for thread in ProgressBar(iter(threads), len(threads), message):
            try:
                self.import_thread(thread, tag_name)
            except BaseException:
                # see import_users: leave no pending changes behind so the
                # original exception is the one that gets reported
                transaction.rollback()
                raise
            transaction.commit()

    def import_thread(self, thread, tag_name):
        """import individual thread

        The first ``message`` element of the thread is posted as the
        question, tagged with ``tag_name``.  Direct children of its
        message list are posted as answers, and all messages nested
        anywhere below an answer are posted as comments on that answer.
        """
        question_soup = thread.message
        title, body, timestamp, user = self.parse_post(question_soup)
        # post question
        question = user.post_question(
            title=title,
            body_text=body,
            timestamp=timestamp,
            tags=tag_name
        )
        # post answers
        if not question_soup.messagelist:
            return

        replies = question_soup.messagelist.find_all('message', recursive=False)
        for answer_soup in replies:
            # the subject of a reply is parsed but not used
            _title, body, timestamp, user = self.parse_post(answer_soup)
            answer = user.post_answer(
                question=question,
                body_text=body,
                timestamp=timestamp
            )
            # the search is recursive, so replies at any depth below the
            # answer are flattened into comments on it
            for comment in answer_soup.find_all('message'):
                _title, body, timestamp, user = self.parse_post(comment)
                user.post_comment(
                    parent_post=answer,
                    body_text=body,
                    timestamp=timestamp
                )

    def parse_post(self, post):
        """Extract the data needed to post one Jive message.

        Returns a ``(title, body, added_at, user)`` tuple.  The author is
        looked up by username; when no such user exists one is created
        with a numbered placeholder email address.
        """
        title = post.find('subject').text
        added_at = parse_date(post.find('creationdate').text)
        username = post.find('username').text
        try:
            user = models.User.objects.get(username=username)
        except models.User.DoesNotExist:
            email = 'unknown%d@example.com' % self.bad_email_count
            self.bad_email_count += 1
            user = models.User(username=username, email=email)
            user.save()
        # NOTE(review): post.text is the text of the whole message element
        # (subject, username, dates and any nested messages included), not
        # only of its body element - confirm this is intended.
        return title, post.text, added_at, user