From 5c02a9b8fc82a5b017ff64ccdfff30cbc2191845 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Tue, 13 Nov 2012 23:41:39 -0300 Subject: hopefully fixed the stackexchange importer --- .../management/commands/load_stackexchange.py | 71 ++++++++++++++++++---- askbot/models/repute.py | 2 +- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/askbot/importers/stackexchange/management/commands/load_stackexchange.py b/askbot/importers/stackexchange/management/commands/load_stackexchange.py index 600d00b1..313bab13 100644 --- a/askbot/importers/stackexchange/management/commands/load_stackexchange.py +++ b/askbot/importers/stackexchange/management/commands/load_stackexchange.py @@ -6,6 +6,7 @@ import sys from unidecode import unidecode import zipfile from datetime import datetime +from django.conf import settings as django_settings from django.core.management.base import BaseCommand, CommandError import askbot.importers.stackexchange.parse_models as se_parser from xml.etree import ElementTree as et @@ -24,9 +25,11 @@ except ImportError: from askbot.models.message import Message as DjangoMessage from django.utils.translation import ugettext as _ +from askbot.utils.console import ProgressBar from askbot.utils.slug import slugify from askbot.models.badges import award_badges_signal, award_badges from askbot.importers.stackexchange.management import is_ready as importer_is_ready +from optparse import make_option #from markdown2 import Markdown #markdowner = Markdown(html4tags=True) @@ -285,12 +288,43 @@ class X(object):# return slugify(cls.badge_exceptions.get(name, name).lower()) class Command(BaseCommand): - help = 'Loads StackExchange data from unzipped directory of XML files into the ASKBOT database' + help = """Loads StackExchange data from SE dump .zip file +it may be helpful to split this procedure in two:\n +* read the dump (with option --read-se-dump) +* transfer data to askbot (with option --process-data) +""" args = 'se_dump_dir' + option_list = BaseCommand.option_list + ( + make_option('-r', '--read-dump', + action='store_true', + dest='read_dump', + default=False, + help='Only read the the dump' + ), + make_option('-p', '--process-data', + action='store_true', + dest='process_data', + default=False, + help='Only process the data, assuming that the dump is loaded' + ) + ) + @transaction.commit_manually def handle(self, *arg, **kwarg): + if django_settings.DEBUG: + raise CommandError( + 'Please set DEBUG to False in the settings.py to reduce RAM usage' + ) + + #process the command line arguments, if given + if kwarg['read_dump'] is False and kwarg['process_data'] is False: + #make them both true as a hack to simulate a condition where + #no flags selected means the same as both are indeed selected + kwarg['read_dump'] = True + kwarg['process_data'] = True + askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False) if not importer_is_ready(): @@ -306,16 +340,23 @@ class Command(BaseCommand): if len(arg) < 1 or not os.path.isfile(arg[0]): raise CommandError('Error: first argument must be a zip file with the SE forum data') - self.zipfile = self.open_dump(arg[0]) - #read the data into SE tables - for item in xml_read_order: - time_before = datetime.now() - self.load_xml_file(item) - transaction.commit() - time_after = datetime.now() - if DEBUGME == True: - print time_after - time_before - print HEAP.heap() + if kwarg['read_dump']: + self.zipfile = self.open_dump(arg[0]) + #read the data into SE tables + for item in xml_read_order: + time_before = datetime.now() + self.load_xml_file(item) + transaction.commit() + time_after = datetime.now() + if DEBUGME == True: + print time_after - time_before + print HEAP.heap() + + if kwarg['process_data'] is False: + #that means we just wanted to load the xml dump to + #do the second step in another go in order to have + #more ram for the transfer of data from SE to Askbot databases + return #this is important so that when we clean up messages #automatically generated by the procedures below @@ -651,7 +692,9 @@ class Command(BaseCommand): c_group = [] #this loop groups revisions by revision id, then calls process function #for the revision grup (elementary revisions posted at once) - for se_rev in se_revs.iterator(): + message = 'Processing revisions' + count = se_revs.count() + for se_rev in ProgressBar(se_revs.iterator(), count, message): if se_rev.revision_guid == c_guid: c_group.append(se_rev) else: @@ -853,7 +896,9 @@ class Command(BaseCommand): return xml_file_basename + '.xml' def transfer_users(self): - for se_u in se.User.objects.all().iterator(): + se_users = se.User.objects.all() + count = se_users.count() + for se_u in ProgressBar(se_users.iterator(), count): #if se_u.id == -1:#skip the Community user # continue u = askbot.User() diff --git a/askbot/models/repute.py b/askbot/models/repute.py index a6e9d7d1..e48773e6 100644 --- a/askbot/models/repute.py +++ b/askbot/models/repute.py @@ -223,7 +223,7 @@ class Repute(models.Model): return _('Changed by moderator. Reason: %(reason)s') \ % {'reason':self.comment} else: - delta = self.positive - self.negative + delta = self.positive + self.negative#.negative is < 0 so we add! link_title_data = { 'points': abs(delta), 'username': self.user.username, -- cgit v1.2.3-1-g7c22