summary refs log tree commit diff stats
path: root/stackexchange/management/commands/load_stackexchange.py
diff options
context:
space:
mode:
Diffstat (limited to 'stackexchange/management/commands/load_stackexchange.py')
-rw-r--r-- stackexchange/management/commands/load_stackexchange.py 266
1 files changed, 233 insertions, 33 deletions
diff --git a/stackexchange/management/commands/load_stackexchange.py b/stackexchange/management/commands/load_stackexchange.py
index 5673b9f0..11b0efc9 100644
--- a/stackexchange/management/commands/load_stackexchange.py
+++ b/stackexchange/management/commands/load_stackexchange.py
@@ -12,6 +12,8 @@ import stackexchange.models as se
from forum.forms import EditUserEmailFeedsForm
from forum.utils.html import sanitize_html
from django.conf import settings
+from django.contrib.auth.models import Message as DjangoMessage
+from django.utils.translation import ugettext as _
#from markdown2 import Markdown
#markdowner = Markdown(html4tags=True)
@@ -55,14 +57,15 @@ class X(object):#
'UpMod':'upvote',
'DownMod':'downvote',
'AcceptedByOriginator':'accept_answer',
- 'Offensive','flag_post',
- 'Favorite','toggle_favorite_question',
+ 'Offensive':'flag_post',
+ 'Favorite':'toggle_favorite_question',
}
#these modes cannot be mixed
+ #only wiki is assumed to be mixable
exclusive_revision_modes = (
- 'initial','edit','lock','unlock',
- 'migrate','close','reopen','merge',
+ 'initial','edit','rollback','lock',
+ 'migrate','close','merge','delete',
)
#badges whose names don't match exactly, but
@@ -84,15 +87,49 @@ class X(object):#
'Rollback Body':'rollback',
'Rollback Tags':'rollback',
'Post Closed':'close',
- 'Post Reopened':'reopen',
+ 'Post Reopened':'close',
'Post Deleted':'delete',
- 'Post Undeleted':'undelete',
+ 'Post Undeleted':'delete',
'Post Locked':'lock',
- 'Post Unlocked':'unlock',
+ 'Post Unlocked':'lock',
'Community Owned':'wiki',
'Post Migrated':'migrate',
'Question Merged':'merge',
}
+
+ close_reason_map = {
+ 1:1,#duplicate
+ 2:2,#off-topic
+ 3:3,#subjective and argumentative
+ 4:4,#not a real question
+ 5:7,#offensive
+ 6:6,#irrelevant or outdated question
+ 7:9,#too localized
+ 10:8,#spam
+ }
+
+ @classmethod
+ def get_message_text(cls, se_m):
+ """try to intelligently translate
+ SE message to OSQA so that it makes sense in
+ our context
+ """
+ #todo: properly translate messages
+ #todo: maybe work through more instances of messages
+ if se_m.message_type.name == 'Badge Notification':
+ return se_m.text
+ else:
+ if 'you are now an administrator' in se_m.text:
+ return _('Congratulations, you are now an Administrator')
+ elif re.search(r'^You have \d+ new',se_m.text):
+ bits = se_m.text.split('.')
+ text = bits[0]
+ if se_m.user.id == -1:
+ return None
+ url = cls.get_user(se_m.user).get_profile_url()
+ return '<a href="%s?sort=responses">%s</a>' % (url,text)
+ return None
+
@classmethod
def get_post(cls, se_post):
#todo: fix this hack - either in-memory id association table
@@ -106,6 +143,12 @@ class X(object):#
raise Exception('unknown post type %s' % post_type)
@classmethod
+ def get_close_reason(cls, se_reason):
+ #todo: this is a guess - have not seen real data
+ se_reason = int(se_reason)
+ return cls.close_reason_map[se_reason]
+
+ @classmethod
def get_user(cls, se_user):
#todo: same as get_post
return osqa.User.objects.get(id=USER[se_user.id].id)
@@ -236,19 +279,76 @@ class Command(BaseCommand):
table_name = self.get_table_name(xml)
self.load_xml_file(xml_path, table_name)
+ #this is important so that when we clean up messages
+ #automatically generated by the procedures below
+ #we do not delete old messages
+ #todo: unfortunately this may need to be redone
+ #when we upgrade to django 1.2 and definitely by 1.4 when
+ #the current message system will be replaced with the
+ #django messages framework
+ self.save_osqa_message_id_list()
+
#transfer data into OSQA tables
+ print 'Transferring users...',
+ sys.stdout.flush()
self.transfer_users()
+ print 'done.'
+ print 'Transferring content edits...',
+ sys.stdout.flush()
self.transfer_question_and_answer_activity()
+ print 'done.'
+ print 'Transferring view counts...',
+ sys.stdout.flush()
self.transfer_question_view_counts()
+ print 'done.'
+ print 'Transferring comments...',
+ sys.stdout.flush()
self.transfer_comments()
+ print 'done.'
+ print 'Transferring badges and badge awards...',
+ sys.stdout.flush()
self.transfer_badges()
- self.transfer_votes()
- self.transfer_favorites()
- self.transfer_tag_preferences()
+ print 'done.'
+ print 'Transferring votes...',
+ sys.stdout.flush()
+ self.transfer_votes()#includes favorites, accepts and flags
+ print 'done.'
+
+ self.cleanup_messages()#delete autogenerated messages
+ self.transfer_messages()
+
+ #todo: it is not clear how to go about these
self.transfer_update_subscriptions()
- self.transfer_flags()
+ self.transfer_tag_preferences()
self.transfer_meta_pages()
+ def save_osqa_message_id_list(self):
+ id_list = list(DjangoMessage.objects.all().values('id'))
+ self._osqa_message_id_list = id_list
+
+ def cleanup_messages(self):
+ """deletes messages generated by the load process
+ """
+ id_list = self._osqa_message_id_list
+ mset = DjangoMessage.objects.all().exclude(id__in=id_list)
+ mset.delete()
+
+ def transfer_messages(self):
+ """transfers some messages from
+ SE to OSQA
+ """
+ for m in se.Message.objects.all():
+ if m.is_read:
+ continue
+ if m.user.id == -1:
+ continue
+ u = X.get_user(m.user)
+ text = X.get_message_text(m)
+ if text:
+ u.message_set.create(
+ message=text,
+ )
+
def _process_post_initial_revision_group(self, rev_group):
title = None
@@ -298,7 +398,7 @@ class Command(BaseCommand):
def _process_post_edit_revision_group(self, rev_group):
#question apply edit
- (title, text, tags, wiki) = (None, None, None, False)
+ (title, text, tags) = (None, None, None)
for rev in rev_group:
rev_type = rev.post_history_type.name
if rev_type == 'Edit Title':
@@ -308,7 +408,7 @@ class Command(BaseCommand):
elif rev_type == 'Edit Tags':
tags = X.clean_tags(rev.text)
elif rev_type == 'Community Owned':
- wiki = True
+ pass
else:
raise Exception('unexpected revision type %s' % rev_type)
@@ -327,39 +427,141 @@ class Command(BaseCommand):
text = text,
comment = comment,
tags = tags,
- wiki = wiki
)
elif post_type == 'Answer':
a = ANSWER[rev0.post.id]
- #todo: wiki will probably be lost here
a.apply_edit(
edited_at = edited_at,
edited_by = edited_by,
text = text,
comment = comment,
- wiki = wiki
)
- def _process_post_action_revision_group(self, rev_group):
- #this is odd - there were no edit actions like these
- #closed, reopened, etc in homeschoolers sample
- print 'Warning: these content revisions were not processed'
- print 'please give us your sample and we will write code to import it'
- print ';'.join([rev.post_history_type.name for rev in rev_group])
+ def _make_post_wiki(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ if rev.post_history_type.name == 'Community Owned':
+ p = X.get_post(rev.post)
+ u = X.get_user(rev.user)
+ t = rev.creation_date
+ p.wiki = True
+ p.wikified_at = t
+ p.wikified_by = u
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def mark_activity(self,p,u,t):
+ """p,u,t - post, user, timestamp
+ """
+ if isinstance(p, osqa.Question):
+ p.last_activity_by = u
+ p.last_activity_at = t
+ elif isinstance(p, osqa.Answer):
+ p.question.last_activity_by = u
+ p.question.last_activity_at = t
+ p.question.save()
+
+ def _process_post_rollback_revision_group(self, rev_group):
+ #todo: don't know what to do here as no
+ #such data was available
+ pass
+
+ def _process_post_lock_revision_group(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ rev_type = rev.post_history_type.name
+ if rev_type.endswith('ocked'):
+ t = rev.creation_date
+ u = X.get_user(rev.user)
+ p = X.get_post(rev.post)
+ if rev_type == 'Post Locked':
+ p.locked = True
+ p.locked_by = u
+ p.locked_at = t
+ elif rev_type == 'Post Unlocked':
+ p.locked = False
+ p.locked_by = None
+ p.locked_at = None
+ else:
+ return
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def _process_post_close_revision_group(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ if rev.post.post_type.name != 'Question':
+ return
+ rev_type = rev.post_history_type.name
+ if rev_type in ('Post Closed', 'Post Reopened'):
+ t = rev.creation_date
+ u = X.get_user(rev.user)
+ p = X.get_post(rev.post)
+ if rev_type == 'Post Closed':
+ p.closed = True
+ p.closed_at = t
+ p.closed_by = u
+ p.close_reason = X.get_close_reason(rev.text)
+ elif rev_type == 'Post Reopened':
+ p.closed = False
+ p.closed_at = None
+ p.closed_by = None
+ p.close_reason = None
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def _process_post_delete_revision_group(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ rev_type = rev.post_history_type.name
+ if rev_type.endswith('eleted'):
+ t = rev.creation_date
+ u = X.get_user(rev.user)
+ p = X.get_post(rev.post)
+ if rev_type == 'Post Deleted':
+ p.deleted = True
+ p.deleted_at = t
+ p.deleted_by = u
+ elif rev_type == 'Post Undeleted':
+ p.deleted = False
+ p.deleted_at = None
+ p.deleted_by = None
+ self.mark_activity(p,u,t)
+ p.save()
+ return
def _process_post_revision_group(self, rev_group):
#determine revision type
+ #'initial','edit','rollback','lock',
+ #'migrate','close','merge','delete',
rev_types = X.get_post_revision_group_types(rev_group)
- #initial,edit,lock,unlock,
- #migrate,close,reopen,merge,wiki
if 'initial' in rev_types:
self._process_post_initial_revision_group(rev_group)
elif 'edit' in rev_types:
self._process_post_edit_revision_group(rev_group)
+ elif 'rollback' in rev_types:
+ self._process_post_rollback_revision_group(rev_group)
+ elif 'lock' in rev_types:
+ self._process_post_lock_revision_group(rev_group)
+ elif 'close' in rev_types:
+ self._process_post_close_revision_group(rev_group)
+ elif 'delete' in rev_types:
+ self._process_post_delete_revision_group(rev_group)
else:
- self._process_post_action_revision_group(rev_group)
+ pass
+ #todo: rollback, lock, close and delete are
+ #not tested
+ #merge and migrate actions are ignored
+ #wiki is mixable with other groups, so process it in addition
+ if 'wiki' in rev_types:
+ self._make_post_wiki(rev_group)
def transfer_tag_preferences(self):
+ #todo: figure out where these are stored in SE
+ #maybe in se.User.preferences_raw?
pass
def transfer_question_and_answer_activity(self):
@@ -384,6 +586,8 @@ class Command(BaseCommand):
c_group = []
c_group.append(se_rev)
c_guid = se_rev.revision_guid
+ if len(c_group) != 0:
+ self._process_post_revision_group(c_group)
def transfer_comments(self):
for se_c in se.PostComment.objects.all():
@@ -477,18 +681,14 @@ class Command(BaseCommand):
u = X.get_user(v.user)
p = X.get_post(v.post)
m = X.vote_actions[vote_type]
- vote_method = getattr(osqa.User, m['on'])
+ vote_method = getattr(osqa.User, m)
vote_method(u, p, timestamp = v.creation_date)
if v.deletion_date:
vote_method(u, p, timestamp = v.deletion_date, cancel=True)
- def transfer_favorites(self):
- pass
-
def transfer_update_subscriptions(self):
- pass
-
- def transfer_flags(self):
+ #todo: not clear where this is stored in SE
+ #maybe in se.User.preferences_raw?
pass
def transfer_meta_pages(self):
@@ -548,7 +748,7 @@ class Command(BaseCommand):
if se_u.open_id is None and se_u.email is None:
print 'Warning: SE user %d is not recoverable (no email or openid)'
- u.reputation = se_u.reputation
+ u.reputation = 1#se_u.reputation, it's actually re-computed
u.last_seen = se_u.last_access_date
u.email = X.get_email(se_u.email)
u.location = X.blankable(se_u.location)