summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xforum/auth.py98
-rwxr-xr-xforum/authentication/__init__.py4
-rwxr-xr-xforum/const.py8
-rwxr-xr-xforum/forms.py12
-rwxr-xr-xforum/management/commands/once_award_badges.py2
-rwxr-xr-xforum/models/__init__.py151
-rwxr-xr-xforum/models/answer.py70
-rwxr-xr-xforum/models/base.py14
-rwxr-xr-xforum/models/meta.py4
-rwxr-xr-xforum/models/question.py129
-rwxr-xr-xforum/models/repute.py36
-rwxr-xr-xforum/models/user.py2
-rwxr-xr-xforum/views/commands.py34
-rwxr-xr-xforum/views/writers.py150
-rwxr-xr-xlog/README.TXT2
-rw-r--r--settings.py8
-rwxr-xr-xsettings_local.py.dist3
-rw-r--r--stackexchange/ANOMALIES14
-rw-r--r--stackexchange/README62
-rw-r--r--stackexchange/__init__.py0
-rw-r--r--stackexchange/management/__init__.py0
-rw-r--r--stackexchange/management/commands/__init__.py0
-rw-r--r--stackexchange/management/commands/load_stackexchange.py804
-rw-r--r--stackexchange/models.py266
-rw-r--r--stackexchange/parse_models.py225
25 files changed, 1851 insertions, 247 deletions
diff --git a/forum/auth.py b/forum/auth.py
index 3533b9ce..5d6e71c4 100755
--- a/forum/auth.py
+++ b/forum/auth.py
@@ -5,16 +5,14 @@ The actions a User is authorised to perform are dependent on their reputation
and superuser status.
"""
import datetime
-from django.contrib.contenttypes.models import ContentType
from django.utils.translation import ugettext as _
from django.db import transaction
from models import Repute
from models import Question
from models import Answer
+from models import mark_offensive, delete_post_or_answer
from const import TYPE_REPUTATION
import logging
-question_type = ContentType.objects.get_for_model(Question)
-answer_type = ContentType.objects.get_for_model(Answer)
VOTE_UP = 15
FLAG_OFFENSIVE = 15
@@ -198,7 +196,9 @@ def calculate_reputation(origin, offset):
return 1
@transaction.commit_on_success
-def onFlaggedItem(item, post, user):
+def onFlaggedItem(item, post, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
item.save()
post.offensive_flag_count = post.offensive_flag_count + 1
@@ -209,12 +209,12 @@ def onFlaggedItem(item, post, user):
post.author.save()
question = post
- if ContentType.objects.get_for_model(post) == answer_type:
+ if isinstance(post, Answer):
question = post.question
reputation = Repute(user=post.author,
negative=int(REPUTATION_RULES['lose_by_flagged']),
- question=question, reputed_at=datetime.datetime.now(),
+ question=question, reputed_at=timestamp,
reputation_type=-4,
reputation=post.author.reputation)
reputation.save()
@@ -228,7 +228,7 @@ def onFlaggedItem(item, post, user):
reputation = Repute(user=post.author,
negative=int(REPUTATION_RULES['lose_by_flagged_lastrevision_3_times']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-6,
reputation=post.author.reputation)
reputation.save()
@@ -241,21 +241,28 @@ def onFlaggedItem(item, post, user):
reputation = Repute(user=post.author,
negative=int(REPUTATION_RULES['lose_by_flagged_lastrevision_5_times']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-7,
reputation=post.author.reputation)
reputation.save()
post.deleted = True
- #post.deleted_at = datetime.datetime.now()
+ #post.deleted_at = timestamp
#post.deleted_by = Admin
post.save()
-
+ mark_offensive.send(
+ sender=post.__class__,
+ instance=post,
+ mark_by=user
+ )
@transaction.commit_on_success
-def onAnswerAccept(answer, user):
+def onAnswerAccept(answer, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
+
answer.accepted = True
- answer.accepted_at = datetime.datetime.now()
+ answer.accepted_at = timestamp
answer.question.answer_accepted = True
answer.save()
answer.question.save()
@@ -266,7 +273,7 @@ def onAnswerAccept(answer, user):
reputation = Repute(user=answer.author,
positive=int(REPUTATION_RULES['gain_by_answer_accepted']),
question=answer.question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=2,
reputation=answer.author.reputation)
reputation.save()
@@ -277,13 +284,15 @@ def onAnswerAccept(answer, user):
reputation = Repute(user=user,
positive=int(REPUTATION_RULES['gain_by_accepting_answer']),
question=answer.question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=3,
reputation=user.reputation)
reputation.save()
@transaction.commit_on_success
-def onAnswerAcceptCanceled(answer, user):
+def onAnswerAcceptCanceled(answer, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
answer.accepted = False
answer.accepted_at = None
answer.question.answer_accepted = False
@@ -296,7 +305,7 @@ def onAnswerAcceptCanceled(answer, user):
reputation = Repute(user=answer.author,
negative=int(REPUTATION_RULES['lose_by_accepted_answer_cancled']),
question=answer.question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-2,
reputation=answer.author.reputation)
reputation.save()
@@ -307,13 +316,15 @@ def onAnswerAcceptCanceled(answer, user):
reputation = Repute(user=user,
negative=int(REPUTATION_RULES['lose_by_canceling_accepted_answer']),
question=answer.question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-1,
reputation=user.reputation)
reputation.save()
@transaction.commit_on_success
-def onUpVoted(vote, post, user):
+def onUpVoted(vote, post, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
vote.save()
post.vote_up_count = int(post.vote_up_count) + 1
@@ -322,25 +333,28 @@ def onUpVoted(vote, post, user):
if not post.wiki:
author = post.author
- if Repute.objects.get_reputation_by_upvoted_today(author) < int(REPUTATION_RULES['scope_per_day_by_upvotes']):
+ todays_rep_gain = Repute.objects.get_reputation_by_upvoted_today(author)
+ if todays_rep_gain < int(REPUTATION_RULES['scope_per_day_by_upvotes']):
author.reputation = calculate_reputation(author.reputation,
int(REPUTATION_RULES['gain_by_upvoted']))
author.save()
question = post
- if ContentType.objects.get_for_model(post) == answer_type:
+ if isinstance(post, Answer):
question = post.question
reputation = Repute(user=author,
positive=int(REPUTATION_RULES['gain_by_upvoted']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=1,
reputation=author.reputation)
reputation.save()
@transaction.commit_on_success
-def onUpVotedCanceled(vote, post, user):
+def onUpVotedCanceled(vote, post, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
vote.delete()
post.vote_up_count = int(post.vote_up_count) - 1
@@ -356,19 +370,21 @@ def onUpVotedCanceled(vote, post, user):
author.save()
question = post
- if ContentType.objects.get_for_model(post) == answer_type:
+ if isinstance(post, Answer):
question = post.question
reputation = Repute(user=author,
negative=int(REPUTATION_RULES['lose_by_upvote_canceled']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-8,
reputation=author.reputation)
reputation.save()
@transaction.commit_on_success
-def onDownVoted(vote, post, user):
+def onDownVoted(vote, post, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
vote.save()
post.vote_down_count = int(post.vote_down_count) + 1
@@ -382,13 +398,13 @@ def onDownVoted(vote, post, user):
author.save()
question = post
- if ContentType.objects.get_for_model(post) == answer_type:
+ if isinstance(post, Answer):
question = post.question
reputation = Repute(user=author,
negative=int(REPUTATION_RULES['lose_by_downvoted']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-3,
reputation=author.reputation)
reputation.save()
@@ -400,13 +416,15 @@ def onDownVoted(vote, post, user):
reputation = Repute(user=user,
negative=int(REPUTATION_RULES['lose_by_downvoting']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=-5,
reputation=user.reputation)
reputation.save()
@transaction.commit_on_success
-def onDownVotedCanceled(vote, post, user):
+def onDownVotedCanceled(vote, post, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
vote.delete()
post.vote_down_count = int(post.vote_down_count) - 1
@@ -422,13 +440,13 @@ def onDownVotedCanceled(vote, post, user):
author.save()
question = post
- if ContentType.objects.get_for_model(post) == answer_type:
+ if isinstance(post, Answer):
question = post.question
reputation = Repute(user=author,
positive=int(REPUTATION_RULES['gain_by_downvote_canceled']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=4,
reputation=author.reputation)
reputation.save()
@@ -440,12 +458,13 @@ def onDownVotedCanceled(vote, post, user):
reputation = Repute(user=user,
positive=int(REPUTATION_RULES['gain_by_canceling_downvote']),
question=question,
- reputed_at=datetime.datetime.now(),
+ reputed_at=timestamp,
reputation_type=5,
reputation=user.reputation)
reputation.save()
-def onDeleteCanceled(post, user):
+#here timestamp is not used, I guess added for consistency
+def onDeleteCanceled(post, user, timestamp=None):
post.deleted = False
post.deleted_by = None
post.deleted_at = None
@@ -462,10 +481,12 @@ def onDeleteCanceled(post, user):
tag.deleted_at = None
tag.save()
-def onDeleted(post, user):
+def onDeleted(post, user, timestamp=None):
+ if timestamp is None:
+ timestamp = datetime.datetime.now()
post.deleted = True
post.deleted_by = user
- post.deleted_at = datetime.datetime.now()
+ post.deleted_at = timestamp
post.save()
if isinstance(post, Question):
@@ -473,7 +494,7 @@ def onDeleted(post, user):
if tag.used_count == 1:
tag.deleted = True
tag.deleted_by = user
- tag.deleted_at = datetime.datetime.now()
+ tag.deleted_at = timestamp
else:
tag.used_count = tag.used_count - 1
tag.save()
@@ -496,3 +517,8 @@ def onDeleted(post, user):
elif isinstance(post, Answer):
Question.objects.update_answer_count(post.question)
logging.debug('updated answer count to %d' % post.question.answer_count)
+ delete_post_or_answer.send(
+ sender=post.__class__,
+ instance=post,
+ delete_by=user
+ )
diff --git a/forum/authentication/__init__.py b/forum/authentication/__init__.py
index 5326c45c..75099303 100755
--- a/forum/authentication/__init__.py
+++ b/forum/authentication/__init__.py
@@ -7,7 +7,7 @@ class ConsumerAndContext():
self.id = id
self.consumer = consumer()
- context.id = id
+ context.id = id #add extra field to context
self.context = context
consumers = dict([
@@ -24,4 +24,4 @@ contexts = dict([
AUTH_PROVIDERS = dict([
(name, ConsumerAndContext(name, consumers[name], contexts[name])) for name in consumers.keys()
if name in contexts
- ]) \ No newline at end of file
+ ])
diff --git a/forum/const.py b/forum/const.py
index ce81acb2..39db5ad4 100755
--- a/forum/const.py
+++ b/forum/const.py
@@ -8,12 +8,12 @@ CLOSE_REASONS = (
(1, _('duplicate question')),
(2, _('question is off-topic or not relevant')),
(3, _('too subjective and argumentative')),
- (4, _('is not an answer to the question')),
+ (4, _('not a real question')),
(5, _('the question is answered, right answer was accepted')),
- (6, _('problem is not reproducible or outdated')),
- #(7, u'太局部、本地化的问题',)
- (7, _('question contains offensive inappropriate, or malicious remarks')),
+ (6, _('question is not relevant or outdated')),
+ (7, _('question contains offensive or malicious remarks')),
(8, _('spam or advertising')),
+ (9, _('too localized')),
)
TYPE_REPUTATION = (
diff --git a/forum/forms.py b/forum/forms.py
index 2260bfe5..6f91acfc 100755
--- a/forum/forms.py
+++ b/forum/forms.py
@@ -13,6 +13,7 @@ from django.conf import settings
from django.contrib.contenttypes.models import ContentType
import logging
+
class TitleField(forms.CharField):
def __init__(self, *args, **kwargs):
super(TitleField, self).__init__(*args, **kwargs)
@@ -41,7 +42,6 @@ class EditorField(forms.CharField):
def clean(self, value):
if len(value) < 10:
raise forms.ValidationError(_('question content must be > 10 characters'))
-
return value
class TagNamesField(forms.CharField):
@@ -185,6 +185,7 @@ class EditQuestionForm(forms.Form):
tags = TagNamesField()
summary = SummaryField()
+ #todo: this is odd that this form takes question as an argument
def __init__(self, question, revision, *args, **kwargs):
super(EditQuestionForm, self).__init__(*args, **kwargs)
self.fields['title'].initial = revision.title
@@ -305,10 +306,11 @@ class EditUserEmailFeedsForm(forms.Form):
return self
def reset(self):
- self.cleaned_data['all_questions'] = 'n'
- self.cleaned_data['asked_by_me'] = 'n'
- self.cleaned_data['answered_by_me'] = 'n'
- self.cleaned_data['individually_selected'] = 'n'
+ if self.is_bound:
+ self.cleaned_data['all_questions'] = 'n'
+ self.cleaned_data['asked_by_me'] = 'n'
+ self.cleaned_data['answered_by_me'] = 'n'
+ self.cleaned_data['individually_selected'] = 'n'
self.initial = self.NO_EMAIL_INITIAL
return self
diff --git a/forum/management/commands/once_award_badges.py b/forum/management/commands/once_award_badges.py
index 8c913348..372eb3aa 100755
--- a/forum/management/commands/once_award_badges.py
+++ b/forum/management/commands/once_award_badges.py
@@ -337,7 +337,7 @@ class Command(BaseCommand):
if user_id not in awarded_users:
user = get_object_or_404(User, id=user_id)
- award = Award(user=user, badge=badge)
+ award = Award(user=user, badge=badge)#todo: will this work with content_object null?
award.save()
awarded_users.append(user_id)
finally:
diff --git a/forum/models/__init__.py b/forum/models/__init__.py
index 01086213..f4850025 100755
--- a/forum/models/__init__.py
+++ b/forum/models/__init__.py
@@ -7,6 +7,20 @@ from repute import Badge, Award, Repute
import re
from base import *
+import datetime
+from django.contrib.contenttypes.models import ContentType
+
+#todo: move to a separate file?
+# custom signals
+tags_updated = django.dispatch.Signal(providing_args=["question"])
+edit_question_or_answer = django.dispatch.Signal(providing_args=["instance", "modified_by"])
+delete_post_or_answer = django.dispatch.Signal(providing_args=["instance", "deleted_by"])
+mark_offensive = django.dispatch.Signal(providing_args=["instance", "mark_by"])
+user_updated = django.dispatch.Signal(providing_args=["instance", "updated_by"])
+user_logged_in = django.dispatch.Signal(providing_args=["session"])
+
+#todo: must go after signals
+from forum import auth
# User extend properties
QUESTIONS_PER_PAGE_CHOICES = (
@@ -73,15 +87,6 @@ User.add_to_class('tag_filter_setting',
)
User.add_to_class('get_absolute_url', user_get_absolute_url)
-# custom signal
-tags_updated = django.dispatch.Signal(providing_args=["question"])
-edit_question_or_answer = django.dispatch.Signal(providing_args=["instance", "modified_by"])
-delete_post_or_answer = django.dispatch.Signal(providing_args=["instance", "deleted_by"])
-mark_offensive = django.dispatch.Signal(providing_args=["instance", "mark_by"])
-user_updated = django.dispatch.Signal(providing_args=["instance", "updated_by"])
-user_logged_in = django.dispatch.Signal(providing_args=["session"])
-
-
def get_messages(self):
messages = []
for m in self.message_set.all():
@@ -100,6 +105,126 @@ def get_profile_link(self):
logging.debug('in get profile link %s' % profile_link)
return mark_safe(profile_link)
+#series of methods for user vote-type commands
+#all share the call signature func(self, post, timestamp=None, cancel=False)
+#note that none of these have business logic checks internally
+#these functions are used by the forum app and
+#by the data importer jobs (e.g. from stackexchange), where internal rules
+#may be different
+#if we do add business rule checks here, we should also add
+#some flag allowing to bypass them for things like the data importers
+def toggle_favorite_question(self, question, timestamp=None, cancel=False):
+    """Add ``question`` to this user's favorites, or remove it if it is
+    already there.
+
+    ``timestamp`` is stored as ``added_at`` on a newly created favorite and
+    defaults to the current time.
+
+    ``cancel`` has no effect here; it is kept so that the signature matches
+    the other vote-type commands (important for the SE loader). It is hoped
+    that plain toggling keeps the data consistent, but there is no
+    guarantee - maybe it is better to be more strict about "cancel".
+
+    Unlike the other vote-type commands this one returns a value:
+    True when the favorite was created, False when it was removed.
+    """
+    try:
+        fave = FavoriteQuestion.objects.get(question=question, user=self)
+    except FavoriteQuestion.DoesNotExist:
+        if timestamp is None:
+            #never pass an explicit None as added_at
+            timestamp = datetime.datetime.now()
+        fave = FavoriteQuestion(
+            question = question,
+            user = self,
+            added_at = timestamp,
+        )
+        fave.save()
+        result = True
+    else:
+        fave.delete()
+        result = False
+    Question.objects.update_favorite_count(question)
+    return result
+
+#"private" wrapper function that applies post upvotes/downvotes and cancelations
+def _process_vote(user, post, timestamp=None, cancel=False, vote_type=None):
+    """Apply or cancel an upvote/downvote by ``user`` on ``post``.
+
+    Looks up the user's existing vote on the post, returns early when the
+    request is a no-op, and otherwise passes the vote on to the matching
+    forum.auth handler.
+
+    No-op cases:
+    * cancel requested, but there is no vote or the stored vote is of the
+      opposite type
+    * vote requested, but a vote of the same type is already stored
+
+    ``timestamp`` defaults to the current time.
+    ``vote_type`` is expected to be Vote.VOTE_UP or Vote.VOTE_DOWN.
+    """
+    if timestamp is None:
+        #a new Vote must not get an explicit None as voted_at
+        timestamp = datetime.datetime.now()
+
+    post_type = ContentType.objects.get_for_model(post)
+    try:
+        vote = Vote.objects.get(
+                    user = user,
+                    content_type = post_type,
+                    object_id = post.id,
+                )
+    except Vote.DoesNotExist:
+        vote = None
+
+    if cancel:
+        if vote is None or vote.is_opposite(vote_type):
+            return
+        #vote.delete() is not called here: for now the legacy
+        #forum.auth on...Canceled() functions delete the vote
+    elif vote is None:
+        vote = Vote(
+                    user = user,
+                    content_object = post,
+                    vote = vote_type,
+                    voted_at = timestamp,
+                )
+    elif vote.is_opposite(vote_type):
+        #switch the stored vote to the requested type
+        vote.vote = vote_type
+    else:
+        return
+
+    #do the actual work
+    if vote_type == Vote.VOTE_UP:
+        if cancel:
+            auth.onUpVotedCanceled(vote, post, user, timestamp)
+        else:
+            auth.onUpVoted(vote, post, user, timestamp)
+    elif vote_type == Vote.VOTE_DOWN:
+        if cancel:
+            auth.onDownVotedCanceled(vote, post, user, timestamp)
+        else:
+            auth.onDownVoted(vote, post, user, timestamp)
+
+def upvote(self, post, timestamp=None, cancel=False):
+    """Upvote ``post`` as this user, or cancel the upvote when ``cancel``
+    is true. All the work is done by _process_vote()."""
+    vote_options = {
+        'timestamp': timestamp,
+        'cancel': cancel,
+        'vote_type': Vote.VOTE_UP,
+    }
+    _process_vote(self, post, **vote_options)
+
+def downvote(self, post, timestamp=None, cancel=False):
+    """Downvote ``post`` as this user, or cancel the downvote when
+    ``cancel`` is true. All the work is done by _process_vote()."""
+    vote_options = {
+        'timestamp': timestamp,
+        'cancel': cancel,
+        'vote_type': Vote.VOTE_DOWN,
+    }
+    _process_vote(self, post, **vote_options)
+
+def accept_answer(self, answer, timestamp=None, cancel=False):
+    """Accept ``answer`` as this user, or withdraw the acceptance when
+    ``cancel`` is true, by calling the matching forum.auth handler."""
+    if cancel:
+        handler = auth.onAnswerAcceptCanceled
+    else:
+        handler = auth.onAnswerAccept
+    handler(answer, self, timestamp=timestamp)
+
+def flag_post(self, post, timestamp=None, cancel=False):
+    """Flag ``post`` as offensive on behalf of this user.
+
+    Does nothing when ``cancel`` is true (unflagging is not implemented)
+    or when this user has already flagged the post. Otherwise a
+    FlaggedItem is built and passed to auth.onFlaggedItem(), which saves it.
+
+    ``timestamp`` defaults to the current time.
+    """
+    if cancel:#todo: can't unflag?
+        return
+    if post.flagged_items.filter(user=self).count() > 0:
+        return
+    if timestamp is None:
+        #the new FlaggedItem must not get an explicit None as flagged_at
+        timestamp = datetime.datetime.now()
+    flag = FlaggedItem(
+            user = self,
+            content_object = post,
+            flagged_at = timestamp,
+        )
+    auth.onFlaggedItem(flag, post, self, timestamp=timestamp)
+
+User.add_to_class('toggle_favorite_question', toggle_favorite_question)
+User.add_to_class('upvote', upvote)
+User.add_to_class('downvote', downvote)
+User.add_to_class('accept_answer', accept_answer)
+User.add_to_class('flag_post', flag_post)
User.add_to_class('get_profile_url', get_profile_url)
User.add_to_class('get_profile_link', get_profile_link)
User.add_to_class('get_messages', get_messages)
@@ -122,9 +247,7 @@ def record_answer_event(instance, created, **kwargs):
if created:
q_author = instance.question.author
found_match = False
- #print 'going through %d messages' % q_author.message_set.all().count()
for m in q_author.message_set.all():
- #print m.message
match = record_answer_event_re.search(m.message)
if match:
found_match = True
@@ -134,15 +257,11 @@ def record_answer_event(instance, created, **kwargs):
cnt = 1
m.message = u"You have received %d <a href=\"%s?sort=responses\">new responses</a>."\
% (cnt+1, q_author.get_profile_url())
- #print 'updated message'
- #print m.message
m.save()
break
if not found_match:
msg = u"You have received a <a href=\"%s?sort=responses\">new response</a>."\
% q_author.get_profile_url()
- #print 'new message'
- #print msg
q_author.message_set.create(message=msg)
activity = Activity(user=instance.author, \
@@ -370,7 +489,7 @@ __all__ = [
'ValidationHash',
'AuthKeyUserAssociation',
- 'User'
+ 'User',
]
diff --git a/forum/models/answer.py b/forum/models/answer.py
index 8f44108f..3fd08d98 100755
--- a/forum/models/answer.py
+++ b/forum/models/answer.py
@@ -1,4 +1,11 @@
from base import *
+#todo: take care of the copy-pasted markdowner stuff; maybe make html an automatic field?
+from forum.const import CONST
+from markdown2 import Markdown
+from django.utils.html import strip_tags
+from forum.utils.html import sanitize_html
+import datetime
+markdowner = Markdown(html4tags=True)
from question import Question
@@ -9,7 +16,7 @@ class AnswerManager(models.Manager):
author = author,
added_at = added_at,
wiki = wiki,
- html = text
+ html = sanitize_html(markdowner.convert(text)),
)
if answer.wiki:
answer.last_edited_by = answer.author
@@ -18,21 +25,19 @@ class AnswerManager(models.Manager):
answer.save()
+ answer.add_revision(
+ revised_by=author,
+ revised_at=added_at,
+ text=text,
+ comment=CONST['default_version'],
+ )
+
#update question data
question.last_activity_at = added_at
question.last_activity_by = author
question.save()
Question.objects.update_answer_count(question)
- AnswerRevision.objects.create(
- answer = answer,
- revision = 1,
- author = author,
- revised_at = added_at,
- summary = CONST['default_version'],
- text = text
- )
-
#set notification/delete
if email_notify:
if author not in question.followed_by.all():
@@ -43,6 +48,7 @@ class AnswerManager(models.Manager):
question.followed_by.remove(author)
except:
pass
+ return answer
#GET_ANSWERS_FROM_USER_QUESTIONS = u'SELECT answer.* FROM answer INNER JOIN question ON answer.question_id = question.id WHERE question.author_id =%s AND answer.author_id <> %s'
def get_answers_from_question(self, question, user=None):
@@ -76,6 +82,50 @@ class Answer(Content, DeletableContent):
class Meta(Content.Meta):
db_table = u'answer'
+    def apply_edit(self, edited_at=None, edited_by=None, text=None, comment=None, wiki=False):
+        """Apply an edit to this answer and record it as a new revision.
+
+        ``edited_by`` is required; an Exception is raised without it.
+        ``edited_at`` defaults to the current time.
+        ``text`` defaults to the text of the latest revision.
+        ``comment`` becomes the revision summary (add_revision() supplies
+        a default when it is empty).
+        ``wiki`` can only switch wiki mode on, never off - same rule as
+        in Question.apply_edit().
+
+        Also updates last activity data of the parent question.
+        """
+        if edited_by is None:
+            raise Exception('edited_by is required')
+        if edited_at is None:
+            edited_at = datetime.datetime.now()
+        if text is None:
+            text = self.get_latest_revision().text
+
+        self.last_edited_at = edited_at
+        self.last_edited_by = edited_by
+        self.html = sanitize_html(markdowner.convert(text))
+        #wiki is a one way switch
+        if wiki and not self.wiki:
+            self.wiki = True
+        self.save()
+
+        self.add_revision(
+            revised_by=edited_by,
+            revised_at=edited_at,
+            text=text,
+            comment=comment
+        )
+
+        self.question.last_activity_at = edited_at
+        self.question.last_activity_by = edited_by
+        self.question.save()
+
+    def add_revision(self, revised_by=None, revised_at=None, text=None, comment=None):
+        """Create and return the next AnswerRevision of this answer.
+
+        ``revised_by``, ``revised_at`` and ``text`` are required.
+        The revision number is the count of existing revisions plus one.
+        An empty ``comment`` is replaced with a default summary.
+        """
+        required = (revised_by, revised_at, text)
+        if None in required:
+            raise Exception('arguments revised_by, revised_at and text are required')
+
+        next_no = self.revisions.all().count() + 1
+        if comment is None or comment == '':
+            if next_no == 1:
+                comment = CONST['default_version']
+            else:
+                comment = 'No.%s Revision' % next_no
+
+        revision_data = {
+            'answer': self,
+            'author': revised_by,
+            'revised_at': revised_at,
+            'text': text,
+            'summary': comment,
+            'revision': next_no,
+        }
+        return AnswerRevision.objects.create(**revision_data)
+
def get_user_vote(self, user):
if user.__class__.__name__ == "AnonymousUser":
return None
diff --git a/forum/models/base.py b/forum/models/base.py
index 2a8f3df9..fb66ff1b 100755
--- a/forum/models/base.py
+++ b/forum/models/base.py
@@ -127,6 +127,18 @@ class Content(models.Model):
comments = self.comments.all().order_by('id')
return comments
+    def add_comment(self, comment=None, user=None, added_at=None):
+        """Attach a new comment to this post and bump its comment_count.
+
+        ``comment`` (the text) and ``user`` are required.
+        ``added_at`` defaults to the current time.
+        """
+        if added_at is None:
+            added_at = datetime.datetime.now()
+        if None in (comment ,user):
+            raise Exception('arguments comment and user are required')
+
+        #model is looked up by name, todo: forum hardcoded
+        comment_model = models.get_model('forum','Comment')
+        new_comment = comment_model(
+            content_object=self,
+            comment=comment,
+            user=user,
+            added_at=added_at
+        )
+        new_comment.save()
+
+        self.comment_count += 1
+        self.save()
+
def post_get_last_update_info(self):
when = self.added_at
who = self.author
@@ -139,4 +151,4 @@ class Content(models.Model):
if c.added_at > when:
when = c.added_at
who = c.user
- return when, who \ No newline at end of file
+ return when, who
diff --git a/forum/models/meta.py b/forum/models/meta.py
index af4a9f3d..114d2130 100755
--- a/forum/models/meta.py
+++ b/forum/models/meta.py
@@ -48,6 +48,10 @@ class Vote(MetaContent, UserContent):
def is_downvote(self):
return self.vote == self.VOTE_DOWN
+    def is_opposite(self, vote_type):
+        """Return True when the stored vote differs from ``vote_type``,
+        which must be either VOTE_UP or VOTE_DOWN."""
+        allowed_types = (self.VOTE_UP, self.VOTE_DOWN)
+        assert vote_type in allowed_types
+        return self.vote != vote_type
+
class FlaggedItemManager(models.Manager):
def get_flagged_items_count_today(self, user):
diff --git a/forum/models/question.py b/forum/models/question.py
index f351dff0..280c87c0 100755
--- a/forum/models/question.py
+++ b/forum/models/question.py
@@ -1,10 +1,18 @@
from base import *
from tag import Tag
+from forum.const import CONST
+from forum.utils.html import sanitize_html
+from markdown2 import Markdown
+from django.utils.html import strip_tags
+import datetime
+markdowner = Markdown(html4tags=True)
from forum.utils.lists import LazyList
class QuestionManager(models.Manager):
- def create_new(self, title=None,author=None,added_at=None, wiki=False,tagnames=None,summary=None, text=None):
+ def create_new(cls, title=None,author=None,added_at=None, wiki=False,tagnames=None, text=None):
+ html = sanitize_html(markdowner.convert(text))
+ summary = strip_tags(html)[:120]
question = Question(
title = title,
author = author,
@@ -13,7 +21,7 @@ class QuestionManager(models.Manager):
last_activity_by = author,
wiki = wiki,
tagnames = tagnames,
- html = text,
+ html = html,
summary = summary
)
if question.wiki:
@@ -23,16 +31,11 @@ class QuestionManager(models.Manager):
question.save()
- # create the first revision
- QuestionRevision.objects.create(
- question = question,
- revision = 1,
- title = question.title,
- author = author,
- revised_at = added_at,
- tagnames = question.tagnames,
- summary = CONST['default_version'],
- text = text
+ question.add_revision(
+ author=author,
+ text=text,
+ comment=CONST['default_version'],
+ revised_at=added_at,
)
return question
@@ -69,6 +72,9 @@ class QuestionManager(models.Manager):
return False
+ #todo: why not make this into a method of class Question?
+ # also it is actually strange - why do we need the answer_count
+ # field if the count depends on who is requesting this?
def update_answer_count(self, question):
"""
Executes an UPDATE query to update denormalised data with the
@@ -153,6 +159,105 @@ class Question(Content, DeletableContent):
except Exception:
logging.debug('problem pinging google did you register you sitemap with google?')
+    def retag(self, retagged_by=None, retagged_at=None, tagnames=None):
+        """Replace the tags of this question and record a new revision.
+
+        All three arguments are required; an Exception is raised if any
+        of them is None. Title and text of the new revision are copied
+        from the latest revision. The tags_updated signal is sent at
+        the end.
+        """
+        if None in (retagged_by, retagged_at, tagnames):
+            raise Exception('arguments retagged_at, retagged_by and tagnames are required')
+
+        #the signal is defined in forum/models/__init__.py; imported here
+        #rather than at module level
+        #NOTE(review): presumably a top level import would be circular - confirm
+        from forum.models import tags_updated
+
+        # Update the Question itself
+        self.tagnames = tagnames
+        self.last_edited_at = retagged_at
+        self.last_activity_at = retagged_at
+        self.last_edited_by = retagged_by
+        self.last_activity_by = retagged_by
+        #persist the changes, same as apply_edit() does
+        self.save()
+
+        # Update the Question's tag associations
+        # (managers are not accessible via instances, so go through the class)
+        Question.objects.update_tags(self, tagnames, retagged_by)
+
+        # Create a new revision
+        latest_revision = self.get_latest_revision()
+        QuestionRevision.objects.create(
+            question = self,
+            title = latest_revision.title,
+            author = retagged_by,
+            revised_at = retagged_at,
+            tagnames = tagnames,
+            summary = CONST['retagged'],
+            text = latest_revision.text
+        )
+        # send tags updated signal
+        tags_updated.send(sender=self.__class__, question=self)
+
+    def apply_edit(self, edited_at=None, edited_by=None, title=None,
+                    text=None, comment=None, tags=None, wiki=False):
+        """Apply an edit to this question and record it as a new revision.
+
+        ``edited_by`` is required; an Exception is raised without it.
+        ``edited_at`` defaults to the current time.
+        ``title``, ``text`` and ``tags`` may be omitted to allow partial
+        edits (important for the SE loader): a missing title is taken from
+        the question, missing text and tags from the latest revision.
+        ``comment`` becomes the revision summary.
+        ``wiki`` can only switch wiki mode on, never off.
+        """
+        if edited_by is None:
+            raise Exception('parameter edited_by is required')
+
+        if edited_at is None:
+            edited_at = datetime.datetime.now()
+
+        latest_revision = self.get_latest_revision()
+        #a hack to allow partial edits - important for SE loader
+        if title is None:
+            title = self.title
+        if text is None:
+            text = latest_revision.text
+        if tags is None:
+            tags = latest_revision.tagnames
+
+        #todo: have this copy-paste in few places
+        html = sanitize_html(markdowner.convert(text))
+        question_summary = strip_tags(html)[:120]
+
+        # Update the Question itself
+        self.title = title
+        self.last_edited_at = edited_at
+        self.last_activity_at = edited_at
+        self.last_edited_by = edited_by
+        self.last_activity_by = edited_by
+        self.tagnames = tags
+        self.summary = question_summary
+        self.html = html
+
+        #wiki is an eternal trap whence there is no exit
+        if wiki and not self.wiki:
+            self.wiki = True
+
+        self.save()
+
+        # Update the Question tag associations
+        if latest_revision.tagnames != tags:
+            Question.objects.update_tags(self, tags, edited_by)
+
+        # Create a new revision
+        self.add_revision(
+            author = edited_by,
+            text = text,
+            revised_at = edited_at,
+            comment = comment,
+        )
+
+    def add_revision(self,author=None, text=None, comment=None, revised_at=None):
+        """Create and return the next QuestionRevision of this question.
+
+        ``author``, ``text`` and ``revised_at`` are required; an Exception
+        is raised if any of them is None.
+        Title and tagnames are copied from the question itself, the
+        revision number is the count of existing revisions plus one.
+        An empty ``comment`` is replaced with a default summary.
+        """
+        #comment is deliberately not in this list - it has a default below
+        if None in (author, text, revised_at):
+            raise Exception('author, text and revised_at are required arguments')
+        rev_no = self.revisions.all().count() + 1
+        if comment in (None, ''):
+            if rev_no == 1:
+                comment = CONST['default_version']
+            else:
+                comment = 'No.%s Revision' % rev_no
+
+        return QuestionRevision.objects.create(
+            question = self,
+            revision = rev_no,
+            title = self.title,
+            author = author,
+            revised_at = revised_at,
+            tagnames = self.tagnames,
+            summary = comment,
+            text = text
+        )
+
def save(self, **kwargs):
"""
Overridden to manually manage addition of tags when the object
diff --git a/forum/models/repute.py b/forum/models/repute.py
index 533f1323..5e42542f 100755
--- a/forum/models/repute.py
+++ b/forum/models/repute.py
@@ -81,24 +81,26 @@ class ReputeManager(models.Manager):
by upvoted(also substracted from upvoted canceled). This is because we need
to prohibit gaming system by upvoting/cancel again and again.
"""
- if user is not None:
- today = datetime.date.today()
- sums = self.filter(models.Q(reputation_type=1) | models.Q(reputation_type=-8),
- user=user, reputed_at__range=(today, today + datetime.timedelta(1))). \
- aggregate(models.Sum('positive'), models.Sum('negative'))
-
- positive = sums['positive__sum']
- negative = sums['negative__sum']
-
- if positive is None:
- positive = 0
-
- if negative is None:
- negative = 0
-
- return positive + negative
- else:
+ if user is None:
return 0
+ else:
+ today = datetime.date.today()
+ tomorrow = today + datetime.timedelta(1)
+ rep_types = (1,-8)
+ sums = self.filter(models.Q(reputation_type__in=(1,-8)),
+ user=user,
+ reputed_at__range=(today, tomorrow),
+ ).aggregate(models.Sum('positive'), models.Sum('negative'))
+ if sums:
+ pos = sums['positive__sum']
+ neg = sums['negative__sum']
+ if pos is None:
+ pos = 0
+ if neg is None:
+ neg = 0
+ return pos + neg
+ else:
+ return 0
class Repute(models.Model):
"""The reputation histories for user"""
diff --git a/forum/models/user.py b/forum/models/user.py
index 9b8936e5..6d871bf4 100755
--- a/forum/models/user.py
+++ b/forum/models/user.py
@@ -126,7 +126,7 @@ class ValidationHash(models.Model):
class AuthKeyUserAssociation(models.Model):
key = models.CharField(max_length=255,null=False,unique=True)
- provider = models.CharField(max_length=64)
+ provider = models.CharField(max_length=64)#string 'yahoo', 'google', etc.
user = models.ForeignKey(User, related_name="auth_keys")
added_at = models.DateTimeField(default=datetime.datetime.now)
diff --git a/forum/views/commands.py b/forum/views/commands.py
index 88c2c077..ca6569e2 100755
--- a/forum/views/commands.py
+++ b/forum/views/commands.py
@@ -13,7 +13,7 @@ from django.contrib.auth.decorators import login_required
from forum.utils.decorators import ajax_method, ajax_login_required
import logging
-def vote(request, id):#refactor - pretty incomprehensible view used by various ajax calls
+def vote(request, id):#todo: pretty incomprehensible view used by various ajax calls
#issues: this subroutine is too long, contains many magic numbers and other issues
#it's called "vote" but many actions processed here have nothing to do with voting
"""
@@ -106,23 +106,12 @@ def vote(request, id):#refactor - pretty incomprehensible view used by various a
# favorite
elif vote_type == '4':
has_favorited = False
- fav_questions = FavoriteQuestion.objects.filter(question=question)
- # if the same question has been favorited before, then delete it
- if fav_questions is not None:
- for item in fav_questions:
- if item.user == request.user:
- item.delete()
- response_data['status'] = 1
- response_data['count'] = len(fav_questions) - 1
- if response_data['count'] < 0:
- response_data['count'] = 0
- has_favorited = True
- # if above deletion has not been executed, just insert a new favorite question
- if not has_favorited:
- new_item = FavoriteQuestion(question=question, user=request.user)
- new_item.save()
- response_data['count'] = FavoriteQuestion.objects.filter(question=question).count()
- Question.objects.update_favorite_count(question)
+ fave = request.user.toggle_favorite_question(question)
+ response_data['count'] = FavoriteQuestion.objects.filter(
+ question = question
+ ).count()
+ if fave == False:
+ response_data['status'] = 1
elif vote_type in ['1', '2', '5', '6']:
post_id = id
@@ -141,7 +130,13 @@ def vote(request, id):#refactor - pretty incomprehensible view used by various a
elif not __can_vote(vote_score, request.user):
response_data['allowed'] = -2
elif post.votes.filter(user=request.user).count() > 0:
+ #todo: I think we have a bug here
+ #we need to instead select vote on that particular post
+ #not just the latest vote, although it is a good shortcut.
+ #The problem is that this vote is deleted in one of
+ #the on...Canceled() functions
vote = post.votes.filter(user=request.user)[0]
+ # get latest vote by the current user
# unvote should be less than certain time
if (datetime.datetime.now().day - vote.voted_at.day) >= auth.VOTE_RULES['scope_deny_unvote_days']:
response_data['status'] = 2
@@ -189,8 +184,6 @@ def vote(request, id):#refactor - pretty incomprehensible view used by various a
item = FlaggedItem(user=request.user, content_object=post, flagged_at=datetime.datetime.now())
auth.onFlaggedItem(item, post, request.user)
response_data['count'] = post.offensive_flag_count
- # send signal when question or answer be marked offensive
- mark_offensive.send(sender=post.__class__, instance=post, mark_by=request.user)
elif vote_type in ['9', '10']:
post = question
post_id = id
@@ -206,7 +199,6 @@ def vote(request, id):#refactor - pretty incomprehensible view used by various a
response_data['status'] = 1
else:
auth.onDeleted(post, request.user)
- delete_post_or_answer.send(sender=post.__class__, instance=post, delete_by=request.user)
elif vote_type == '11':#subscribe q updates
user = request.user
if user.is_authenticated():
diff --git a/forum/views/writers.py b/forum/views/writers.py
index 2b2461de..a9406fdc 100755
--- a/forum/views/writers.py
+++ b/forum/views/writers.py
@@ -13,8 +13,6 @@ from django.utils.translation import ugettext as _
from django.core.urlresolvers import reverse
from django.core.exceptions import PermissionDenied
-from forum.utils.html import sanitize_html
-from markdown2 import Markdown
from forum.forms import *
from forum.models import *
from forum.auth import *
@@ -34,8 +32,6 @@ QUESTIONS_PAGE_SIZE = 10
# used in answers
ANSWERS_PAGE_SIZE = 10
-markdowner = Markdown(html4tags=True)
-
def upload(request):#ajax upload file to a question or answer
class FileTypeNotAllow(Exception):
pass
@@ -94,12 +90,16 @@ def ask(request):#view used to ask a new question
if form.is_valid():
added_at = datetime.datetime.now()
+ #todo: move this to clean_title
title = strip_tags(form.cleaned_data['title'].strip())
wiki = form.cleaned_data['wiki']
+ #todo: move this to clean_tagnames
tagnames = form.cleaned_data['tags'].strip()
text = form.cleaned_data['text']
- html = sanitize_html(markdowner.convert(text))
- summary = strip_tags(html)[:120]
+
+ #todo: move this to AskForm.clean_text
+ #todo: make custom MarkDownField
+ text = form.cleaned_data['text']
if request.user.is_authenticated():
author = request.user
@@ -110,14 +110,14 @@ def ask(request):#view used to ask a new question
added_at = added_at,
wiki = wiki,
tagnames = tagnames,
- summary = summary,
- text = sanitize_html(markdowner.convert(text))
+ text = text,
)
return HttpResponseRedirect(question.get_absolute_url())
else:
request.session.flush()
session_key = request.session.session_key
+ summary = strip_tags(text)[:120]
question = AnonymousQuestion(
session_key = session_key,
title = title,
@@ -162,32 +162,11 @@ def _retag_question(request, question):#non-url subview of edit question - just
form = RetagQuestionForm(question, request.POST)
if form.is_valid():
if form.has_changed():
- latest_revision = question.get_latest_revision()
- retagged_at = datetime.datetime.now()
- # Update the Question itself
- Question.objects.filter(id=question.id).update(
- tagnames = form.cleaned_data['tags'],
- last_edited_at = retagged_at,
- last_edited_by = request.user,
- last_activity_at = retagged_at,
- last_activity_by = request.user
- )
- # Update the Question's tag associations
- tags_updated = Question.objects.update_tags(question,
- form.cleaned_data['tags'], request.user)
- # Create a new revision
- QuestionRevision.objects.create(
- question = question,
- title = latest_revision.title,
- author = request.user,
- revised_at = retagged_at,
- tagnames = form.cleaned_data['tags'],
- summary = CONST['retagged'],
- text = latest_revision.text
+ question.retag(
+ retagged_by = request.user,
+ retagged_at = datetime.datetime.now(),
+ tagnames = form.cleaned_data['tags'],
)
- # send tags updated singal
- tags_updated.send(sender=question.__class__, question=question)
-
return HttpResponseRedirect(question.get_absolute_url())
else:
form = RetagQuestionForm(question)
@@ -201,7 +180,7 @@ def _edit_question(request, question):#non-url subview of edit_question - just e
latest_revision = question.get_latest_revision()
revision_form = None
if request.method == 'POST':
- if 'select_revision' in request.POST:
+ if 'select_revision' in request.POST:#revert-type edit
# user has changed revistion number
revision_form = RevisionForm(question, latest_revision, request.POST)
if revision_form.is_valid():
@@ -211,60 +190,26 @@ def _edit_question(request, question):#non-url subview of edit_question - just e
revision=revision_form.cleaned_data['revision']))
else:
form = EditQuestionForm(question, latest_revision, request.POST)
- else:
+ else:#new content edit
# Always check modifications against the latest revision
form = EditQuestionForm(question, latest_revision, request.POST)
if form.is_valid():
- html = sanitize_html(markdowner.convert(form.cleaned_data['text']))
if form.has_changed():
edited_at = datetime.datetime.now()
- tags_changed = (latest_revision.tagnames !=
- form.cleaned_data['tags'])
- tags_updated = False
- # Update the Question itself
- updated_fields = {
- 'title': form.cleaned_data['title'],
- 'last_edited_at': edited_at,
- 'last_edited_by': request.user,
- 'last_activity_at': edited_at,
- 'last_activity_by': request.user,
- 'tagnames': form.cleaned_data['tags'],
- 'summary': strip_tags(html)[:120],
- 'html': html,
- }
-
- # only save when it's checked
- # because wiki doesn't allow to be edited if last version has been enabled already
- # and we make sure this in forms.
- if ('wiki' in form.cleaned_data and
- form.cleaned_data['wiki']):
- updated_fields['wiki'] = True
- updated_fields['wikified_at'] = edited_at
-
- Question.objects.filter(
- id=question.id).update(**updated_fields)
- # Update the Question's tag associations
- if tags_changed:
- tags_updated = Question.objects.update_tags(
- question, form.cleaned_data['tags'], request.user)
- # Create a new revision
- revision = QuestionRevision(
- question = question,
- title = form.cleaned_data['title'],
- author = request.user,
- revised_at = edited_at,
- tagnames = form.cleaned_data['tags'],
- text = form.cleaned_data['text'],
+ edited_by = request.user
+ question.apply_edit(
+ edited_at = edited_at,
+ edited_by = edited_by,
+ title = form.cleaned_data['title'],
+ text = form.cleaned_data['text'],
+ #todo: summary name clash in question and question revision
+ comment = form.cleaned_data['summary'],
+ tags = form.cleaned_data['tags'],
+ wiki = form.cleaned_data.get('wiki',False),
)
- if form.cleaned_data['summary']:
- revision.summary = form.cleaned_data['summary']
- else:
- revision.summary = 'No.%s Revision' % latest_revision.revision
- revision.save()
return HttpResponseRedirect(question.get_absolute_url())
else:
-
revision_form = RevisionForm(question, latest_revision)
form = EditQuestionForm(question, latest_revision)
return render_to_response('question_edit.html', {
@@ -297,33 +242,15 @@ def edit_answer(request, id):
else:
form = EditAnswerForm(answer, latest_revision, request.POST)
if form.is_valid():
- html = sanitize_html(markdowner.convert(form.cleaned_data['text']))
if form.has_changed():
edited_at = datetime.datetime.now()
- updated_fields = {
- 'last_edited_at': edited_at,
- 'last_edited_by': request.user,
- 'html': html,
- }
- Answer.objects.filter(id=answer.id).update(**updated_fields)
-
- revision = AnswerRevision(
- answer=answer,
- author=request.user,
- revised_at=edited_at,
- text=form.cleaned_data['text']
- )
-
- if form.cleaned_data['summary']:
- revision.summary = form.cleaned_data['summary']
- else:
- revision.summary = 'No.%s Revision' % latest_revision.revision
- revision.save()
-
- answer.question.last_activity_at = edited_at
- answer.question.last_activity_by = request.user
- answer.question.save()
-
+ answer.apply_edit(
+ edited_at = edited_at,
+ edited_by = request.user,
+ text = form.cleaned_data['text'],
+ comment = form.cleaned_data['summary'],
+ wiki = False,#todo: fix this there is no "wiki" field on "edit answer"
+ )
return HttpResponseRedirect(answer.get_absolute_url())
else:
revision_form = RevisionForm(answer, latest_revision)
@@ -349,18 +276,16 @@ def answer(request, id):#process a new answer
author=request.user,
added_at=update_time,
wiki=wiki,
- text=sanitize_html(markdowner.convert(text)),
+ text=text,
email_notify=form.cleaned_data['email_notify']
)
else:
request.session.flush()
- html = sanitize_html(markdowner.convert(text))
- summary = strip_tags(html)[:120]
anon = AnonymousAnswer(
question=question,
wiki=wiki,
text=text,
- summary=summary,
+ summary=strip_tags(text)[:120],
session_key=request.session.session_key,
ip_addr=request.META['REMOTE_ADDR'],
)
@@ -412,11 +337,10 @@ def __comments(request, obj, type):#non-view generic ajax handler to load commen
response = __generate_comments_json(obj, type, user)
elif request.method == "POST":
if auth.can_add_comments(user,obj):
- comment_data = request.POST.get('comment')
- comment = Comment(content_object=obj, comment=comment_data, user=request.user)
- comment.save()
- obj.comment_count = obj.comment_count + 1
- obj.save()
+ obj.add_comment(
+ comment = request.POST.get('comment'),
+ user = request.user,
+ )
response = __generate_comments_json(obj, type, user)
else:
response = HttpResponseForbidden(mimetype="application/json")
diff --git a/log/README.TXT b/log/README.TXT
index 54247a82..9c51276d 100755
--- a/log/README.TXT
+++ b/log/README.TXT
@@ -1 +1 @@
-this file is just a placeholder so the empty directory is not ignored by version control \ No newline at end of file
+this file is just a placeholder so the empty directory is not ignored by version control
diff --git a/settings.py b/settings.py
index a84ba4a6..5e147a4d 100644
--- a/settings.py
+++ b/settings.py
@@ -71,13 +71,7 @@ INSTALLED_APPS = [
'debug_toolbar',
#'django_evolution',
'forum',
+ #'stackexchange',#prototype of SE loader
]
AUTHENTICATION_BACKENDS = ['django.contrib.auth.backends.ModelBackend',]
-
-#from forum.settings import setup_settings
-#import settings
-#setup_settings(settings)
-
-#print INSTALLED_APPS
-
diff --git a/settings_local.py.dist b/settings_local.py.dist
index 5651fec7..2b23c010 100755
--- a/settings_local.py.dist
+++ b/settings_local.py.dist
@@ -55,6 +55,9 @@ EMAIL_HOST='osqa.net'
EMAIL_PORT='25'
EMAIL_USE_TLS=False
+#HACK - anonymous user email - for email-less users
+ANONYMOUS_USER_EMAIL = 'anonymous@osqa.net'
+
#LOCALIZATIONS
TIME_ZONE = 'America/New_York'
diff --git a/stackexchange/ANOMALIES b/stackexchange/ANOMALIES
new file mode 100644
index 00000000..05a7dbdb
--- /dev/null
+++ b/stackexchange/ANOMALIES
@@ -0,0 +1,14 @@
+* several user accounts with same email
+* users with no openid
+* users with no email (hack: gravatar set to settings.ANONYMOUS_USER_EMAIL)
+* users with no screen name
+* users with no email and no screen name (25% in homeschool)
+* tag preferences are not stored explicitly (interesting/ignored)
+ maybe they are in se.User.preferences_raw
+ but the data there is not marked up and is kind of cryptic
+* we don't have Community user. SE has one with id=-1
+ this id may break the load script
+ potential break places are anywhere X.get_user() is called
+ issues may happen with larger data sets where activity
+ of user "Community" is somehow reflected in a way
+ that load_stackexchange does not take care of
diff --git a/stackexchange/README b/stackexchange/README
new file mode 100644
index 00000000..64d8f5fb
--- /dev/null
+++ b/stackexchange/README
@@ -0,0 +1,62 @@
+this app's function will be to:
+
+* install its own tables (#todo: not yet automated)
+* read SE xml dump into DjangoDB (automated)
+* populate osqa database (automated)
+* remove SE tables (#todo: not done yet)
+
+Current process to load SE data into OSQA is:
+==============================================
+
+1) backup database
+
+2) unzip SE dump into dump_dir (any directory name)
+ you may want to make sure that your dump directory is listed in the .gitignore file
+ so that you don't publish it by mistake
+
+3) enable 'stackexchange' in the list of installed apps (probably already in settings.py)
+
+4) (optional - create models.py for SE, which is included anyway) run:
+
+ #a) run in-place removal of xml namespace prefix to make parsing easier
+ perl -pi -w -e 's/xs://g' $SE_DUMP_PATH/xsd/*.xsd
+ cd stackexchange
+ python parse_models.py $SE_DUMP_PATH/xsd/*.xsd > models.py
+
+5) Install stackexchange models (as well as any other missing models)
+ python manage.py syncdb
+
+6) make sure that osqa badges are installed
+ if not, run (example for mysql):
+
+ mysql -u user -p dbname < sql_scripts/badges.sql
+
+7) load SE data:
+
+ python manage.py load_stackexchange dump_dir
+
+ if anything doesn't go right - run 'python manage.py flush' and repeat
+ steps 6 and 7
+
+NOTES:
+============
+
+Here is the load script that I used for the testing
+it assumes that SE dump has been unzipped inside the tmp directory
+
+ #!/bin/sh
+ python manage.py flush
+ #delete all data
+ mysql -u osqa -p osqa < sql_scripts/badges.sql
+ python manage.py load_stackexchange tmp
+
+Untested parts are tagged with comments starting with
+#todo:
+
+The test set did not have all the use cases of StackExchange represented so
+it may break with other sets.
+
+The job takes some time to run, especially
+content revisions and votes - may be optimized
+
+Some of the fringe cases are described in file stackexchange/ANOMALIES
diff --git a/stackexchange/__init__.py b/stackexchange/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/stackexchange/__init__.py
diff --git a/stackexchange/management/__init__.py b/stackexchange/management/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/stackexchange/management/__init__.py
diff --git a/stackexchange/management/commands/__init__.py b/stackexchange/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/stackexchange/management/commands/__init__.py
diff --git a/stackexchange/management/commands/load_stackexchange.py b/stackexchange/management/commands/load_stackexchange.py
new file mode 100644
index 00000000..afe4b9ea
--- /dev/null
+++ b/stackexchange/management/commands/load_stackexchange.py
@@ -0,0 +1,804 @@
+from django.core.management.base import BaseCommand
+from django.template.defaultfilters import slugify #todo: adopt unicode-aware slugify
+#todo: http://stackoverflow.com/questions/837828/how-to-use-a-slug-in-django
+import os
+import re
+import sys
+import stackexchange.parse_models as se_parser
+from xml.etree import ElementTree as et
+from django.db import models
+import forum.models as osqa
+import stackexchange.models as se
+from forum.forms import EditUserEmailFeedsForm
+from forum.utils.html import sanitize_html
+from django.conf import settings
+from django.contrib.auth.models import Message as DjangoMessage
+from django.utils.translation import ugettext as _
+#from markdown2 import Markdown
+#markdowner = Markdown(html4tags=True)
+
+xml_read_order = (
+ 'VoteTypes','UserTypes','Users','Users2Votes',
+ 'Badges','Users2Badges','CloseReasons','FlatPages',
+ 'MessageTypes','PostHistoryTypes','PostTypes','SchemaVersion',
+ 'Settings','SystemMessages','ThemeResources','ThemeTextResources',
+ 'ThrottleBucket','UserHistoryTypes','UserHistory',
+ 'Users2Badges','VoteTypes','Users2Votes','MessageTypes',
+ 'Posts','Posts2Votes','PostHistory','PostComments',
+ 'ModeratorMessages','Messages','Comments2Votes',
+ )
+
+#association tables SE item id --> OSQA item id
+#table associations are implied
+#todo: there is an issue that these may be inconsistent with the database
+USER = {}#SE User.id --> django(OSQA) User.id
+QUESTION = {}
+ANSWER = {}
+NAMESAKE_COUNT = {}# number of times user name was used - for X.get_screen_name
+
+class X(object):#
+ """class with methods for handling some details
+ of SE --> OSQA mapping
+ """
+ badge_type_map = {'1':'gold','2':'silver','3':'bronze'}
+
+ osqa_supported_id_providers = (
+ 'google','yahoo','aol','myopenid',
+ 'flickr','technorati',
+ 'wordpress','blogger','livejournal',
+ 'claimid','vidoop','verisign',
+ 'openidurl','facebook','local',
+'twitter' #oauth is not on this list, because it has no url of its own
+ )
+
+ #map SE VoteType -> osqa.User vote method
+ #created methods with the same call structure in osqa.User
+ #User.<vote_method>(post, timestamp=None, cancel=False)
+ vote_actions = {
+ 'UpMod':'upvote',
+ 'DownMod':'downvote',
+ 'AcceptedByOriginator':'accept_answer',
+ 'Offensive':'flag_post',
+ 'Favorite':'toggle_favorite_question',
+ }
+
+ #these modes cannot be mixed
+ #only wiki is assumed to be mixable
+ exclusive_revision_modes = (
+ 'initial','edit','rollback','lock',
+ 'migrate','close','merge','delete',
+ )
+
+ #badges whose names don't match exactly, but
+ #present in both SE and OSQA
+ badge_exceptions = {# SE --> OSQA
+ 'Citizen Patrol':'Citizen patrol',#single #todo: why sentence case?
+ 'Strunk &amp; White':'Strunk & White',#single
+ 'Civic Duty':'Civic duty',
+ }
+
+ revision_type_map = {
+ 'Initial Title':'initial',
+ 'Initial Body':'initial',
+ 'Initial Tags':'initial',
+ 'Edit Title':'edit',
+ 'Edit Body':'edit',
+ 'Edit Tags':'edit',
+ 'Rollback Title':'rollback',
+ 'Rollback Body':'rollback',
+ 'Rollback Tags':'rollback',
+ 'Post Closed':'close',
+ 'Post Reopened':'close',
+ 'Post Deleted':'delete',
+ 'Post Undeleted':'delete',
+ 'Post Locked':'lock',
+ 'Post Unlocked':'lock',
+ 'Community Owned':'wiki',
+ 'Post Migrated':'migrate',
+ 'Question Merged':'merge',
+ }
+
+ close_reason_map = {
+ 1:1,#duplicate
+ 2:2,#off-topic
+ 3:3,#subjective and argumentative
+ 4:4,#not a real question
+ 5:7,#offensive
+ 6:6,#irrelevant or outdated question
+ 7:9,#too localized
+ 10:8,#spam
+ }
+
+ @classmethod
+ def get_message_text(cls, se_m):
+ """try to intelligently translate
+SE message to OSQA so that it makes sense in
+ our context
+ """
+ #todo: properly translate messages
+ #todo: maybe work through more instances of messages
+ if se_m.message_type.name == 'Badge Notification':
+ return se_m.text
+ else:
+ if 'you are now an administrator' in se_m.text:
+ return _('Congratulations, you are now an Administrator')
+ elif re.search(r'^You have \d+ new',se_m.text):
+ bits = se_m.text.split('.')
+ text = bits[0]
+ if se_m.user.id == -1:
+ return None
+ url = cls.get_user(se_m.user).get_profile_url()
+ return '<a href="%s?sort=responses">%s</a>' % (url,text)
+ return None
+
+ @classmethod
+ def get_post(cls, se_post):
+ #todo: fix this hack - either in-memory id association table
+ #or use database to store these associations
+ post_type = se_post.post_type.name
+ if post_type == 'Question':
+ return osqa.Question.objects.get(id=QUESTION[se_post.id].id)
+ elif post_type == 'Answer':
+ return osqa.Answer.objects.get(id=ANSWER[se_post.id].id)
+ else:
+ raise Exception('unknown post type %s' % post_type)
+
+ @classmethod
+ def get_close_reason(cls, se_reason):
+ #todo: this is a guess - have not seen real data
+ se_reason = int(se_reason)
+ return cls.close_reason_map[se_reason]
+
+ @classmethod
+ def get_user(cls, se_user):
+ #todo: same as get_post
+ return osqa.User.objects.get(id=USER[se_user.id].id)
+
+    @classmethod
+    def get_post_revision_group_types(cls, rev_group):
+        """returns list of distinct revision types found in
+        a group of PostHistory records (rev_group)
+
+        raises Exception if the group contains more than one
+        of the exclusive revision modes, 'wiki' being the only
+        type that is allowed to accompany another one
+        """
+        rev_types = {}
+        for rev in rev_group:
+            rev_type = cls.get_post_revision_type(rev)
+            rev_types[rev_type] = 1
+        rev_types = list(rev_types.keys())
+
+        #make sure that exclusive rev modes are not mixed
+        #only the exclusive types are counted, so that presence of
+        #'wiki' cannot mask a conflict such as edit + close + wiki
+        exclusive = [t for t in rev_types if t in cls.exclusive_revision_modes]
+        if len(exclusive) > 1:
+            tstr = ','.join(rev_types)
+            gstr = ','.join([str(rev.id) for rev in rev_group])
+            msg = 'incompatible revision types %s in PostHistory %s' % (tstr,gstr)
+            raise Exception(msg)
+        return rev_types
+
+ @classmethod
+ def clean_tags(cls, tags):
+ tags = re.subn(r'\s+',' ',tags.strip())[0]
+ bits = tags.split(' ')
+ tags = ' '.join([bit[1:-1] for bit in bits])
+ tags = re.subn(r'\xf6','-',tags)[0]
+ return tags
+
+ @classmethod
+ def get_screen_name(cls, name):
+ """always returns unique screen name
+ even if there are multiple users in SE
+ with the same exact screen name
+ """
+ if name is None:
+ name = 'anonymous'
+ name = name.strip()
+ name = re.subn(r'\s+',' ',name)[0]#remove repeating spaces
+
+ try:
+ u = osqa.User.objects.get(username = name)
+ try:
+ if u.location:
+ name += ', %s' % u.location
+ if name in NAMESAKE_COUNT:
+ NAMESAKE_COUNT[name] += 1
+ name += ' %d' % NAMESAKE_COUNT[name]
+ else:
+ NAMESAKE_COUNT[name] = 1
+ except osqa.User.DoesNotExist:
+ pass
+ except osqa.User.DoesNotExist:
+ NAMESAKE_COUNT[name] = 1
+ return name
+
+ @classmethod
+ def get_email(cls, email):#todo: fix fringe case - user did not give email!
+ if email is None:
+ return settings.ANONYMOUS_USER_EMAIL
+ else:
+ assert(email != '')
+ return email
+
+ @classmethod
+ def get_post_revision_type(cls, rev):
+ rev_name = rev.post_history_type.name
+ rev_type = cls.revision_type_map.get(rev_name, None)
+ if rev_type is None:
+ raise Exception('dont understand revision type %s' % rev)
+ return rev_type
+
+ #crude method of getting id provider name from the url
+ @classmethod
+ def get_openid_provider_name(cls, openid_url):
+ openid_str = str(openid_url)
+ bits = openid_str.split('/')
+ base_url = bits[2] #assume this is base url
+ url_bits = base_url.split('.')
+ provider_name = url_bits[-2].lower()
+ if provider_name not in cls.osqa_supported_id_providers:
+ raise Exception('could not determine login provider for %s' % openid_url)
+ return provider_name
+
+ @staticmethod
+ def blankable(input):
+ if input is None:
+ return ''
+ else:
+ return input
+
+    @classmethod
+    def parse_badge_summary(cls, badge_summary):
+        """parses badge summary string made of '<type>=<count>' items
+        where type is 1, 2 or 3 (see badge_type_map)
+
+        returns tuple (gold,silver,bronze) of badge counts,
+        zeros are returned for an empty summary
+        raises Exception if an item cannot be parsed
+        """
+        #explicit dictionary is used because assignment via locals()
+        #does not change local variables of a function
+        counts = {'gold':0, 'silver':0, 'bronze':0}
+        if badge_summary:
+            if len(badge_summary) > 3:
+                print 'warning: guessing that badge summary is comma separated'
+                print 'have %s' % badge_summary
+                bits = badge_summary.split(',')
+            else:
+                bits = [badge_summary]
+            for bit in bits:
+                #tolerate whitespace around the guessed comma separator
+                m = re.search(r'^(?P<type>[1-3])=(?P<count>\d+)$', bit.strip())
+                if not m:
+                    raise Exception('could not parse badge summary: %s' % badge_summary)
+                badge_type = cls.badge_type_map[m.group('type')]
+                counts[badge_type] = int(m.group('count'))
+        return (counts['gold'], counts['silver'], counts['bronze'])
+
+ @classmethod
+ def get_badge_name(cls, name):
+ return cls.badge_exceptions.get(name, name)
+
+class Command(BaseCommand):
+ help = 'Loads StackExchange data from unzipped directory of XML files into the OSQA database'
+ args = 'se_dump_dir'
+
+ def handle(self, *arg, **kwarg):
+ if len(arg) < 1 or not os.path.isdir(arg[0]):
+ print 'Error: first argument must be a directory with all the SE *.xml files'
+ sys.exit(1)
+
+ self.dump_path = arg[0]
+ #read the data into SE tables
+ for xml in xml_read_order:
+ xml_path = self.get_xml_path(xml)
+ table_name = self.get_table_name(xml)
+ self.load_xml_file(xml_path, table_name)
+
+ #this is important so that when we clean up messages
+ #automatically generated by the procedures below
+ #we do not delete old messages
+ #todo: unfortunately this may need to be redone
+ #when we upgrade to django 1.2 and definitely by 1.4 when
+ #the current message system will be replaced with the
+ #django messages framework
+ self.save_osqa_message_id_list()
+
+ #transfer data into OSQA tables
+ print 'Transferring users...',
+ sys.stdout.flush()
+ self.transfer_users()
+ print 'done.'
+ print 'Transferring content edits...',
+ sys.stdout.flush()
+ self.transfer_question_and_answer_activity()
+ print 'done.'
+ print 'Transferring view counts...',
+ sys.stdout.flush()
+ self.transfer_question_view_counts()
+ print 'done.'
+ print 'Transferring comments...',
+ sys.stdout.flush()
+ self.transfer_comments()
+ print 'done.'
+ print 'Transferring badges and badge awards...',
+ sys.stdout.flush()
+ self.transfer_badges()
+ print 'done.'
+ print 'Transferring votes...',
+ sys.stdout.flush()
+ self.transfer_votes()#includes favorites, accepts and flags
+ print 'done.'
+
+ self.cleanup_messages()#delete autogenerated messages
+ self.transfer_messages()
+
+#todo: it is not clear yet how to handle these
+ self.transfer_update_subscriptions()
+ self.transfer_tag_preferences()
+ self.transfer_meta_pages()
+
+    def save_osqa_message_id_list(self):
+        """remembers ids of the messages that exist at the time of the call
+        the list is stored in self._osqa_message_id_list
+        and is read by cleanup_messages()
+        """
+        #values('id') would give a list of {'id': N} dictionaries,
+        #which is not usable in the id__in lookup - flat list of ids is needed
+        id_list = list(DjangoMessage.objects.values_list('id', flat=True))
+        self._osqa_message_id_list = id_list
+
+ def cleanup_messages(self):
+ """deletes messages generated by the load process
+ """
+ id_list = self._osqa_message_id_list
+ mset = DjangoMessage.objects.all().exclude(id__in=id_list)
+ mset.delete()
+
+ def transfer_messages(self):
+ """transfers some messages from
+ SE to OSQA
+ """
+ for m in se.Message.objects.all():
+ if m.is_read:
+ continue
+ if m.user.id == -1:
+ continue
+ u = X.get_user(m.user)
+ text = X.get_message_text(m)
+ if text:
+ u.message_set.create(
+ message=text,
+ )
+
+ def _process_post_initial_revision_group(self, rev_group):
+
+ title = None
+ text = None
+ tags = None
+ wiki = False
+ author = USER[rev_group[0].user.id]
+ added_at = rev_group[0].creation_date
+
+ for rev in rev_group:
+ rev_type = rev.post_history_type.name
+ if rev_type == 'Initial Title':
+ title = rev.text
+ elif rev_type == 'Initial Body':
+ text = rev.text
+ elif rev_type == 'Initial Tags':
+ tags = X.clean_tags(rev.text)
+ elif rev_type == 'Community Owned':
+ wiki = True
+ else:
+ raise Exception('unexpected revision type %s' % rev_type)
+
+ post_type = rev_group[0].post.post_type.name
+ if post_type == 'Question':
+ q = osqa.Question.objects.create_new(
+ title = title,
+ author = author,
+ added_at = added_at,
+ wiki = wiki,
+ tagnames = tags,
+ text = text
+ )
+ QUESTION[rev_group[0].post.id] = q
+ elif post_type == 'Answer':
+ q = X.get_post(rev_group[0].post.parent)
+ a = osqa.Answer.objects.create_new(
+ question = q,
+ author = author,
+ added_at = added_at,
+ wiki = wiki,
+ text = text,
+ )
+ ANSWER[rev_group[0].post.id] = a
+ else:
+ post_id = rev_group[0].post.id
+ raise Exception('unknown post type %s for id=%d' % (post_type, post_id))
+
+ def _process_post_edit_revision_group(self, rev_group):
+ #question apply edit
+ (title, text, tags) = (None, None, None)
+ for rev in rev_group:
+ rev_type = rev.post_history_type.name
+ if rev_type == 'Edit Title':
+ title = rev.text
+ elif rev_type == 'Edit Body':
+ text = rev.text
+ elif rev_type == 'Edit Tags':
+ tags = X.clean_tags(rev.text)
+ elif rev_type == 'Community Owned':
+ pass
+ else:
+ raise Exception('unexpected revision type %s' % rev_type)
+
+ rev0 = rev_group[0]
+ edited_by = USER[rev0.user.id]
+ edited_at = rev0.creation_date
+ comment = ';'.join([rev.comment for rev in rev_group if rev.comment])
+ post_type = rev0.post.post_type.name
+
+ if post_type == 'Question':
+ q = X.get_post(rev0.post)
+ q.apply_edit(
+ edited_at = edited_at,
+ edited_by = edited_by,
+ title = title,
+ text = text,
+ comment = comment,
+ tags = tags,
+ )
+ elif post_type == 'Answer':
+ a = X.get_post(rev0.post)
+ a.apply_edit(
+ edited_at = edited_at,
+ edited_by = edited_by,
+ text = text,
+ comment = comment,
+ )
+
+ def _make_post_wiki(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ if rev.post_history_type.name == 'Community Owned':
+ p = X.get_post(rev.post)
+ u = X.get_user(rev.user)
+ t = rev.creation_date
+ p.wiki = True
+ p.wikified_at = t
+ p.wikified_by = u
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def mark_activity(self,p,u,t):
+ """p,u,t - post, user, timestamp
+ """
+ if isinstance(p, osqa.Question):
+ p.last_activity_by = u
+ p.last_activity_at = t
+ elif isinstance(p, osqa.Answer):
+ p.question.last_activity_by = u
+ p.question.last_activity_at = t
+ p.question.save()
+
+ def _process_post_rollback_revision_group(self, rev_group):
+ #todo: don't know what to do here as there were no
+ #such data available
+ pass
+
+ def _process_post_lock_revision_group(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ rev_type = rev.post_history_type.name
+ if rev_type.endswith('ocked'):
+ t = rev.creation_date
+ u = X.get_user(rev.user)
+ p = X.get_post(rev.post)
+ if rev_type == 'Post Locked':
+ p.locked = True
+ p.locked_by = u
+ p.locked_at = t
+ elif rev_type == 'Post Unlocked':
+ p.locked = False
+ p.locked_by = None
+ p.locked_at = None
+ else:
+ return
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def _process_post_close_revision_group(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ if rev.post.post_type.name != 'Question':
+ return
+ rev_type = rev.post_history_type.name
+ if rev_type in ('Post Closed', 'Post Reopened'):
+ t = rev.creation_date
+ u = X.get_user(rev.user)
+ p = X.get_post(rev.post)
+ if rev_type == 'Post Closed':
+ p.closed = True
+ p.closed_at = t
+ p.closed_by = u
+ p.close_reason = X.get_close_reason(rev.text)
+ elif rev_type == 'Post Reopened':
+ p.closed = False
+ p.closed_at = None
+ p.closed_by = None
+ p.close_reason = None
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def _process_post_delete_revision_group(self, rev_group):
+ #todo: untested
+ for rev in rev_group:
+ rev_type = rev.post_history_type.name
+ if rev_type.endswith('eleted'):
+ t = rev.creation_date
+ u = X.get_user(rev.user)
+ p = X.get_post(rev.post)
+ if rev_type == 'Post Deleted':
+ p.deleted = True
+ p.deleted_at = t
+ p.deleted_by = u
+ elif rev_type == 'Post Undeleted':
+ p.deleted = False
+ p.deleted_at = None
+ p.deleted_by = None
+ self.mark_activity(p,u,t)
+ p.save()
+ return
+
+ def _process_post_revision_group(self, rev_group):
+     """Route one revision group to the handler for its kind.
+
+     The kinds present in the group come from
+     X.get_post_revision_group_types. Handlers are tried in a fixed
+     priority order and only the first matching kind is processed;
+     a 'wiki' kind is then handled in addition to whatever matched.
+     """
+     kinds = X.get_post_revision_group_types(rev_group)
+     #priority order: 'initial','edit','rollback','lock','close','delete'
+     #todo: rollback, lock, close and delete are not tested
+     #merge and migrate actions are ignored
+     handlers = (
+         ('initial', '_process_post_initial_revision_group'),
+         ('edit', '_process_post_edit_revision_group'),
+         ('rollback', '_process_post_rollback_revision_group'),
+         ('lock', '_process_post_lock_revision_group'),
+         ('close', '_process_post_close_revision_group'),
+         ('delete', '_process_post_delete_revision_group'),
+     )
+     for kind, handler_name in handlers:
+         if kind in kinds:
+             #looked up by name so only the chosen handler is resolved
+             getattr(self, handler_name)(rev_group)
+             break
+     #wiki is mixable with other groups, so process it in addition
+     if 'wiki' in kinds:
+         self._make_post_wiki(rev_group)
+
+ def transfer_tag_preferences(self):
+     """Placeholder: tag preferences are not transferred.
+
+     todo: figure out where these are stored in SE
+     (maybe in se.User.preferences_raw?)
+     """
+     return None
+
+ def transfer_question_and_answer_activity(self):
+     """transfers all question and answer
+     edits and related status changes
+
+     Revisions are read ordered by ('creation_date', 'revision_guid').
+     Consecutive rows sharing a revision_guid (elementary revisions
+     posted at once) are passed together, as a list, to
+     _process_post_revision_group. An empty revision table is a no-op.
+     """
+     from itertools import groupby #local import keeps the block self-contained
+     #assuming that there are only two post types
+     se_revs = se.PostHistory.objects.all()
+     #assuming that chronological order is correct and there
+     #will be no problems of data integrity upon insertion of records
+     se_revs = se_revs.order_by('creation_date','revision_guid')
+     #groupby merges only adjacent rows with an equal guid and yields
+     #nothing at all when there are no revisions
+     for guid, revs in groupby(se_revs, key=lambda rev: rev.revision_guid):
+         self._process_post_revision_group(list(revs))
+
+ def transfer_comments(self):
+     """Copy every non-deleted SE comment onto its imported post.
+
+     Deleted comments are dropped with a warning. The target post is
+     looked up in QUESTION or ANSWER by SE post id, depending on the
+     post type; comments on any other post type are dropped with a
+     warning as well.
+     """
+     for se_c in se.PostComment.objects.all():
+         if se_c.deletion_date:
+             print 'Warning deleted comment %d dropped' % se_c.id
+             continue
+         se_post = se_c.post
+         post_type = se_post.post_type.name
+         if post_type == 'Question':
+             osqa_post = QUESTION[se_post.id]
+         elif post_type == 'Answer':
+             osqa_post = ANSWER[se_post.id]
+         else:
+             #without this branch the comment would be attached to the
+             #post left over from the previous iteration (or crash on
+             #the very first one)
+             print 'Warning comment %d on unsupported post type %s dropped' % (se_c.id, post_type)
+             continue
+
+         osqa_post.add_comment(
+             comment = se_c.text,
+             added_at = se_c.creation_date,
+             user = USER[se_c.user.id]
+         )
+
+ def _install_missing_badges(self):
+     """Create an osqa badge for every SE badge that has no counterpart.
+
+     Names of created badges are recorded in self._missing_badges with
+     an award counter of 0. Descriptions longer than 300 characters are
+     cut to 300, with a warning; a missing description becomes ''.
+     """
+     self._missing_badges = {}
+     for se_b in se.Badge.objects.all():
+         name = X.get_badge_name(se_b.name)
+         try:
+             osqa.Badge.objects.get(name=name)
+         except osqa.Badge.DoesNotExist:
+             #only "not found" means the badge must be installed; any
+             #other error is a real problem and is allowed to propagate
+             self._missing_badges[name] = 0
+             description = se_b.description or ''#column is nullable in SE
+             if len(description) > 300:
+                 print 'Warning truncated description for badge %d' % se_b.id
+                 description = description[:300]
+             osqa.Badge.objects.create(
+                 name = name,
+                 slug = slugify(name),
+                 description = description,
+                 multiple = (not se_b.single),
+                 type = se_b.class_type
+             )
+
+ def _award_badges(self):
+     """Re-create SE badge awards as osqa Award records.
+
+     Awards to the community user (id -1) are skipped. A badge that is
+     not `multiple` is given to the same user at most once. Awards of
+     badges listed in self._missing_badges bump that badge's counter.
+     """
+     #note: SE does not keep information on
+     #content-related badges like osqa does
+     for se_a in se.User2Badge.objects.all():
+         if se_a.user.id == -1:
+             continue #skip community user
+         u = USER[se_a.user.id]
+         badge_name = X.get_badge_name(se_a.badge.name)
+         b = osqa.Badge.objects.get(name=badge_name)
+         if b.multiple == False:
+             #the check must be per user: counting awards to anybody
+             #would let only the first recipient keep a single badge
+             if b.award_badge.filter(user=u).count() > 0:
+                 continue
+         #todo: fake content object here b/c SE does not support this
+         #todo: but osqa requires related content object
+         osqa.Award.objects.create(
+             user=u,
+             badge=b,
+             awarded_at=se_a.date,
+             content_object=u,
+         )
+         if b.name in self._missing_badges:
+             self._missing_badges[b.name] += 1
+
+ def _cleanup_badges(self):
+     """Delete installed badges that were never awarded; report the rest.
+
+     Works from self._missing_badges, which maps the name of each newly
+     installed badge to the number of times it was awarded.
+     """
+     never_awarded = []
+     awarded = []
+     for badge_name in self._missing_badges.keys():
+         times = self._missing_badges[badge_name]
+         if times == 0:
+             never_awarded.append(badge_name)
+         if times > 0:
+             awarded.append(badge_name)
+     osqa.Badge.objects.filter(name__in=never_awarded).delete()
+     print 'Warning - following unsupported badges were installed:'
+     print ', '.join(awarded)
+
+ def transfer_badges(self):
+     """Run the three badge steps in order.
+
+     1) install missing badges, 2) award badges,
+     3) delete unused newly installed badges.
+     """
+     #note: badge level is neglected
+     steps = (
+         self._install_missing_badges,
+         self._award_badges,
+         self._cleanup_badges,
+     )
+     for step in steps:
+         step()
+
+ def transfer_question_view_counts(self):
+     """Copy view_count from each SE question to the post X.get_post returns for it."""
+     se_questions = se.Post.objects.filter(post_type__name='Question')
+     for se_question in se_questions:
+         question = X.get_post(se_question)
+         question.view_count = se_question.view_count
+         question.save()
+
+
+ def transfer_votes(self):
+     """Replay SE post votes through the matching osqa.User method.
+
+     Vote types missing from X.vote_actions are skipped. Each vote is
+     applied at its creation date; if it also has a deletion date, the
+     same method is called again at that date with cancel=True.
+     """
+     for se_vote in se.Post2Vote.objects.all():
+         kind = se_vote.vote_type.name
+         if kind in X.vote_actions:
+             voter = X.get_user(se_vote.user)
+             post = X.get_post(se_vote.post)
+             #X.vote_actions maps the SE vote type to a method name
+             apply_vote = getattr(osqa.User, X.vote_actions[kind])
+             apply_vote(voter, post, timestamp = se_vote.creation_date)
+             if se_vote.deletion_date:
+                 apply_vote(voter, post, timestamp = se_vote.deletion_date, cancel=True)
+
+ def transfer_update_subscriptions(self):
+     """Placeholder: update subscriptions are not transferred.
+
+     todo: not clear where this is stored in SE
+     (maybe in se.User.preferences_raw?)
+     """
+     return None
+
+ def transfer_meta_pages(self):
+     """Placeholder: meta pages are not transferred.
+
+     here we actually don't have anything in the database yet
+     so we can't do this
+     """
+     return None
+
+ def load_xml_file(self, xml_path, table_name):
+     """Load every <row> of an XML dump file into a 'stackexchange' model.
+
+     Each child element of a row becomes one field: the tag is mapped
+     to a field name and the text is converted for that field's type,
+     both via se_parser. Progress is printed on a single line.
+     """
+     tree = et.parse(xml_path)
+     print 'loading from %s to %s' % (xml_path, table_name) ,
+     model = models.get_model('stackexchange', table_name)
+     saved = 0
+     for row in tree.findall('.//row'):
+         entry = model()
+         saved += 1
+         for col in row.getchildren():
+             name = se_parser.parse_field_name(col.tag)
+             field = model._meta.get_field(name)
+             setattr(entry, name, se_parser.parse_value(col.text, field))
+         entry.save()
+     print '... %d objects saved' % saved
+
+ def get_table_name(self,xml):
+ # Thin delegate: passes `xml` unchanged to se_parser.get_table_name
+ # and returns its result.
+ return se_parser.get_table_name(xml)
+
+ def get_xml_path(self, xml):
+     """Return the path of '<xml>.xml' inside self.dump_path.
+
+     Prints an error and exits the process with status 1 when the
+     file does not exist.
+     """
+     candidate = os.path.join(self.dump_path, xml + '.xml')
+     if os.path.isfile(candidate):
+         return candidate
+     print 'Error: file %s not found' % candidate
+     sys.exit(1)
+
+ def transfer_users(self):
+     """Create an osqa user for every SE user with id >= 1.
+
+     Administrators become superusers and moderators become staff; an
+     unknown user type raises Exception. Every user except the
+     'Unregistered' ones also gets an AuthKeyUserAssociation built from
+     the SE open_id. Email feed settings are saved for each user, and
+     the new user is stored in USER under the SE user id.
+     """
+     for se_u in se.User.objects.all():
+         if se_u.id < 1:#skip the Community user
+             continue
+         u = osqa.User()
+         u_type = se_u.user_type.name
+         if u_type == 'Administrator':
+             u.is_superuser = True
+         elif u_type == 'Moderator':
+             u.is_staff = True
+         elif u_type not in ('Unregistered', 'Registered'):
+             raise Exception('unknown user type %s' % u_type)
+
+         #if user is not registered, no association record created
+         #we do not allow posting by users who are not authenticated
+         #probably they'll just have to "recover" their account by email
+         #must be reset for every user: a leftover association from an
+         #earlier iteration would otherwise be re-pointed to this user
+         u_auth = None
+         if u_type != 'Unregistered':
+             assert(se_u.open_id)#everybody must have open_id
+             u_auth = osqa.AuthKeyUserAssociation()
+             u_auth.key = se_u.open_id
+             u_auth.provider = X.get_openid_provider_name(se_u.open_id)
+             u_auth.added_at = se_u.creation_date
+
+         if se_u.open_id is None and se_u.email is None:
+             print 'Warning: SE user %d is not recoverable (no email or openid)' % se_u.id
+
+         u.reputation = 1#se_u.reputation, it's actually re-computed
+         u.last_seen = se_u.last_access_date
+         u.email = X.get_email(se_u.email)
+         u.location = X.blankable(se_u.location)
+         u.date_of_birth = se_u.birthday #datetime -> date
+         u.website = X.blankable(se_u.website_url)
+         u.about = X.blankable(se_u.about_me)
+         u.last_login = se_u.last_login_date
+         u.date_joined = se_u.creation_date
+         u.is_active = True #todo: this may not be the case
+
+         u.username = X.get_screen_name(se_u.display_name)
+         u.real_name = X.blankable(se_u.real_name)
+
+         (gold,silver,bronze) = X.parse_badge_summary(se_u.badge_summary)
+         u.gold = gold
+         u.silver = silver
+         u.bronze = bronze
+
+         #todo: we don't have these fields
+         #views - number of profile views?
+         #has_replies
+         #has_message
+         #opt_in_recruit
+         #last_login_ip
+         #open_id_alt - ??
+         #preferences_raw - not clear how to use
+         #display_name_cleaned - lowercased, stripped name
+         #timed_penalty_date
+         #phone
+
+         #don't know how to handle these - there was no usage example
+         #password_id
+         #guid
+
+         #ignored
+         #last_email_date - this translates directly to EmailFeedSetting.reported_at
+
+         #save the data
+         u.save()
+         form = EditUserEmailFeedsForm()
+         form.reset()
+         if se_u.opt_in_email == True:#set up daily subscription on "own" items
+             form.initial['individually_selected'] = 'd'
+             form.initial['asked_by_me'] = 'd'
+             form.initial['answered_by_me'] = 'd'
+         #
+         form.save(user=u, save_unbound=True)
+
+         if u_auth is not None:
+             u_auth.user = u
+             u_auth.save()
+         USER[se_u.id] = u
diff --git a/stackexchange/models.py b/stackexchange/models.py
new file mode 100644
index 00000000..a30a9859
--- /dev/null
+++ b/stackexchange/models.py
@@ -0,0 +1,266 @@
+from django.db import models
+# Badge definition row. `id` is an explicit integer primary key; every other
+# column is nullable. `class_type` is the name DjangoField (parse_models.py)
+# gives to a source column called `Class`, to avoid the Python keyword.
+class Badge(models.Model):
+ id = models.IntegerField(primary_key=True)
+ class_type = models.IntegerField(null=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.TextField(null=True)
+ single = models.NullBooleanField(null=True)
+ secret = models.NullBooleanField(null=True)
+ tag_based = models.NullBooleanField(null=True)
+ command = models.TextField(null=True)
+ award_frequency = models.IntegerField(null=True)
+
+# Close-reason row: name, description and display order; all nullable
+# except the explicit integer primary key.
+class CloseReason(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=200, null=True)
+ description = models.CharField(max_length=256, null=True)
+ display_order = models.IntegerField(null=True)
+
+# Link row between a PostComment, a VoteType and a User, with creation and
+# deletion dates. All foreign keys are nullable.
+class Comment2Vote(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post_comment = models.ForeignKey('PostComment', related_name='Comment2Vote_by_post_comment_set', null=True)
+ vote_type = models.ForeignKey('VoteType', related_name='Comment2Vote_by_vote_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ user = models.ForeignKey('User', related_name='Comment2Vote_by_user_set', null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+ deletion_date = models.DateTimeField(null=True)
+
+# Flat page row: name, url, body (`value`) and content type, plus two
+# nullable flags whose exact meaning is not shown in this file.
+class FlatPage(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ url = models.CharField(max_length=128, null=True)
+ value = models.TextField(null=True)
+ content_type = models.CharField(max_length=50, null=True)
+ active = models.NullBooleanField(null=True)
+ use_master = models.NullBooleanField(null=True)
+
+# Message row linked to a User, a MessageType and optionally a Post;
+# carries a read flag, a creation date and the text.
+class Message(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='Message_by_user_set', null=True)
+ message_type = models.ForeignKey('MessageType', related_name='Message_by_message_type_set', null=True)
+ is_read = models.NullBooleanField(null=True)
+ creation_date = models.DateTimeField(null=True)
+ text = models.TextField(null=True)
+ post = models.ForeignKey('Post', related_name='Message_by_post_set', null=True)
+
+# Lookup row (name + description) referenced by Message and ModeratorMessage.
+class MessageType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+# Moderator message row: links to MessageType, User and Post, and keeps
+# creation and deletion metadata (date, IP address, deleting user).
+class ModeratorMessage(models.Model):
+ id = models.IntegerField(primary_key=True)
+ message_type = models.ForeignKey('MessageType', related_name='ModeratorMessage_by_message_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ creation_ip_address = models.CharField(max_length=40, null=True)
+ text = models.TextField(null=True)
+ user = models.ForeignKey('User', related_name='ModeratorMessage_by_user_set', null=True)
+ post = models.ForeignKey('Post', related_name='ModeratorMessage_by_post_set', null=True)
+ deletion_date = models.DateTimeField(null=True)
+ deletion_user = models.ForeignKey('User', related_name='ModeratorMessage_by_deletion_user_set', null=True)
+ deletion_ip_address = models.CharField(max_length=40, null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+
+# Comment attached to a Post, with its author, score and deletion metadata
+# (`deletion_date`, `deletion_user`).
+class PostComment(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post = models.ForeignKey('Post', related_name='PostComment_by_post_set', null=True)
+ text = models.TextField(null=True)
+ creation_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=15, null=True)
+ user = models.ForeignKey('User', related_name='PostComment_by_user_set', null=True)
+ user_display_name = models.CharField(max_length=30, null=True)
+ deletion_date = models.DateTimeField(null=True)
+ deletion_user = models.ForeignKey('User', related_name='PostComment_by_deletion_user_set', null=True)
+ score = models.IntegerField(null=True)
+
+# Lookup row (name + description) referenced by PostHistory.post_history_type.
+class PostHistoryType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+# One post history entry: its type, the post, the acting user, a creation
+# date and a `revision_guid` string, plus free-text comment/text columns.
+class PostHistory(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post_history_type = models.ForeignKey('PostHistoryType', related_name='PostHistory_by_post_history_type_set', null=True)
+ post = models.ForeignKey('Post', related_name='PostHistory_by_post_set', null=True)
+ revision_guid = models.CharField(max_length=64, null=True)
+ creation_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ user = models.ForeignKey('User', related_name='PostHistory_by_user_set', null=True)
+ comment = models.CharField(max_length=400, null=True)
+ text = models.TextField(null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+ user_email = models.CharField(max_length=100, null=True)
+ user_website_url = models.CharField(max_length=200, null=True)
+
+# Link row between a Post, a User and a VoteType, with creation and
+# deletion dates, an optional target user, reputation-change columns and
+# an optional linked post.
+class Post2Vote(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post = models.ForeignKey('Post', related_name='Post2Vote_by_post_set', null=True)
+ user = models.ForeignKey('User', related_name='Post2Vote_by_user_set', null=True)
+ vote_type = models.ForeignKey('VoteType', related_name='Post2Vote_by_vote_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ target_user = models.ForeignKey('User', related_name='Post2Vote_by_target_user_set', null=True)
+ target_rep_change = models.IntegerField(null=True)
+ voter_rep_change = models.IntegerField(null=True)
+ comment = models.CharField(max_length=150, null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ linked_post = models.ForeignKey('Post', related_name='Post2Vote_by_linked_post_set', null=True)
+
+# Post row; its kind is given by `post_type`. `parent` and
+# `accepted_answer` are self-referential foreign keys to other Post rows.
+# Every column except the explicit integer primary key is nullable.
+class Post(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post_type = models.ForeignKey('PostType', related_name='Post_by_post_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ score = models.IntegerField(null=True)
+ view_count = models.IntegerField(null=True)
+ body = models.TextField(null=True)
+ owner_user = models.ForeignKey('User', related_name='Post_by_owner_user_set', null=True)
+ last_editor_user = models.ForeignKey('User', related_name='Post_by_last_editor_user_set', null=True)
+ last_edit_date = models.DateTimeField(null=True)
+ last_activity_date = models.DateTimeField(null=True)
+ last_activity_user = models.ForeignKey('User', related_name='Post_by_last_activity_user_set', null=True)
+ parent = models.ForeignKey('self', related_name='Post_by_parent_set', null=True)
+ accepted_answer = models.ForeignKey('self', related_name='Post_by_accepted_answer_set', null=True)
+ title = models.CharField(max_length=250, null=True)
+ tags = models.CharField(max_length=150, null=True)
+ community_owned_date = models.DateTimeField(null=True)
+ history_summary = models.CharField(max_length=150, null=True)
+ answer_score = models.IntegerField(null=True)
+ answer_count = models.IntegerField(null=True)
+ comment_count = models.IntegerField(null=True)
+ favorite_count = models.IntegerField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ closed_date = models.DateTimeField(null=True)
+ locked_date = models.DateTimeField(null=True)
+ locked_duration = models.IntegerField(null=True)
+ owner_display_name = models.CharField(max_length=40, null=True)
+ last_editor_display_name = models.CharField(max_length=40, null=True)
+ bounty_amount = models.IntegerField(null=True)
+ bounty_closes = models.DateTimeField(null=True)
+ bounty_closed = models.DateTimeField(null=True)
+ last_owner_email_date = models.DateTimeField(null=True)
+
+# Lookup row (name + description) referenced by Post.post_type.
+class PostType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+# Holds a single nullable `version` integer. Unlike the other models in
+# this file it declares no explicit `id` field.
+class SchemaVersion(models.Model):
+ version = models.IntegerField(null=True)
+
+# Key/value row: `key` is limited to 256 characters, `value` is free text.
+class Setting(models.Model):
+ id = models.IntegerField(primary_key=True)
+ key = models.CharField(max_length=256, null=True)
+ value = models.TextField(null=True)
+
+# System message row linked to a User, with creation date, text and
+# deletion metadata (`deletion_date`, `deletion_user`).
+class SystemMessage(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='SystemMessage_by_user_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ text = models.TextField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ deletion_user = models.ForeignKey('User', related_name='SystemMessage_by_deletion_user_set', null=True)
+
+# Tag row: name, a count, a linked User, creation date, two nullable flags
+# and an `aliases` string of up to 200 characters.
+class Tag(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ count = models.IntegerField(null=True)
+ user = models.ForeignKey('User', related_name='Tag_by_user_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ is_moderator_only = models.NullBooleanField(null=True)
+ is_required = models.NullBooleanField(null=True)
+ aliases = models.CharField(max_length=200, null=True)
+
+# Theme resource row: name, `value` text, content type and a short
+# `version` string (at most 6 characters).
+class ThemeResource(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ value = models.TextField(null=True)
+ content_type = models.CharField(max_length=50, null=True)
+ version = models.CharField(max_length=6, null=True)
+
+# Theme text resource row: name, `value` text and content type.
+class ThemeTextResource(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ value = models.TextField(null=True)
+ content_type = models.CharField(max_length=50, null=True)
+
+# Throttle bucket row: a `type` string, an IP address, a token count and
+# the time of the last update.
+class ThrottleBucket(models.Model):
+ id = models.IntegerField(primary_key=True)
+ type = models.CharField(max_length=256, null=True)
+ ip_address = models.CharField(max_length=64, null=True)
+ tokens = models.IntegerField(null=True)
+ last_update = models.DateTimeField(null=True)
+
+# Lookup row (name + description) referenced by UserHistory.user_history_type.
+class UserHistoryType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+# User history entry: its type, the user concerned, an optional moderator
+# user, a comment and a reputation value, with creation date and IP.
+class UserHistory(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user_history_type = models.ForeignKey('UserHistoryType', related_name='UserHistory_by_user_history_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ user = models.ForeignKey('User', related_name='UserHistory_by_user_set', null=True)
+ comment = models.CharField(max_length=400, null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+ moderator_user = models.ForeignKey('User', related_name='UserHistory_by_moderator_user_set', null=True)
+ reputation = models.IntegerField(null=True)
+
+# Link row between a User and a Badge, with a date and a short comment.
+class User2Badge(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='User2Badge_by_user_set', null=True)
+ badge = models.ForeignKey('Badge', related_name='User2Badge_by_badge_set', null=True)
+ date = models.DateTimeField(null=True)
+ comment = models.CharField(max_length=50, null=True)
+
+# Link row between a voting User, a VoteType and a target User, with
+# creation and deletion dates and an IP address.
+class User2Vote(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='User2Vote_by_user_set', null=True)
+ vote_type = models.ForeignKey('VoteType', related_name='User2Vote_by_vote_type_set', null=True)
+ target_user = models.ForeignKey('User', related_name='User2Vote_by_target_user_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+
+# User row: account type (`user_type`), OpenID strings, contact and profile
+# columns, activity dates and opt-in flags. `open_id` and `password_id`
+# are plain columns, not foreign keys, although their source names end
+# in 'Id' (parse_models.py excludes them from foreign-key detection).
+class User(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user_type = models.ForeignKey('UserType', related_name='User_by_user_type_set', null=True)
+ open_id = models.CharField(max_length=200, null=True)
+ reputation = models.IntegerField(null=True)
+ views = models.IntegerField(null=True)
+ creation_date = models.DateTimeField(null=True)
+ last_access_date = models.DateTimeField(null=True)
+ has_replies = models.NullBooleanField(null=True)
+ has_message = models.NullBooleanField(null=True)
+ opt_in_email = models.NullBooleanField(null=True)
+ opt_in_recruit = models.NullBooleanField(null=True)
+ last_login_date = models.DateTimeField(null=True)
+ last_email_date = models.DateTimeField(null=True)
+ last_login_ip = models.CharField(max_length=15, null=True)
+ open_id_alt = models.CharField(max_length=200, null=True)
+ email = models.CharField(max_length=100, null=True)
+ display_name = models.CharField(max_length=40, null=True)
+ display_name_cleaned = models.CharField(max_length=40, null=True)
+ website_url = models.CharField(max_length=200, null=True)
+ real_name = models.CharField(max_length=100, null=True)
+ location = models.CharField(max_length=100, null=True)
+ birthday = models.DateTimeField(null=True)
+ badge_summary = models.CharField(max_length=50, null=True)
+ about_me = models.TextField(null=True)
+ preferences_raw = models.TextField(null=True)
+ timed_penalty_date = models.DateTimeField(null=True)
+ guid = models.CharField(max_length=64, null=True)
+ phone = models.CharField(max_length=20, null=True)
+ password_id = models.IntegerField(null=True)
+
+# Lookup row (name + description) referenced by User.user_type.
+class UserType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+# Lookup row (name + description) referenced by the vote_type foreign keys
+# of Comment2Vote, Post2Vote and User2Vote.
+class VoteType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
diff --git a/stackexchange/parse_models.py b/stackexchange/parse_models.py
new file mode 100644
index 00000000..64796e57
--- /dev/null
+++ b/stackexchange/parse_models.py
@@ -0,0 +1,225 @@
+from xml.etree import ElementTree as et
+import sys
+import re
+import os
+if __name__ != '__main__':#hack do not import models if run as script
+ from django.db import models
+from datetime import datetime
+
+# Prefix prepended to generated table/model names and relation targets.
+table_prefix = ''#StackExchange or something, if needed
+# strptime format used by parse_value for dateTime columns.
+date_time_format = '%Y-%m-%dT%H:%M:%S' #note that fractional part of second is lost
+# Matches an optional trailing '.<digits>' so parse_value can strip the
+# fractional seconds before calling strptime.
+time_re = re.compile(r'(\.[\d]+)?$')
+# Directory part of this file's path.
+# NOTE(review): the name suggests an app name, but this is a path -- confirm usage.
+loader_app_name = os.path.dirname(__file__)
+
+# Maps a schema type name (plus the internal 'FK'/'PK' markers) to the
+# Django field class written into the generated model source.
+# NOTE(review): 'double' is mapped to IntegerField, so a fractional value
+# would not survive int() in parse_value -- confirm that this is intended.
+types = {
+ 'unsignedByte':'models.IntegerField',
+ 'FK':'models.ForeignKey',
+ 'PK':'models.IntegerField',
+ 'string':'models.CharField',
+ 'text':'models.TextField',
+ 'int':'models.IntegerField',
+ 'boolean':'models.NullBooleanField',
+ 'dateTime':'models.DateTimeField',
+ 'base64Binary':'models.TextField',
+ 'double':'models.IntegerField',
+}
+
+def camel_to_python(camel):
+    """Convert a CamelCase identifier to lower-case snake_case.
+
+    Recipe from http://stackoverflow.com/questions/1175208/
+    """
+    #first pass: underscore in front of each capitalised word
+    word_split = re.compile('(.)([A-Z][a-z]+)').sub(r'\1_\2', camel)
+    #second pass: underscore between a lower-case letter/digit and a capital
+    fully_split = re.compile('([a-z0-9])([A-Z])').sub(r'\1_\2', word_split)
+    return fully_split.lower()
+
+def singular(word):
+    """Return *word* without one trailing 's'; other words are returned unchanged."""
+    return word[:-1] if word.endswith('s') else word
+
+def get_table_name(name):
+    """Determine db table name
+    from the basename of the .xml file
+
+    Each part of the name around a '2' is made singular separately and
+    the result is prefixed with table_prefix.
+    """
+    #a name without '2' splits into a single part, so this one
+    #expression covers plain names as well as link tables
+    parts = [singular(part) for part in name.split('2')]
+    return table_prefix + '2'.join(parts)
+
+class DjangoModel(object):
+    """Collects the fields of one table and renders them as model source."""
+
+    def __init__(self, name):
+        self.fields = []
+        self.name = get_table_name(name)
+
+    def add_field(self,field):
+        """Append *field* and point its `table` attribute at this model."""
+        self.fields.append(field)
+        field.table = self
+
+    def __str__(self):
+        #class header followed by one rendered line per field
+        lines = ['class %s(models.Model):\n' % self.name]
+        for f in self.fields:
+            lines.append(' %s\n' % str(f))
+        return ''.join(lines)
+
+class DjangoField(object):
+    """One column of a generated model, rendered as a Django field line.
+
+    `type` is a key of the module-level `types` map. A `restriction`
+    other than -1 is rendered as max_length and is accepted only for
+    the 'string' type.
+    """
+
+    def __init__(self, name, type, restriction = None):
+        python_name = camel_to_python(name)
+        #work around python keyword
+        self.name = 'class_type' if python_name == 'class' else python_name
+        self.type = type
+        self.table = None
+        self.restriction = restriction
+        self.relation = None
+
+    def __str__(self):
+        head = '%s = %s(' % (self.name, types[self.type])
+        if self.type == 'FK':
+            args = [
+                "'%s'" % self.relation,
+                "related_name='%s_by_%s_set'" % (self.table.name, self.name),
+                'null=True',#nullable to make life easier
+            ]
+        elif self.type == 'PK':
+            args = ['primary_key=True']
+        elif self.restriction != -1:
+            if self.type != 'string':
+                raise Exception('restriction (max_length) supported only for string type')
+            args = ['max_length=%s' % self.restriction, 'null=True']
+        else:
+            args = ['null=True']
+        return head + ', '.join(args) + ')'
+
+    def get_type(self):
+        """Return the type key this field was created with."""
+        return self.type
+
+class DjangoPK(DjangoField):
+    """Primary-key field: always named 'id' and of type 'PK'."""
+    def __init__(self):
+        #go through the base initialiser so that table, restriction and
+        #relation exist on PK fields too; 'Id' converts to the name 'id'
+        super(DjangoPK, self).__init__('Id', 'PK')
+
+class DjangoFK(DjangoField):
+    """Foreign-key field built from a source column named '<Relation>Id'.
+
+    Raises ValueError when the source name does not contain exactly one
+    'Id', at its very end.
+    """
+
+    def __init__(self, source_name):
+        #same acceptance rule as splitting on 'Id' and requiring two
+        #parts with an empty tail, but a bad name now fails with a clear
+        #error instead of an unbound local variable
+        if not (source_name.endswith('Id') and source_name.count('Id') == 1):
+            raise ValueError('%s is not a foreign key column name' % source_name)
+        name = source_name[:-2]
+        super(DjangoFK, self).__init__(name, 'FK')
+        self.set_relation(name)
+
+    def set_relation(self, name):
+        """some relations need to be mapped
+        to actual tables
+        """
+        self.relation = table_prefix
+        if name.endswith('User'):
+            self.relation += 'User'
+        elif name.endswith('Post'):
+            self.relation += 'Post'
+        elif name in ('AcceptedAnswer','Parent'):
+            self.relation = 'self' #self-referential Post model
+        else:
+            self.relation += name
+
+    def get_relation(self):
+        """Return the name of the related model ('self' for self-references)."""
+        return self.relation
+
+def get_col_type(col):
+    """Return (type name, restriction) for one schema column element.
+
+    The type comes from the element's 'type' attribute. Without it, the
+    nested simpleType/restriction element supplies the base type, and
+    the 'value' of its first child is used as the restriction. A
+    restriction of -1 means "none". Restrictions above 400 turn the
+    column into 'text' with no restriction.
+    """
+    col_type = col.get('type')
+    restriction = -1
+    if col_type is None:
+        type_e = col.find('.//simpleType/restriction')
+        col_type = type_e.get('base')
+        try:
+            restriction = int(list(type_e)[0].get('value'))
+        except (IndexError, TypeError, ValueError):
+            #no child element, or its value is missing or not numeric
+            restriction = -1
+        if restriction > 400:
+            col_type = 'text'
+            restriction = -1
+    return col_type, restriction
+
+def make_field_from_xml_tree(xml_element):
+    """used by the model parser
+    here we need to be detailed about field types
+    because this defines the database schema
+
+    Returns None for the unused 'LinkedVoteId' column, otherwise a
+    DjangoPK, DjangoFK or DjangoField chosen from the column name.
+    """
+    name = xml_element.get('name')
+    if name == 'LinkedVoteId':#not used
+        return None
+    if name == 'Id':
+        return DjangoPK()
+    #OpenId and PasswordId end in 'Id' but are plain columns
+    if name.endswith('Id') and name not in ('OpenId','PasswordId'):
+        return DjangoFK(name)
+    if name.endswith('GUID'):
+        return DjangoField(name, 'string', 64)
+    col_type, restriction = get_col_type(xml_element)
+    return DjangoField(name, col_type, restriction)
+
+def parse_field_name(input):
+    """used by the data reader
+
+    The code that determines a field name is scattered over three
+    classes (DjangoField, DjangoPK and DjangoFK), so this function
+    creates throw-away field objects just to read their names.
+    """
+    if input == 'Id':
+        return DjangoPK().name
+    #OpenId and PasswordId end in 'Id' but are not foreign keys
+    if input.endswith('Id') and input not in ('OpenId', 'PasswordId'):
+        return DjangoFK(input).name#real FK field
+    return DjangoField(input, 'string', 7).name#happy fake field
+
+def parse_value(input, field_object):
+    """Convert raw XML text into a value for the given model field.
+
+    - ForeignKey: returns the related object with that id, saving an
+      empty placeholder object first if none exists yet
+    - IntegerField: int
+    - CharField / TextField: the text unchanged
+    - BooleanField / NullBooleanField: 'true'/'1' -> True,
+      'false'/'0' -> False (case-insensitive), missing or blank -> None
+    - DateTimeField: datetime parsed with date_time_format after the
+      fractional seconds are stripped
+
+    Raises Exception when the text does not fit the field type.
+    Returns None for any other field type.
+    """
+    if isinstance(field_object, models.ForeignKey):
+        try:
+            pk = int(input)
+        except (TypeError, ValueError):
+            raise Exception('non-numeric foreign key %s' % input)
+        related_model = field_object.rel.to
+        try:
+            return related_model.objects.get(id=pk)
+        except related_model.DoesNotExist:
+            obj = related_model(id=pk)
+            obj.save()#save fake empty object
+            return obj
+    elif isinstance(field_object, models.IntegerField):
+        try:
+            return int(input)
+        except (TypeError, ValueError):
+            raise Exception('expected integer, found %s' % input)
+    elif isinstance(field_object, (models.CharField, models.TextField)):
+        return input
+    elif isinstance(field_object, (models.BooleanField, models.NullBooleanField)):
+        #bool() on a non-empty string is always True, so 'false' must be
+        #recognised by its text; NullBooleanField is listed explicitly
+        #because the generated models use it for every boolean column
+        text = (input or '').strip().lower()
+        if text == '':
+            return None
+        if text in ('true', '1'):
+            return True
+        if text in ('false', '0'):
+            return False
+        raise Exception('boolean value expected %s found' % input)
+    elif isinstance(field_object, models.DateTimeField):
+        try:
+            return datetime.strptime(time_re.sub('', input), date_time_format)
+        except (TypeError, ValueError):
+            raise Exception('datetime expected "%s" found' % input)
+
+if __name__ == '__main__':
+    #Generate models source on stdout from the .xsd files named on the
+    #command line. Guarded so that importing this module for its parsing
+    #helpers neither prints anything nor looks at sys.argv.
+    print 'from django.db import models'
+    for file in sys.argv:
+        if '.xsd' in file:
+            tname = os.path.basename(file).replace('.xsd','')
+            tree = et.parse(file)
+
+            model = DjangoModel(tname)
+
+            #columns are the children of the first <sequence> element
+            row = tree.find('.//sequence')
+            for col in row.getchildren():
+                field = make_field_from_xml_tree(col)
+                if field:
+                    model.add_field(field)
+            print model