summary | refs | log | tree | commit | diff | stats
path: root/stackexchange
diff options
context:
space:
mode:
author: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2010-03-04 21:47:03 -0500
committer: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2010-03-04 21:47:03 -0500
commit: b807b27c8c3ca1a27c1467ca54885f04ed4d6fa0 (patch)
tree: dbaab41324014da414d831adad48455b3c26608a /stackexchange
parent: 153fe986ea1c25d6c09b89368d71103aa76f0f23 (diff)
download: askbot-b807b27c8c3ca1a27c1467ca54885f04ed4d6fa0.tar.gz
askbot-b807b27c8c3ca1a27c1467ca54885f04ed4d6fa0.tar.bz2
askbot-b807b27c8c3ca1a27c1467ca54885f04ed4d6fa0.zip
SE data seems to load into stackexchange tables
Diffstat (limited to 'stackexchange')
-rw-r--r-- stackexchange/README 10
-rw-r--r-- stackexchange/management/__init__.py 0
-rw-r--r-- stackexchange/management/commands/__init__.py 0
-rw-r--r-- stackexchange/management/commands/load_stackexchange.py 59
-rw-r--r-- stackexchange/models.py 502
-rw-r--r-- stackexchange/parse_models.py 145
6 files changed, 450 insertions, 266 deletions
diff --git a/stackexchange/README b/stackexchange/README
index 3eb1fea9..f842e891 100644
--- a/stackexchange/README
+++ b/stackexchange/README
@@ -1,14 +1,14 @@
this app's function will be to:
* install its own tables <--- done
-* read SE xml dump into DjangoDB
+* read SE xml dump into DjangoDB <--- done
* populate osqa database
* remove SE tables
So far models are automatically created via:
1) add 'stackexchange' to the list of installed apps
-2) type commands
+2) run:
#in-place removal of xml namespace prefix to make parsing easier
perl -pi -w -e 's/xs://g' $SE_DUMP_PATH/xsd/*.xsd
@@ -17,3 +17,9 @@ So far models are automatically created via:
python parse_models.py $SE_DUMP_PATH/xsd/*.xsd > models.py
cd ..
python manage.py syncdb
+
+3) run:
+
+ python manage.py load_stackexchange
+
+ does not yet populate final data
diff --git a/stackexchange/management/__init__.py b/stackexchange/management/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/stackexchange/management/__init__.py
diff --git a/stackexchange/management/commands/__init__.py b/stackexchange/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/stackexchange/management/commands/__init__.py
diff --git a/stackexchange/management/commands/load_stackexchange.py b/stackexchange/management/commands/load_stackexchange.py
new file mode 100644
index 00000000..42ccc9f9
--- /dev/null
+++ b/stackexchange/management/commands/load_stackexchange.py
@@ -0,0 +1,59 @@
+from django.core.management.base import BaseCommand
+import os
+import sys
+import stackexchange.parse_models as se_parser
+from xml.etree import ElementTree as et
+from django.db import models
+
+# Basenames of the dump files, in the order they are loaded.
+# Each file is listed exactly once: loading a file a second time only
+# re-saves rows that already exist under the same primary key.
+# NOTE(review): the order appears to put referenced tables (types, users,
+# posts) ahead of the tables whose rows point at them - confirm before
+# reordering.
+xml_read_order = (
+    'VoteTypes', 'UserTypes', 'Users', 'Users2Votes',
+    'Badges', 'Users2Badges', 'CloseReasons', 'FlatPages',
+    'MessageTypes', 'PostHistoryTypes', 'PostTypes', 'SchemaVersion',
+    'Settings', 'SystemMessages', 'ThemeResources', 'ThemeTextResources',
+    'ThrottleBucket', 'UserHistoryTypes', 'UserHistory',
+    'Posts', 'Posts2Votes', 'PostHistory', 'PostComments',
+    'ModeratorMessages', 'Messages', 'Comments2Votes',
+)
+
+
+class Command(BaseCommand):
+    """Load an unzipped StackExchange XML dump into the models of the
+    'stackexchange' app.
+
+    Takes one positional argument: the directory holding the *.xml files.
+    """
+    help = 'Loads StackExchange data from unzipped directory of XML files into the OSQA database'
+    args = 'se_dump_dir'
+
+    def handle(self, *arg, **kwarg):
+        """Load every file named in xml_read_order from the directory arg[0].
+
+        Exits with status 1 and a message on stderr when the argument is
+        missing or is not a directory, or when any expected file is absent.
+        """
+        if not arg or not os.path.isdir(arg[0]):
+            sys.exit('Error: first argument must be a directory with all the SE *.xml files')
+
+        self.dump_path = arg[0]
+        # resolve every path first: a missing file then aborts the run
+        # before any table has been loaded, instead of half way through
+        jobs = [(self.get_xml_path(xml), self.get_table_name(xml))
+                for xml in xml_read_order]
+        for xml_path, table_name in jobs:
+            self.load_xml_file(xml_path, table_name)
+
+    def load_xml_file(self, xml_path, table_name):
+        """Save one model instance per <row> element found in xml_path.
+
+        Each child element of a row supplies one field: its tag is mapped
+        to a field name and its text is converted for that model field,
+        both by helpers in stackexchange.parse_models.
+        """
+        model = models.get_model('stackexchange', table_name)
+        if model is None:
+            # get_model() hands back None for an unknown model; fail with a
+            # clear message rather than "NoneType is not callable" below
+            sys.exit('Error: no stackexchange model named %s' % table_name)
+        tree = et.parse(xml_path)
+        print 'loading from %s to %s' % (xml_path, table_name) ,
+        saved = 0
+        for row in tree.findall('.//row'):
+            model_entry = model()
+            # iterating an Element yields its children; getchildren()
+            # is deprecated
+            for col in row:
+                field_name = se_parser.parse_field_name(col.tag)
+                field = model._meta.get_field(field_name)
+                field_value = se_parser.parse_value(col.text, field)
+                setattr(model_entry, field_name, field_value)
+            model_entry.save()
+            saved += 1
+        print '... %d objects saved' % saved
+
+    def get_table_name(self, xml):
+        """Return the model name for the dump file basename xml."""
+        return se_parser.get_table_name(xml)
+
+    def get_xml_path(self, xml):
+        """Return the path <dump_path>/<xml>.xml.
+
+        Exits with status 1 and a message on stderr if it is not a file.
+        """
+        xml_path = os.path.join(self.dump_path, xml + '.xml')
+        if not os.path.isfile(xml_path):
+            sys.exit('Error: file %s not found' % xml_path)
+        return xml_path
diff --git a/stackexchange/models.py b/stackexchange/models.py
index 28b2dda6..a30a9859 100644
--- a/stackexchange/models.py
+++ b/stackexchange/models.py
@@ -1,240 +1,266 @@
from django.db import models
-class StackExchangeBadge(models.Model):
- class_type = models.IntegerField()
- name = models.CharField(max_length=50)
- description = models.TextField()
- single = models.BooleanField()
- secret = models.BooleanField()
- tag_based = models.BooleanField()
- command = models.TextField()
- award_frequency = models.IntegerField()
-
-class StackExchangeCloseReason(models.Model):
- name = models.CharField(max_length=200)
- description = models.CharField(max_length=256)
- display_order = models.IntegerField()
-
-class StackExchangeComment2Vote(models.Model):
- post_comment = models.ForeignKey('StackExchangePostComment', related_name='StackExchangeComment2Vote_post_comment_set', null=True)
- vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangeComment2Vote_vote_type_set', null=True)
- creation_date = models.DateTimeField()
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeComment2Vote_user_set', null=True)
- ip_address = models.CharField(max_length=40)
- user_display_name = models.CharField(max_length=40)
- deletion_date = models.DateTimeField()
-
-class StackExchangeFlatPage(models.Model):
- name = models.CharField(max_length=50)
- url = models.CharField(max_length=128)
- value = models.TextField()
- content_type = models.CharField(max_length=50)
- active = models.BooleanField()
- use_master = models.BooleanField()
-
-class StackExchangeMessage(models.Model):
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeMessage_user_set', null=True)
- message_type = models.ForeignKey('StackExchangeMessageType', related_name='StackExchangeMessage_message_type_set', null=True)
- is_read = models.BooleanField()
- creation_date = models.DateTimeField()
- text = models.TextField()
- post = models.ForeignKey('StackExchangePost', related_name='StackExchangeMessage_post_set', null=True)
-
-class StackExchangeMessageType(models.Model):
- name = models.CharField(max_length=50)
- description = models.CharField(max_length=300)
-
-class StackExchangeModeratorMessage(models.Model):
- message_type = models.ForeignKey('StackExchangeMessageType', related_name='StackExchangeModeratorMessage_message_type_set', null=True)
- creation_date = models.DateTimeField()
- creation_ip_address = models.CharField(max_length=40)
- text = models.TextField()
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeModeratorMessage_user_set', null=True)
- post = models.ForeignKey('StackExchangePost', related_name='StackExchangeModeratorMessage_post_set', null=True)
- deletion_date = models.DateTimeField()
- deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeModeratorMessage_deletion_user_set', null=True)
- deletion_ip_address = models.CharField(max_length=40)
- user_display_name = models.CharField(max_length=40)
-
-class StackExchangePostComment(models.Model):
- post = models.ForeignKey('StackExchangePost', related_name='StackExchangePostComment_post_set', null=True)
- text = models.TextField()
- creation_date = models.DateTimeField()
- ip_address = models.CharField(max_length=15)
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostComment_user_set', null=True)
- user_display_name = models.CharField(max_length=30)
- deletion_date = models.DateTimeField()
- deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostComment_deletion_user_set', null=True)
- score = models.IntegerField()
-
-class StackExchangePostHistoryType(models.Model):
- name = models.CharField(max_length=50)
- description = models.CharField(max_length=300)
-
-class StackExchangePostHistory(models.Model):
- post_history_type = models.ForeignKey('StackExchangePostHistoryType', related_name='StackExchangePostHistory_post_history_type_set', null=True)
- post = models.ForeignKey('StackExchangePost', related_name='StackExchangePostHistory_post_set', null=True)
- revision_guid = models.CharField(max_length=64)
- creation_date = models.DateTimeField()
- ip_address = models.CharField(max_length=40)
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostHistory_user_set', null=True)
- comment = models.CharField(max_length=400)
- text = models.TextField()
- user_display_name = models.CharField(max_length=40)
- user_email = models.CharField(max_length=100)
- user_website_url = models.CharField(max_length=200)
-
-class StackExchangePost2Vote(models.Model):
- post = models.ForeignKey('StackExchangePost', related_name='StackExchangePost2Vote_post_set', null=True)
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost2Vote_user_set', null=True)
- vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangePost2Vote_vote_type_set', null=True)
- creation_date = models.DateTimeField()
- deletion_date = models.DateTimeField()
- target_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost2Vote_target_user_set', null=True)
- target_rep_change = models.IntegerField()
- voter_rep_change = models.IntegerField()
- comment = models.CharField(max_length=150)
- ip_address = models.CharField(max_length=40)
- linked_post = models.ForeignKey('StackExchangePost', related_name='StackExchangePost2Vote_linked_post_set', null=True)
-
-class StackExchangePost(models.Model):
- post_type = models.ForeignKey('StackExchangePostType', related_name='StackExchangePost_post_type_set', null=True)
- creation_date = models.DateTimeField()
- score = models.IntegerField()
- view_count = models.IntegerField()
- body = models.TextField()
- owner_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_owner_user_set', null=True)
- last_editor_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_last_editor_user_set', null=True)
- last_edit_date = models.DateTimeField()
- last_activity_date = models.DateTimeField()
- last_activity_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_last_activity_user_set', null=True)
- parent = models.ForeignKey('self', related_name='StackExchangePost_parent_set', null=True)
- accepted_answer = models.ForeignKey('self', related_name='StackExchangePost_accepted_answer_set', null=True)
- title = models.CharField(max_length=250)
- tags = models.CharField(max_length=150)
- community_owned_date = models.DateTimeField()
- history_summary = models.CharField(max_length=150)
- answer_score = models.IntegerField()
- answer_count = models.IntegerField()
- comment_count = models.IntegerField()
- favorite_count = models.IntegerField()
- deletion_date = models.DateTimeField()
- closed_date = models.DateTimeField()
- locked_date = models.DateTimeField()
- locked_duration = models.IntegerField()
- owner_display_name = models.CharField(max_length=40)
- last_editor_display_name = models.CharField(max_length=40)
- bounty_amount = models.IntegerField()
- bounty_closes = models.DateTimeField()
- bounty_closed = models.DateTimeField()
- last_owner_email_date = models.DateTimeField()
-
-class StackExchangePostType(models.Model):
- name = models.CharField(max_length=50)
- description = models.CharField(max_length=300)
-
-class StackExchangeSchemaVersion(models.Model):
- version = models.IntegerField()
-
-class StackExchangeSetting(models.Model):
- key = models.CharField(max_length=256)
- value = models.TextField()
-
-class StackExchangeSystemMessage(models.Model):
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeSystemMessage_user_set', null=True)
- creation_date = models.DateTimeField()
- text = models.TextField()
- deletion_date = models.DateTimeField()
- deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeSystemMessage_deletion_user_set', null=True)
-
-class StackExchangeTag(models.Model):
- name = models.CharField(max_length=50)
- count = models.IntegerField()
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeTag_user_set', null=True)
- creation_date = models.DateTimeField()
- is_moderator_only = models.BooleanField()
- is_required = models.BooleanField()
- aliases = models.CharField(max_length=200)
-
-class StackExchangeThemeResource(models.Model):
- name = models.CharField(max_length=50)
- value = models.TextField()
- content_type = models.CharField(max_length=50)
- version = models.CharField(max_length=6)
-
-class StackExchangeThemeTextResource(models.Model):
- name = models.CharField(max_length=50)
- value = models.TextField()
- content_type = models.CharField(max_length=50)
-
-class StackExchangeThrottleBucket(models.Model):
- type = models.CharField(max_length=256)
- ip_address = models.CharField(max_length=64)
- tokens = models.IntegerField()
- last_update = models.DateTimeField()
-
-class StackExchangeUserHistoryType(models.Model):
- name = models.CharField(max_length=50)
- description = models.CharField(max_length=300)
-
-class StackExchangeUserHistory(models.Model):
- user_history_type = models.ForeignKey('StackExchangeUserHistoryType', related_name='StackExchangeUserHistory_user_history_type_set', null=True)
- creation_date = models.DateTimeField()
- ip_address = models.CharField(max_length=40)
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUserHistory_user_set', null=True)
- comment = models.CharField(max_length=400)
- user_display_name = models.CharField(max_length=40)
- moderator_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUserHistory_moderator_user_set', null=True)
- reputation = models.IntegerField()
-
-class StackExchangeUser2Badge(models.Model):
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Badge_user_set', null=True)
- badge = models.ForeignKey('StackExchangeBadge', related_name='StackExchangeUser2Badge_badge_set', null=True)
- date = models.DateTimeField()
- comment = models.CharField(max_length=50)
-
-class StackExchangeUser2Vote(models.Model):
- user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Vote_user_set', null=True)
- vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangeUser2Vote_vote_type_set', null=True)
- target_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Vote_target_user_set', null=True)
- creation_date = models.DateTimeField()
- deletion_date = models.DateTimeField()
- ip_address = models.CharField(max_length=40)
-
-class StackExchangeUser(models.Model):
- user_type = models.ForeignKey('StackExchangeUserType', related_name='StackExchangeUser_user_type_set', null=True)
- open_id = models.CharField(max_length=200)
- reputation = models.IntegerField()
- views = models.IntegerField()
- creation_date = models.DateTimeField()
- last_access_date = models.DateTimeField()
- has_replies = models.BooleanField()
- has_message = models.BooleanField()
- opt_in_email = models.BooleanField()
- opt_in_recruit = models.BooleanField()
- last_login_date = models.DateTimeField()
- last_email_date = models.DateTimeField()
- last_login_ip = models.CharField(max_length=15)
- open_id_alt = models.CharField(max_length=200)
- email = models.CharField(max_length=100)
- display_name = models.CharField(max_length=40)
- display_name_cleaned = models.CharField(max_length=40)
- website_url = models.CharField(max_length=200)
- real_name = models.CharField(max_length=100)
- location = models.CharField(max_length=100)
- birthday = models.DateTimeField()
- badge_summary = models.CharField(max_length=50)
- about_me = models.TextField()
- preferences_raw = models.TextField()
- timed_penalty_date = models.DateTimeField()
- guid = models.CharField(max_length=64)
- phone = models.CharField(max_length=20)
- password_id = models.IntegerField()
-
-class StackExchangeUserType(models.Model):
- name = models.CharField(max_length=50)
- description = models.CharField(max_length=300)
-
-class StackExchangeVoteType(models.Model):
- name = models.CharField(max_length=50)
- description = models.CharField(max_length=300)
+class Badge(models.Model):
+ id = models.IntegerField(primary_key=True)
+ class_type = models.IntegerField(null=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.TextField(null=True)
+ single = models.NullBooleanField(null=True)
+ secret = models.NullBooleanField(null=True)
+ tag_based = models.NullBooleanField(null=True)
+ command = models.TextField(null=True)
+ award_frequency = models.IntegerField(null=True)
+
+class CloseReason(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=200, null=True)
+ description = models.CharField(max_length=256, null=True)
+ display_order = models.IntegerField(null=True)
+
+class Comment2Vote(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post_comment = models.ForeignKey('PostComment', related_name='Comment2Vote_by_post_comment_set', null=True)
+ vote_type = models.ForeignKey('VoteType', related_name='Comment2Vote_by_vote_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ user = models.ForeignKey('User', related_name='Comment2Vote_by_user_set', null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+ deletion_date = models.DateTimeField(null=True)
+
+class FlatPage(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ url = models.CharField(max_length=128, null=True)
+ value = models.TextField(null=True)
+ content_type = models.CharField(max_length=50, null=True)
+ active = models.NullBooleanField(null=True)
+ use_master = models.NullBooleanField(null=True)
+
+class Message(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='Message_by_user_set', null=True)
+ message_type = models.ForeignKey('MessageType', related_name='Message_by_message_type_set', null=True)
+ is_read = models.NullBooleanField(null=True)
+ creation_date = models.DateTimeField(null=True)
+ text = models.TextField(null=True)
+ post = models.ForeignKey('Post', related_name='Message_by_post_set', null=True)
+
+class MessageType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+class ModeratorMessage(models.Model):
+ id = models.IntegerField(primary_key=True)
+ message_type = models.ForeignKey('MessageType', related_name='ModeratorMessage_by_message_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ creation_ip_address = models.CharField(max_length=40, null=True)
+ text = models.TextField(null=True)
+ user = models.ForeignKey('User', related_name='ModeratorMessage_by_user_set', null=True)
+ post = models.ForeignKey('Post', related_name='ModeratorMessage_by_post_set', null=True)
+ deletion_date = models.DateTimeField(null=True)
+ deletion_user = models.ForeignKey('User', related_name='ModeratorMessage_by_deletion_user_set', null=True)
+ deletion_ip_address = models.CharField(max_length=40, null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+
+class PostComment(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post = models.ForeignKey('Post', related_name='PostComment_by_post_set', null=True)
+ text = models.TextField(null=True)
+ creation_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=15, null=True)
+ user = models.ForeignKey('User', related_name='PostComment_by_user_set', null=True)
+ user_display_name = models.CharField(max_length=30, null=True)
+ deletion_date = models.DateTimeField(null=True)
+ deletion_user = models.ForeignKey('User', related_name='PostComment_by_deletion_user_set', null=True)
+ score = models.IntegerField(null=True)
+
+class PostHistoryType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+class PostHistory(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post_history_type = models.ForeignKey('PostHistoryType', related_name='PostHistory_by_post_history_type_set', null=True)
+ post = models.ForeignKey('Post', related_name='PostHistory_by_post_set', null=True)
+ revision_guid = models.CharField(max_length=64, null=True)
+ creation_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ user = models.ForeignKey('User', related_name='PostHistory_by_user_set', null=True)
+ comment = models.CharField(max_length=400, null=True)
+ text = models.TextField(null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+ user_email = models.CharField(max_length=100, null=True)
+ user_website_url = models.CharField(max_length=200, null=True)
+
+class Post2Vote(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post = models.ForeignKey('Post', related_name='Post2Vote_by_post_set', null=True)
+ user = models.ForeignKey('User', related_name='Post2Vote_by_user_set', null=True)
+ vote_type = models.ForeignKey('VoteType', related_name='Post2Vote_by_vote_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ target_user = models.ForeignKey('User', related_name='Post2Vote_by_target_user_set', null=True)
+ target_rep_change = models.IntegerField(null=True)
+ voter_rep_change = models.IntegerField(null=True)
+ comment = models.CharField(max_length=150, null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ linked_post = models.ForeignKey('Post', related_name='Post2Vote_by_linked_post_set', null=True)
+
+class Post(models.Model):
+ id = models.IntegerField(primary_key=True)
+ post_type = models.ForeignKey('PostType', related_name='Post_by_post_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ score = models.IntegerField(null=True)
+ view_count = models.IntegerField(null=True)
+ body = models.TextField(null=True)
+ owner_user = models.ForeignKey('User', related_name='Post_by_owner_user_set', null=True)
+ last_editor_user = models.ForeignKey('User', related_name='Post_by_last_editor_user_set', null=True)
+ last_edit_date = models.DateTimeField(null=True)
+ last_activity_date = models.DateTimeField(null=True)
+ last_activity_user = models.ForeignKey('User', related_name='Post_by_last_activity_user_set', null=True)
+ parent = models.ForeignKey('self', related_name='Post_by_parent_set', null=True)
+ accepted_answer = models.ForeignKey('self', related_name='Post_by_accepted_answer_set', null=True)
+ title = models.CharField(max_length=250, null=True)
+ tags = models.CharField(max_length=150, null=True)
+ community_owned_date = models.DateTimeField(null=True)
+ history_summary = models.CharField(max_length=150, null=True)
+ answer_score = models.IntegerField(null=True)
+ answer_count = models.IntegerField(null=True)
+ comment_count = models.IntegerField(null=True)
+ favorite_count = models.IntegerField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ closed_date = models.DateTimeField(null=True)
+ locked_date = models.DateTimeField(null=True)
+ locked_duration = models.IntegerField(null=True)
+ owner_display_name = models.CharField(max_length=40, null=True)
+ last_editor_display_name = models.CharField(max_length=40, null=True)
+ bounty_amount = models.IntegerField(null=True)
+ bounty_closes = models.DateTimeField(null=True)
+ bounty_closed = models.DateTimeField(null=True)
+ last_owner_email_date = models.DateTimeField(null=True)
+
+class PostType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+class SchemaVersion(models.Model):
+ version = models.IntegerField(null=True)
+
+class Setting(models.Model):
+ id = models.IntegerField(primary_key=True)
+ key = models.CharField(max_length=256, null=True)
+ value = models.TextField(null=True)
+
+class SystemMessage(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='SystemMessage_by_user_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ text = models.TextField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ deletion_user = models.ForeignKey('User', related_name='SystemMessage_by_deletion_user_set', null=True)
+
+class Tag(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ count = models.IntegerField(null=True)
+ user = models.ForeignKey('User', related_name='Tag_by_user_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ is_moderator_only = models.NullBooleanField(null=True)
+ is_required = models.NullBooleanField(null=True)
+ aliases = models.CharField(max_length=200, null=True)
+
+class ThemeResource(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ value = models.TextField(null=True)
+ content_type = models.CharField(max_length=50, null=True)
+ version = models.CharField(max_length=6, null=True)
+
+class ThemeTextResource(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ value = models.TextField(null=True)
+ content_type = models.CharField(max_length=50, null=True)
+
+class ThrottleBucket(models.Model):
+ id = models.IntegerField(primary_key=True)
+ type = models.CharField(max_length=256, null=True)
+ ip_address = models.CharField(max_length=64, null=True)
+ tokens = models.IntegerField(null=True)
+ last_update = models.DateTimeField(null=True)
+
+class UserHistoryType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+class UserHistory(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user_history_type = models.ForeignKey('UserHistoryType', related_name='UserHistory_by_user_history_type_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+ user = models.ForeignKey('User', related_name='UserHistory_by_user_set', null=True)
+ comment = models.CharField(max_length=400, null=True)
+ user_display_name = models.CharField(max_length=40, null=True)
+ moderator_user = models.ForeignKey('User', related_name='UserHistory_by_moderator_user_set', null=True)
+ reputation = models.IntegerField(null=True)
+
+class User2Badge(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='User2Badge_by_user_set', null=True)
+ badge = models.ForeignKey('Badge', related_name='User2Badge_by_badge_set', null=True)
+ date = models.DateTimeField(null=True)
+ comment = models.CharField(max_length=50, null=True)
+
+class User2Vote(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user = models.ForeignKey('User', related_name='User2Vote_by_user_set', null=True)
+ vote_type = models.ForeignKey('VoteType', related_name='User2Vote_by_vote_type_set', null=True)
+ target_user = models.ForeignKey('User', related_name='User2Vote_by_target_user_set', null=True)
+ creation_date = models.DateTimeField(null=True)
+ deletion_date = models.DateTimeField(null=True)
+ ip_address = models.CharField(max_length=40, null=True)
+
+class User(models.Model):
+ id = models.IntegerField(primary_key=True)
+ user_type = models.ForeignKey('UserType', related_name='User_by_user_type_set', null=True)
+ open_id = models.CharField(max_length=200, null=True)
+ reputation = models.IntegerField(null=True)
+ views = models.IntegerField(null=True)
+ creation_date = models.DateTimeField(null=True)
+ last_access_date = models.DateTimeField(null=True)
+ has_replies = models.NullBooleanField(null=True)
+ has_message = models.NullBooleanField(null=True)
+ opt_in_email = models.NullBooleanField(null=True)
+ opt_in_recruit = models.NullBooleanField(null=True)
+ last_login_date = models.DateTimeField(null=True)
+ last_email_date = models.DateTimeField(null=True)
+ last_login_ip = models.CharField(max_length=15, null=True)
+ open_id_alt = models.CharField(max_length=200, null=True)
+ email = models.CharField(max_length=100, null=True)
+ display_name = models.CharField(max_length=40, null=True)
+ display_name_cleaned = models.CharField(max_length=40, null=True)
+ website_url = models.CharField(max_length=200, null=True)
+ real_name = models.CharField(max_length=100, null=True)
+ location = models.CharField(max_length=100, null=True)
+ birthday = models.DateTimeField(null=True)
+ badge_summary = models.CharField(max_length=50, null=True)
+ about_me = models.TextField(null=True)
+ preferences_raw = models.TextField(null=True)
+ timed_penalty_date = models.DateTimeField(null=True)
+ guid = models.CharField(max_length=64, null=True)
+ phone = models.CharField(max_length=20, null=True)
+ password_id = models.IntegerField(null=True)
+
+class UserType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
+
+class VoteType(models.Model):
+ id = models.IntegerField(primary_key=True)
+ name = models.CharField(max_length=50, null=True)
+ description = models.CharField(max_length=300, null=True)
diff --git a/stackexchange/parse_models.py b/stackexchange/parse_models.py
index e83ca0d5..64796e57 100644
--- a/stackexchange/parse_models.py
+++ b/stackexchange/parse_models.py
@@ -1,14 +1,24 @@
from xml.etree import ElementTree as et
import sys
import re
+import os
+if __name__ != '__main__':#hack do not import models if run as script
+ from django.db import models
+from datetime import datetime
+
+table_prefix = ''  # prepended to generated model names and FK targets, e.g. 'StackExchange'; empty means no prefix
+date_time_format = '%Y-%m-%dT%H:%M:%S'  # whole seconds only: the fractional part of a second is lost
+time_re = re.compile(r'(\.[\d]+)?$')  # optional trailing '.digits' run; presumably used to strip fractional seconds before parsing - confirm in parse_value
+loader_app_name = os.path.dirname(__file__)  # NOTE(review): this is the directory path of this module, not a bare app label - confirm what callers expect
types = {
'unsignedByte':'models.IntegerField',
'FK':'models.ForeignKey',
+ 'PK':'models.IntegerField',
'string':'models.CharField',
'text':'models.TextField',
'int':'models.IntegerField',
- 'boolean':'models.BooleanField',
+ 'boolean':'models.NullBooleanField',
'dateTime':'models.DateTimeField',
'base64Binary':'models.TextField',
'double':'models.IntegerField',
@@ -26,15 +36,22 @@ def singular(word):
else:
return word
+def get_table_name(name):
+    """Build the model/table name for the basename of an .xml file.
+
+    The basename is split on '2' (as in 'Users2Badges'), every piece is
+    passed through singular(), and the pieces are joined back with '2'
+    behind table_prefix. A basename without '2' is simply one piece.
+    """
+    pieces = [singular(piece) for piece in name.split('2')]
+    return table_prefix + '2'.join(pieces)
+
class DjangoModel(object):
def __init__(self, name):
- self.name = 'StackExchange'
- if name.find('2') == -1:
- self.name += singular(name)
- else:
- bits = name.split('2')
- bits = map(singular, bits)
- self.name += '2'.join(bits)
+ self.name = get_table_name(name)
self.fields = []
def add_field(self,field):
field.table = self
@@ -57,20 +74,31 @@ class DjangoField(object):
def __str__(self):
out = '%s = %s(' % (self.name, types[self.type])
- if self.relation and self.restriction:
- raise Exception('impossible')
- elif self.relation:
+ if self.type == 'FK':
out += "'%s'" % self.relation
- out += ", related_name='%s_%s_set'" % (self.table.name, self.name)
+ out += ", related_name='%s_by_%s_set'" % (self.table.name, self.name)
out += ', null=True'#nullable to make life easier
+ elif self.type == 'PK':
+ out += 'primary_key=True'
elif self.restriction != -1:
if self.type == 'string':
out += 'max_length=%s' % self.restriction
+ out += ', null=True'
else:
- raise Exception('only max_length restriction is supported')
+ raise Exception('restriction (max_length) supported only for string type')
+ else:
+ out += 'null=True'
out += ')'
return out
+ def get_type(self):
+ return self.type
+
+class DjangoPK(DjangoField):
+ def __init__(self):
+ self.name = 'id'
+ self.type = 'PK'
+
class DjangoFK(DjangoField):
def __init__(self, source_name):
bits = source_name.split('Id')
@@ -83,7 +111,7 @@ class DjangoFK(DjangoField):
"""some relations need to be mapped
to actual tables
"""
- self.relation = 'StackExchange'
+ self.relation = table_prefix
if name.endswith('User'):
self.relation += 'User'
elif name.endswith('Post'):
@@ -92,6 +120,8 @@ class DjangoFK(DjangoField):
self.relation = 'self' #self-referential Post model
else:
self.relation += name
+ def get_relation(self):
+ return self.relation
def get_col_type(col):
type = col.get('type')
@@ -108,25 +138,88 @@ def get_col_type(col):
restriction = -1
return type, restriction
+def make_field_from_xml_tree(xml_element):
+ """used by the model parser
+ here we need to be detailed about field types
+ because this defines the database schema
+ """
+ name = xml_element.get('name')
+ if name == 'LinkedVoteId':#not used
+ return None
+ if name == 'Id':
+ field = DjangoPK()
+ elif name.endswith('Id') and name not in ('OpenId','PasswordId'):
+ field = DjangoFK(name)
+ elif name.endswith('GUID'):
+ field = DjangoField(name, 'string', 64)
+ else:
+ type, restriction = get_col_type(xml_element)
+ field = DjangoField(name, type, restriction)
+ return field
+
+def parse_field_name(input):
+ """used by the data reader
+
+ The problem is that I've scattered
+ code for determination of field name over three classes:
+ DjangoField, DjangoPK and DjangoFK
+ so the function actually creates fake field objects
+ many times over
+ """
+ if input == 'Id':
+ return DjangoPK().name
+ elif input in ('OpenId', 'PasswordId'):
+ return DjangoField(input, 'string', 7).name#happy fake field
+ elif input.endswith('Id'):
+ return DjangoFK(input).name#real FK field
+ else:
+ return DjangoField(input, 'string', 7).name#happy fake field
+
+def parse_value(input, field_object):
+ if isinstance(field_object, models.ForeignKey):
+ try:
+ id = int(input)
+ except:
+ raise Exception('non-numeric foreign key %s' % input)
+ related_model = field_object.rel.to
+ try:
+ return related_model.objects.get(id=id)
+ except related_model.DoesNotExist:
+ obj = related_model(id=id)
+ obj.save()#save fake empty object
+ return obj
+ elif isinstance(field_object, models.IntegerField):
+ try:
+ return int(input)
+ except:
+ raise Exception('expected integer, found %s' % input)
+ elif isinstance(field_object, models.CharField):
+ return input
+ elif isinstance(field_object, models.TextField):
+ return input
+ elif isinstance(field_object, models.BooleanField):
+ try:
+ return bool(input)
+ except:
+ raise Exception('boolean value expected %s found' % input)
+ elif isinstance(field_object, models.DateTimeField):
+ input = time_re.sub('', input)
+ try:
+ return datetime.strptime(input, date_time_format)
+ except:
+ raise Exception('datetime expected "%s" found' % input)
+
print 'from django.db import models'
for file in sys.argv:
if '.xsd' in file:
- tname = file.replace('.xsd','')
+ tname = os.path.basename(file).replace('.xsd','')
tree = et.parse(file)
model = DjangoModel(tname)
row = tree.find('.//sequence')
for col in row.getchildren():
- name = col.get('name')
- if name in ('Id', 'LinkedVoteId'):#second one is not used
- continue
- elif name.endswith('Id') and name not in ('OpenId','PasswordId'):
- field = DjangoFK(name)
- elif name.endswith('GUID'):
- field = DjangoField(name, 'string', 64)
- else:
- type, restriction = get_col_type(col)
- field = DjangoField(name, type, restriction)
- model.add_field(field)
+ field = make_field_from_xml_tree(col)
+ if field:
+ model.add_field(field)
print model