diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-03-03 01:32:56 -0500 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-03-03 01:32:56 -0500 |
commit | e6f49c04ab60a1729f09aee50804ce83f4869a17 (patch) | |
tree | f9dd69e746b78eebfa80e71d7699adc9faa8deea /stackexchange | |
parent | bac35fb088833fdc757c8dc9e90bcb53a0a8755c (diff) | |
download | askbot-e6f49c04ab60a1729f09aee50804ce83f4869a17.tar.gz askbot-e6f49c04ab60a1729f09aee50804ce83f4869a17.tar.bz2 askbot-e6f49c04ab60a1729f09aee50804ce83f4869a17.zip |
created model parser and initial models for stackexchange importer
Diffstat (limited to 'stackexchange')
-rw-r--r-- | stackexchange/README | 16 | ||||
-rw-r--r-- | stackexchange/__init__.py | 0 | ||||
-rw-r--r-- | stackexchange/models.py | 240 | ||||
-rw-r--r-- | stackexchange/parse_models.py | 132 |
4 files changed, 388 insertions, 0 deletions
diff --git a/stackexchange/README b/stackexchange/README new file mode 100644 index 00000000..4d3f7750 --- /dev/null +++ b/stackexchange/README @@ -0,0 +1,16 @@ +this app's function will be to: + +* install its own tables +* read SE xml dump into DjangoDB +* populate osqa database +* remove SE tables + +So far models are automatically created via: + +1) add 'stackexchange' to the list of installed apps +2) type commands + + cd stackexchange + python parse_models.py SE_DUMP/xsd/*.xsd > models.py + cd .. + python manage.py syncdb diff --git a/stackexchange/__init__.py b/stackexchange/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/stackexchange/__init__.py diff --git a/stackexchange/models.py b/stackexchange/models.py new file mode 100644 index 00000000..28b2dda6 --- /dev/null +++ b/stackexchange/models.py @@ -0,0 +1,240 @@ +from django.db import models +class StackExchangeBadge(models.Model): + class_type = models.IntegerField() + name = models.CharField(max_length=50) + description = models.TextField() + single = models.BooleanField() + secret = models.BooleanField() + tag_based = models.BooleanField() + command = models.TextField() + award_frequency = models.IntegerField() + +class StackExchangeCloseReason(models.Model): + name = models.CharField(max_length=200) + description = models.CharField(max_length=256) + display_order = models.IntegerField() + +class StackExchangeComment2Vote(models.Model): + post_comment = models.ForeignKey('StackExchangePostComment', related_name='StackExchangeComment2Vote_post_comment_set', null=True) + vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangeComment2Vote_vote_type_set', null=True) + creation_date = models.DateTimeField() + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeComment2Vote_user_set', null=True) + ip_address = models.CharField(max_length=40) + user_display_name = models.CharField(max_length=40) + deletion_date = 
models.DateTimeField() + +class StackExchangeFlatPage(models.Model): + name = models.CharField(max_length=50) + url = models.CharField(max_length=128) + value = models.TextField() + content_type = models.CharField(max_length=50) + active = models.BooleanField() + use_master = models.BooleanField() + +class StackExchangeMessage(models.Model): + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeMessage_user_set', null=True) + message_type = models.ForeignKey('StackExchangeMessageType', related_name='StackExchangeMessage_message_type_set', null=True) + is_read = models.BooleanField() + creation_date = models.DateTimeField() + text = models.TextField() + post = models.ForeignKey('StackExchangePost', related_name='StackExchangeMessage_post_set', null=True) + +class StackExchangeMessageType(models.Model): + name = models.CharField(max_length=50) + description = models.CharField(max_length=300) + +class StackExchangeModeratorMessage(models.Model): + message_type = models.ForeignKey('StackExchangeMessageType', related_name='StackExchangeModeratorMessage_message_type_set', null=True) + creation_date = models.DateTimeField() + creation_ip_address = models.CharField(max_length=40) + text = models.TextField() + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeModeratorMessage_user_set', null=True) + post = models.ForeignKey('StackExchangePost', related_name='StackExchangeModeratorMessage_post_set', null=True) + deletion_date = models.DateTimeField() + deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeModeratorMessage_deletion_user_set', null=True) + deletion_ip_address = models.CharField(max_length=40) + user_display_name = models.CharField(max_length=40) + +class StackExchangePostComment(models.Model): + post = models.ForeignKey('StackExchangePost', related_name='StackExchangePostComment_post_set', null=True) + text = models.TextField() + creation_date = models.DateTimeField() + ip_address = 
models.CharField(max_length=15) + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostComment_user_set', null=True) + user_display_name = models.CharField(max_length=30) + deletion_date = models.DateTimeField() + deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostComment_deletion_user_set', null=True) + score = models.IntegerField() + +class StackExchangePostHistoryType(models.Model): + name = models.CharField(max_length=50) + description = models.CharField(max_length=300) + +class StackExchangePostHistory(models.Model): + post_history_type = models.ForeignKey('StackExchangePostHistoryType', related_name='StackExchangePostHistory_post_history_type_set', null=True) + post = models.ForeignKey('StackExchangePost', related_name='StackExchangePostHistory_post_set', null=True) + revision_guid = models.CharField(max_length=64) + creation_date = models.DateTimeField() + ip_address = models.CharField(max_length=40) + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostHistory_user_set', null=True) + comment = models.CharField(max_length=400) + text = models.TextField() + user_display_name = models.CharField(max_length=40) + user_email = models.CharField(max_length=100) + user_website_url = models.CharField(max_length=200) + +class StackExchangePost2Vote(models.Model): + post = models.ForeignKey('StackExchangePost', related_name='StackExchangePost2Vote_post_set', null=True) + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost2Vote_user_set', null=True) + vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangePost2Vote_vote_type_set', null=True) + creation_date = models.DateTimeField() + deletion_date = models.DateTimeField() + target_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost2Vote_target_user_set', null=True) + target_rep_change = models.IntegerField() + voter_rep_change = models.IntegerField() + comment 
= models.CharField(max_length=150) + ip_address = models.CharField(max_length=40) + linked_post = models.ForeignKey('StackExchangePost', related_name='StackExchangePost2Vote_linked_post_set', null=True) + +class StackExchangePost(models.Model): + post_type = models.ForeignKey('StackExchangePostType', related_name='StackExchangePost_post_type_set', null=True) + creation_date = models.DateTimeField() + score = models.IntegerField() + view_count = models.IntegerField() + body = models.TextField() + owner_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_owner_user_set', null=True) + last_editor_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_last_editor_user_set', null=True) + last_edit_date = models.DateTimeField() + last_activity_date = models.DateTimeField() + last_activity_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_last_activity_user_set', null=True) + parent = models.ForeignKey('self', related_name='StackExchangePost_parent_set', null=True) + accepted_answer = models.ForeignKey('self', related_name='StackExchangePost_accepted_answer_set', null=True) + title = models.CharField(max_length=250) + tags = models.CharField(max_length=150) + community_owned_date = models.DateTimeField() + history_summary = models.CharField(max_length=150) + answer_score = models.IntegerField() + answer_count = models.IntegerField() + comment_count = models.IntegerField() + favorite_count = models.IntegerField() + deletion_date = models.DateTimeField() + closed_date = models.DateTimeField() + locked_date = models.DateTimeField() + locked_duration = models.IntegerField() + owner_display_name = models.CharField(max_length=40) + last_editor_display_name = models.CharField(max_length=40) + bounty_amount = models.IntegerField() + bounty_closes = models.DateTimeField() + bounty_closed = models.DateTimeField() + last_owner_email_date = models.DateTimeField() + +class StackExchangePostType(models.Model): 
+ name = models.CharField(max_length=50) + description = models.CharField(max_length=300) + +class StackExchangeSchemaVersion(models.Model): + version = models.IntegerField() + +class StackExchangeSetting(models.Model): + key = models.CharField(max_length=256) + value = models.TextField() + +class StackExchangeSystemMessage(models.Model): + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeSystemMessage_user_set', null=True) + creation_date = models.DateTimeField() + text = models.TextField() + deletion_date = models.DateTimeField() + deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeSystemMessage_deletion_user_set', null=True) + +class StackExchangeTag(models.Model): + name = models.CharField(max_length=50) + count = models.IntegerField() + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeTag_user_set', null=True) + creation_date = models.DateTimeField() + is_moderator_only = models.BooleanField() + is_required = models.BooleanField() + aliases = models.CharField(max_length=200) + +class StackExchangeThemeResource(models.Model): + name = models.CharField(max_length=50) + value = models.TextField() + content_type = models.CharField(max_length=50) + version = models.CharField(max_length=6) + +class StackExchangeThemeTextResource(models.Model): + name = models.CharField(max_length=50) + value = models.TextField() + content_type = models.CharField(max_length=50) + +class StackExchangeThrottleBucket(models.Model): + type = models.CharField(max_length=256) + ip_address = models.CharField(max_length=64) + tokens = models.IntegerField() + last_update = models.DateTimeField() + +class StackExchangeUserHistoryType(models.Model): + name = models.CharField(max_length=50) + description = models.CharField(max_length=300) + +class StackExchangeUserHistory(models.Model): + user_history_type = models.ForeignKey('StackExchangeUserHistoryType', 
related_name='StackExchangeUserHistory_user_history_type_set', null=True) + creation_date = models.DateTimeField() + ip_address = models.CharField(max_length=40) + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUserHistory_user_set', null=True) + comment = models.CharField(max_length=400) + user_display_name = models.CharField(max_length=40) + moderator_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUserHistory_moderator_user_set', null=True) + reputation = models.IntegerField() + +class StackExchangeUser2Badge(models.Model): + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Badge_user_set', null=True) + badge = models.ForeignKey('StackExchangeBadge', related_name='StackExchangeUser2Badge_badge_set', null=True) + date = models.DateTimeField() + comment = models.CharField(max_length=50) + +class StackExchangeUser2Vote(models.Model): + user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Vote_user_set', null=True) + vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangeUser2Vote_vote_type_set', null=True) + target_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Vote_target_user_set', null=True) + creation_date = models.DateTimeField() + deletion_date = models.DateTimeField() + ip_address = models.CharField(max_length=40) + +class StackExchangeUser(models.Model): + user_type = models.ForeignKey('StackExchangeUserType', related_name='StackExchangeUser_user_type_set', null=True) + open_id = models.CharField(max_length=200) + reputation = models.IntegerField() + views = models.IntegerField() + creation_date = models.DateTimeField() + last_access_date = models.DateTimeField() + has_replies = models.BooleanField() + has_message = models.BooleanField() + opt_in_email = models.BooleanField() + opt_in_recruit = models.BooleanField() + last_login_date = models.DateTimeField() + last_email_date = 
models.DateTimeField() + last_login_ip = models.CharField(max_length=15) + open_id_alt = models.CharField(max_length=200) + email = models.CharField(max_length=100) + display_name = models.CharField(max_length=40) + display_name_cleaned = models.CharField(max_length=40) + website_url = models.CharField(max_length=200) + real_name = models.CharField(max_length=100) + location = models.CharField(max_length=100) + birthday = models.DateTimeField() + badge_summary = models.CharField(max_length=50) + about_me = models.TextField() + preferences_raw = models.TextField() + timed_penalty_date = models.DateTimeField() + guid = models.CharField(max_length=64) + phone = models.CharField(max_length=20) + password_id = models.IntegerField() + +class StackExchangeUserType(models.Model): + name = models.CharField(max_length=50) + description = models.CharField(max_length=300) + +class StackExchangeVoteType(models.Model): + name = models.CharField(max_length=50) + description = models.CharField(max_length=300) + diff --git a/stackexchange/parse_models.py b/stackexchange/parse_models.py new file mode 100644 index 00000000..e83ca0d5 --- /dev/null +++ b/stackexchange/parse_models.py @@ -0,0 +1,132 @@ +from xml.etree import ElementTree as et +import sys +import re + +types = { + 'unsignedByte':'models.IntegerField', + 'FK':'models.ForeignKey', + 'string':'models.CharField', + 'text':'models.TextField', + 'int':'models.IntegerField', + 'boolean':'models.BooleanField', + 'dateTime':'models.DateTimeField', + 'base64Binary':'models.TextField', + 'double':'models.IntegerField', +} + +def camel_to_python(camel): + """http://stackoverflow.com/questions/1175208/ + """ + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + +def singular(word): + if word.endswith('s'): + return word[:-1] + else: + return word + +class DjangoModel(object): + def __init__(self, name): + self.name = 'StackExchange' + if name.find('2') == -1: + self.name += 
singular(name) + else: + bits = name.split('2') + bits = map(singular, bits) + self.name += '2'.join(bits) + self.fields = [] + def add_field(self,field): + field.table = self + self.fields.append(field) + def __str__(self): + out = 'class %s(models.Model):\n' % self.name + for f in self.fields: + out += ' %s\n' % str(f) + return out + +class DjangoField(object): + def __init__(self, name, type, restriction = None): + self.name = camel_to_python(name) + if self.name == 'class': + self.name = 'class_type'#work around python keyword + self.type = type + self.table = None + self.restriction = restriction + self.relation = None + + def __str__(self): + out = '%s = %s(' % (self.name, types[self.type]) + if self.relation and self.restriction: + raise Exception('impossible') + elif self.relation: + out += "'%s'" % self.relation + out += ", related_name='%s_%s_set'" % (self.table.name, self.name) + out += ', null=True'#nullable to make life easier + elif self.restriction != -1: + if self.type == 'string': + out += 'max_length=%s' % self.restriction + else: + raise Exception('only max_length restriction is supported') + out += ')' + return out + +class DjangoFK(DjangoField): + def __init__(self, source_name): + bits = source_name.split('Id') + if len(bits) == 2 and bits[1] == '': + name = bits[0] + super(DjangoFK, self).__init__(name, 'FK') + self.set_relation(name) + + def set_relation(self, name): + """some relations need to be mapped + to actual tables + """ + self.relation = 'StackExchange' + if name.endswith('User'): + self.relation += 'User' + elif name.endswith('Post'): + self.relation += 'Post' + elif name in ('AcceptedAnswer','Parent'): + self.relation = 'self' #self-referential Post model + else: + self.relation += name + +def get_col_type(col): + type = col.get('type') + restriction = -1 + if type == None: + type_e = col.find('.//simpleType/restriction') + type = type_e.get('base') + try: + restriction = int(type_e.getchildren()[0].get('value')) + except: + 
restriction = -1 + if restriction > 400: + type = 'text' + restriction = -1 + return type, restriction + +print 'from django.db import models' +for file in sys.argv: + if '.xsd' in file: + tname = file.replace('.xsd','') + tree = et.parse(file) + + model = DjangoModel(tname) + + row = tree.find('.//sequence') + for col in row.getchildren(): + name = col.get('name') + if name in ('Id', 'LinkedVoteId'):#second one is not used + continue + elif name.endswith('Id') and name not in ('OpenId','PasswordId'): + field = DjangoFK(name) + elif name.endswith('GUID'): + field = DjangoField(name, 'string', 64) + else: + type, restriction = get_col_type(col) + field = DjangoField(name, type, restriction) + model.add_field(field) + print model |