summaryrefslogtreecommitdiffstats
path: root/stackexchange
diff options
context:
space:
mode:
authorEvgeny Fadeev <evgeny.fadeev@gmail.com>2010-03-03 01:32:56 -0500
committerEvgeny Fadeev <evgeny.fadeev@gmail.com>2010-03-03 01:32:56 -0500
commite6f49c04ab60a1729f09aee50804ce83f4869a17 (patch)
treef9dd69e746b78eebfa80e71d7699adc9faa8deea /stackexchange
parentbac35fb088833fdc757c8dc9e90bcb53a0a8755c (diff)
downloadaskbot-e6f49c04ab60a1729f09aee50804ce83f4869a17.tar.gz
askbot-e6f49c04ab60a1729f09aee50804ce83f4869a17.tar.bz2
askbot-e6f49c04ab60a1729f09aee50804ce83f4869a17.zip
created model parser and initial models for stackexchange importer
Diffstat (limited to 'stackexchange')
-rw-r--r--stackexchange/README16
-rw-r--r--stackexchange/__init__.py0
-rw-r--r--stackexchange/models.py240
-rw-r--r--stackexchange/parse_models.py132
4 files changed, 388 insertions, 0 deletions
diff --git a/stackexchange/README b/stackexchange/README
new file mode 100644
index 00000000..4d3f7750
--- /dev/null
+++ b/stackexchange/README
@@ -0,0 +1,16 @@
+this app's function will be to:
+
+* install its own tables
+* read SE xml dump into DjangoDB
+* populate osqa database
+* remove SE tables
+
+So far models are automatically created via:
+
+1) add 'stackexchange' to the list of installed apps
+2) type commands
+
+ cd stackexchange
+ python parse_models.py SE_DUMP/xsd/*.xsd > models.py
+ cd ..
+ python manage.py syncdb
diff --git a/stackexchange/__init__.py b/stackexchange/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/stackexchange/__init__.py
diff --git a/stackexchange/models.py b/stackexchange/models.py
new file mode 100644
index 00000000..28b2dda6
--- /dev/null
+++ b/stackexchange/models.py
@@ -0,0 +1,240 @@
+from django.db import models
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# `class_type` is the generator's replacement for a column whose converted
+# name would be the Python keyword `class`.
+class StackExchangeBadge(models.Model):
+ class_type = models.IntegerField()
+ name = models.CharField(max_length=50)
+ description = models.TextField()
+ single = models.BooleanField()
+ secret = models.BooleanField()
+ tag_based = models.BooleanField()
+ command = models.TextField()
+ award_frequency = models.IntegerField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Plain columns only, no relations.
+class StackExchangeCloseReason(models.Model):
+ name = models.CharField(max_length=200)
+ description = models.CharField(max_length=256)
+ display_order = models.IntegerField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Ties a post comment, a vote type and a user together. The three ForeignKeys
+# are nullable; creation_date and deletion_date are not.
+class StackExchangeComment2Vote(models.Model):
+ post_comment = models.ForeignKey('StackExchangePostComment', related_name='StackExchangeComment2Vote_post_comment_set', null=True)
+ vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangeComment2Vote_vote_type_set', null=True)
+ creation_date = models.DateTimeField()
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeComment2Vote_user_set', null=True)
+ ip_address = models.CharField(max_length=40)
+ user_display_name = models.CharField(max_length=40)
+ deletion_date = models.DateTimeField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# No relations. `value` is an unbounded TextField; the other text columns are
+# length-limited CharFields.
+class StackExchangeFlatPage(models.Model):
+ name = models.CharField(max_length=50)
+ url = models.CharField(max_length=128)
+ value = models.TextField()
+ content_type = models.CharField(max_length=50)
+ active = models.BooleanField()
+ use_master = models.BooleanField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# References a user, a message type and a post through nullable ForeignKeys.
+class StackExchangeMessage(models.Model):
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeMessage_user_set', null=True)
+ message_type = models.ForeignKey('StackExchangeMessageType', related_name='StackExchangeMessage_message_type_set', null=True)
+ is_read = models.BooleanField()
+ creation_date = models.DateTimeField()
+ text = models.TextField()
+ post = models.ForeignKey('StackExchangePost', related_name='StackExchangeMessage_post_set', null=True)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Target of the message_type ForeignKeys on StackExchangeMessage and
+# StackExchangeModeratorMessage.
+class StackExchangeMessageType(models.Model):
+ name = models.CharField(max_length=50)
+ description = models.CharField(max_length=300)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Two ForeignKeys (user, deletion_user) point at StackExchangeUser; each gets
+# its own related_name from the generator so the reverse accessors differ.
+class StackExchangeModeratorMessage(models.Model):
+ message_type = models.ForeignKey('StackExchangeMessageType', related_name='StackExchangeModeratorMessage_message_type_set', null=True)
+ creation_date = models.DateTimeField()
+ creation_ip_address = models.CharField(max_length=40)
+ text = models.TextField()
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeModeratorMessage_user_set', null=True)
+ post = models.ForeignKey('StackExchangePost', related_name='StackExchangeModeratorMessage_post_set', null=True)
+ deletion_date = models.DateTimeField()
+ deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeModeratorMessage_deletion_user_set', null=True)
+ deletion_ip_address = models.CharField(max_length=40)
+ user_display_name = models.CharField(max_length=40)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# References a post and two users (user, deletion_user) via nullable
+# ForeignKeys. ip_address is limited to 15 characters here, whereas most other
+# models in this file allow 40.
+class StackExchangePostComment(models.Model):
+ post = models.ForeignKey('StackExchangePost', related_name='StackExchangePostComment_post_set', null=True)
+ text = models.TextField()
+ creation_date = models.DateTimeField()
+ ip_address = models.CharField(max_length=15)
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostComment_user_set', null=True)
+ user_display_name = models.CharField(max_length=30)
+ deletion_date = models.DateTimeField()
+ deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostComment_deletion_user_set', null=True)
+ score = models.IntegerField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Target of the post_history_type ForeignKey on StackExchangePostHistory.
+class StackExchangePostHistoryType(models.Model):
+ name = models.CharField(max_length=50)
+ description = models.CharField(max_length=300)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# References its history type, post and user via nullable ForeignKeys.
+# revision_guid is 64 characters because the generator hard-codes that length
+# for every column whose name ends in "GUID".
+class StackExchangePostHistory(models.Model):
+ post_history_type = models.ForeignKey('StackExchangePostHistoryType', related_name='StackExchangePostHistory_post_history_type_set', null=True)
+ post = models.ForeignKey('StackExchangePost', related_name='StackExchangePostHistory_post_set', null=True)
+ revision_guid = models.CharField(max_length=64)
+ creation_date = models.DateTimeField()
+ ip_address = models.CharField(max_length=40)
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePostHistory_user_set', null=True)
+ comment = models.CharField(max_length=400)
+ text = models.TextField()
+ user_display_name = models.CharField(max_length=40)
+ user_email = models.CharField(max_length=100)
+ user_website_url = models.CharField(max_length=200)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# user and target_user both reference StackExchangeUser; post and linked_post
+# both reference StackExchangePost. All ForeignKeys are nullable.
+class StackExchangePost2Vote(models.Model):
+ post = models.ForeignKey('StackExchangePost', related_name='StackExchangePost2Vote_post_set', null=True)
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost2Vote_user_set', null=True)
+ vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangePost2Vote_vote_type_set', null=True)
+ creation_date = models.DateTimeField()
+ deletion_date = models.DateTimeField()
+ target_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost2Vote_target_user_set', null=True)
+ target_rep_change = models.IntegerField()
+ voter_rep_change = models.IntegerField()
+ comment = models.CharField(max_length=150)
+ ip_address = models.CharField(max_length=40)
+ linked_post = models.ForeignKey('StackExchangePost', related_name='StackExchangePost2Vote_linked_post_set', null=True)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# parent and accepted_answer are self-referential ('self'), as mapped by
+# DjangoFK.set_relation in the generator.
+# NOTE(review): none of the DateTimeFields is nullable -- confirm that rows
+# without e.g. a closed_date or deletion_date can still be loaded.
+class StackExchangePost(models.Model):
+ post_type = models.ForeignKey('StackExchangePostType', related_name='StackExchangePost_post_type_set', null=True)
+ creation_date = models.DateTimeField()
+ score = models.IntegerField()
+ view_count = models.IntegerField()
+ body = models.TextField()
+ owner_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_owner_user_set', null=True)
+ last_editor_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_last_editor_user_set', null=True)
+ last_edit_date = models.DateTimeField()
+ last_activity_date = models.DateTimeField()
+ last_activity_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangePost_last_activity_user_set', null=True)
+ parent = models.ForeignKey('self', related_name='StackExchangePost_parent_set', null=True)
+ accepted_answer = models.ForeignKey('self', related_name='StackExchangePost_accepted_answer_set', null=True)
+ title = models.CharField(max_length=250)
+ tags = models.CharField(max_length=150)
+ community_owned_date = models.DateTimeField()
+ history_summary = models.CharField(max_length=150)
+ answer_score = models.IntegerField()
+ answer_count = models.IntegerField()
+ comment_count = models.IntegerField()
+ favorite_count = models.IntegerField()
+ deletion_date = models.DateTimeField()
+ closed_date = models.DateTimeField()
+ locked_date = models.DateTimeField()
+ locked_duration = models.IntegerField()
+ owner_display_name = models.CharField(max_length=40)
+ last_editor_display_name = models.CharField(max_length=40)
+ bounty_amount = models.IntegerField()
+ bounty_closes = models.DateTimeField()
+ bounty_closed = models.DateTimeField()
+ last_owner_email_date = models.DateTimeField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Target of the post_type ForeignKey on StackExchangePost.
+class StackExchangePostType(models.Model):
+ name = models.CharField(max_length=50)
+ description = models.CharField(max_length=300)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# A single integer column besides Django's implicit primary key.
+class StackExchangeSchemaVersion(models.Model):
+ version = models.IntegerField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Key/value pair; the value is an unbounded TextField.
+class StackExchangeSetting(models.Model):
+ key = models.CharField(max_length=256)
+ value = models.TextField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# user and deletion_user both reference StackExchangeUser (nullable).
+class StackExchangeSystemMessage(models.Model):
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeSystemMessage_user_set', null=True)
+ creation_date = models.DateTimeField()
+ text = models.TextField()
+ deletion_date = models.DateTimeField()
+ deletion_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeSystemMessage_deletion_user_set', null=True)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# One nullable ForeignKey to StackExchangeUser; `aliases` is a single
+# CharField of up to 200 characters.
+class StackExchangeTag(models.Model):
+ name = models.CharField(max_length=50)
+ count = models.IntegerField()
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeTag_user_set', null=True)
+ creation_date = models.DateTimeField()
+ is_moderator_only = models.BooleanField()
+ is_required = models.BooleanField()
+ aliases = models.CharField(max_length=200)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# No relations; `version` is a CharField of at most 6 characters.
+class StackExchangeThemeResource(models.Model):
+ name = models.CharField(max_length=50)
+ value = models.TextField()
+ content_type = models.CharField(max_length=50)
+ version = models.CharField(max_length=6)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Same columns as StackExchangeThemeResource, minus `version`.
+class StackExchangeThemeTextResource(models.Model):
+ name = models.CharField(max_length=50)
+ value = models.TextField()
+ content_type = models.CharField(max_length=50)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# No relations. The field named `type` shadows the builtin only inside this
+# class body.
+class StackExchangeThrottleBucket(models.Model):
+ type = models.CharField(max_length=256)
+ ip_address = models.CharField(max_length=64)
+ tokens = models.IntegerField()
+ last_update = models.DateTimeField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Target of the user_history_type ForeignKey on StackExchangeUserHistory.
+class StackExchangeUserHistoryType(models.Model):
+ name = models.CharField(max_length=50)
+ description = models.CharField(max_length=300)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# user and moderator_user both reference StackExchangeUser; all ForeignKeys
+# are nullable.
+class StackExchangeUserHistory(models.Model):
+ user_history_type = models.ForeignKey('StackExchangeUserHistoryType', related_name='StackExchangeUserHistory_user_history_type_set', null=True)
+ creation_date = models.DateTimeField()
+ ip_address = models.CharField(max_length=40)
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUserHistory_user_set', null=True)
+ comment = models.CharField(max_length=400)
+ user_display_name = models.CharField(max_length=40)
+ moderator_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUserHistory_moderator_user_set', null=True)
+ reputation = models.IntegerField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Links a user to a badge through two nullable ForeignKeys, with a date and a
+# short comment.
+class StackExchangeUser2Badge(models.Model):
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Badge_user_set', null=True)
+ badge = models.ForeignKey('StackExchangeBadge', related_name='StackExchangeUser2Badge_badge_set', null=True)
+ date = models.DateTimeField()
+ comment = models.CharField(max_length=50)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# user and target_user both reference StackExchangeUser; vote_type references
+# StackExchangeVoteType. All ForeignKeys are nullable.
+class StackExchangeUser2Vote(models.Model):
+ user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Vote_user_set', null=True)
+ vote_type = models.ForeignKey('StackExchangeVoteType', related_name='StackExchangeUser2Vote_vote_type_set', null=True)
+ target_user = models.ForeignKey('StackExchangeUser', related_name='StackExchangeUser2Vote_target_user_set', null=True)
+ creation_date = models.DateTimeField()
+ deletion_date = models.DateTimeField()
+ ip_address = models.CharField(max_length=40)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# open_id and password_id are plain columns: the generator excludes the
+# OpenId and PasswordId columns from its "<Name>Id means ForeignKey" rule.
+# user_type is the only relation.
+class StackExchangeUser(models.Model):
+ user_type = models.ForeignKey('StackExchangeUserType', related_name='StackExchangeUser_user_type_set', null=True)
+ open_id = models.CharField(max_length=200)
+ reputation = models.IntegerField()
+ views = models.IntegerField()
+ creation_date = models.DateTimeField()
+ last_access_date = models.DateTimeField()
+ has_replies = models.BooleanField()
+ has_message = models.BooleanField()
+ opt_in_email = models.BooleanField()
+ opt_in_recruit = models.BooleanField()
+ last_login_date = models.DateTimeField()
+ last_email_date = models.DateTimeField()
+ last_login_ip = models.CharField(max_length=15)
+ open_id_alt = models.CharField(max_length=200)
+ email = models.CharField(max_length=100)
+ display_name = models.CharField(max_length=40)
+ display_name_cleaned = models.CharField(max_length=40)
+ website_url = models.CharField(max_length=200)
+ real_name = models.CharField(max_length=100)
+ location = models.CharField(max_length=100)
+ birthday = models.DateTimeField()
+ badge_summary = models.CharField(max_length=50)
+ about_me = models.TextField()
+ preferences_raw = models.TextField()
+ timed_penalty_date = models.DateTimeField()
+ guid = models.CharField(max_length=64)
+ phone = models.CharField(max_length=20)
+ password_id = models.IntegerField()
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Target of the user_type ForeignKey on StackExchangeUser.
+class StackExchangeUserType(models.Model):
+ name = models.CharField(max_length=50)
+ description = models.CharField(max_length=300)
+
+# Generated by parse_models.py; regenerate instead of editing by hand.
+# Target of the vote_type ForeignKeys on StackExchangeComment2Vote,
+# StackExchangePost2Vote and StackExchangeUser2Vote.
+class StackExchangeVoteType(models.Model):
+ name = models.CharField(max_length=50)
+ description = models.CharField(max_length=300)
+
diff --git a/stackexchange/parse_models.py b/stackexchange/parse_models.py
new file mode 100644
index 00000000..e83ca0d5
--- /dev/null
+++ b/stackexchange/parse_models.py
@@ -0,0 +1,132 @@
+import os
+import re
+import sys
+from xml.etree import ElementTree as et
+
+# Maps a schema type name (or one of the generator's own markers, 'FK' and
+# 'text') to the Django field class written into the generated models.py.
+types = {
+ 'unsignedByte':'models.IntegerField',
+ 'FK':'models.ForeignKey',  # marker passed by DjangoFK, not a schema type
+ 'string':'models.CharField',
+ 'text':'models.TextField',  # marker set by get_col_type() for strings longer than 400
+ 'int':'models.IntegerField',
+ 'boolean':'models.BooleanField',
+ 'dateTime':'models.DateTimeField',
+ 'base64Binary':'models.TextField',
+ 'double':'models.IntegerField',  # NOTE(review): an IntegerField cannot hold a fractional part -- confirm this mapping is intended
+}
+
+def camel_to_python(camel):
+    """Convert a CamelCase identifier to lower_case_with_underscores.
+
+    Recipe from http://stackoverflow.com/questions/1175208/
+    """
+    # pass 1: underscore before a capital that starts a lowercase run
+    partly_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel)
+    # pass 2: underscore between a lowercase letter or digit and a capital
+    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', partly_split)
+    return fully_split.lower()
+
+def singular(word):
+    """Return `word` without a trailing 's'; any other word is returned unchanged."""
+    return word[:-1] if word.endswith('s') else word
+
+class DjangoModel(object):
+    """One model class of the generated models.py: a class name plus its fields."""
+
+    def __init__(self, name):
+        # '2' separates the two halves of a link-table name (e.g.
+        # "Posts2Votes"); every half is singularized on its own.  A name
+        # without '2' is a single half, so one join covers both cases.
+        halves = [singular(half) for half in name.split('2')]
+        self.name = 'StackExchange' + '2'.join(halves)
+        self.fields = []
+
+    def add_field(self, field):
+        """Append `field` and point its `table` attribute back at this model."""
+        self.fields.append(field)
+        field.table = self
+
+    def __str__(self):
+        """Return the source code of the model class, one line per field."""
+        header = 'class %s(models.Model):\n' % self.name
+        body = ''.join([' %s\n' % str(f) for f in self.fields])
+        return header + body
+
+class DjangoField(object):
+    """One field line of a generated model.
+
+    name        -- column name as found in the schema (CamelCase); stored
+                   converted by camel_to_python()
+    type        -- key of the module-level `types` table
+    restriction -- max_length for 'string' fields; None or -1 means that
+                   there is no restriction
+    """
+
+    def __init__(self, name, type, restriction = None):
+        self.name = camel_to_python(name)
+        if self.name == 'class':
+            self.name = 'class_type'#work around python keyword
+        self.type = type
+        self.table = None #set by DjangoModel.add_field()
+        self.restriction = restriction
+        self.relation = None #target model name, set by DjangoFK
+
+    def __str__(self):
+        """Return the field as one line of Django source code.
+
+        Raises Exception when both a relation and a restriction are set, or
+        when a restriction is given for a type other than 'string'.
+        """
+        out = '%s = %s(' % (self.name, types[self.type])
+        if self.relation and self.restriction:
+            raise Exception('impossible')
+        elif self.relation:
+            out += "'%s'" % self.relation
+            out += ", related_name='%s_%s_set'" % (self.table.name, self.name)
+            out += ', null=True'#nullable to make life easier
+        # None is the constructor default and -1 is what get_col_type()
+        # returns; both mean "no restriction".  Testing only for -1 made the
+        # default raise for non-string types and emit max_length=None for
+        # strings.
+        elif self.restriction is not None and self.restriction != -1:
+            if self.type == 'string':
+                out += 'max_length=%s' % self.restriction
+            else:
+                raise Exception('only max_length restriction is supported')
+        out += ')'
+        return out
+
+class DjangoFK(DjangoField):
+    """ForeignKey field built from a source column named "<Name>Id"."""
+
+    def __init__(self, source_name):
+        """Create the field for column `source_name`.
+
+        Raises ValueError when `source_name` is not of the form "<Name>Id".
+        """
+        # Only the trailing "Id" is stripped.  Splitting on every "Id" left
+        # `name` unbound (UnboundLocalError) for columns that contain "Id"
+        # more than once, e.g. "IdentityId", or that do not end in it.
+        suffix = 'Id'
+        name = source_name[:-len(suffix)]
+        if not source_name.endswith(suffix) or not name:
+            raise ValueError('not a foreign key column: %r' % source_name)
+        super(DjangoFK, self).__init__(name, 'FK')
+        self.set_relation(name)
+
+    def set_relation(self, name):
+        """some relations need to be mapped
+        to actual tables
+        """
+        self.relation = 'StackExchange'
+        if name.endswith('User'):
+            # OwnerUser, DeletionUser, ... all point at the user model
+            self.relation += 'User'
+        elif name.endswith('Post'):
+            # LinkedPost and the like point at the post model
+            self.relation += 'Post'
+        elif name in ('AcceptedAnswer','Parent'):
+            self.relation = 'self' #self-referential Post model
+        else:
+            self.relation += name
+
+def get_col_type(col):
+    """Return (type_name, restriction) for one column element of the schema.
+
+    The type is taken from the element's `type` attribute or, when that is
+    absent, from the `base` attribute of its nested simpleType/restriction.
+    `restriction` is the integer `value` of the first child of that
+    restriction element, or -1 when there is none.  Restrictions above 400
+    are reported as type 'text' with no restriction.
+
+    Raises ValueError when the element has neither a `type` attribute nor a
+    simpleType/restriction child.
+    """
+    col_type = col.get('type')
+    if col_type is not None:
+        return col_type, -1
+
+    type_e = col.find('.//simpleType/restriction')
+    if type_e is None:
+        raise ValueError(
+            'column %r has no type attribute and no simpleType restriction'
+            % col.get('name')
+        )
+    col_type = type_e.get('base')
+    try:
+        # list(element) replaces the deprecated getchildren().  Only the
+        # three expected failures are caught: no child, no `value`
+        # attribute, or a non-numeric value.
+        restriction = int(list(type_e)[0].get('value'))
+    except (IndexError, TypeError, ValueError):
+        restriction = -1
+    if restriction > 400:
+        col_type = 'text'
+        restriction = -1
+    return col_type, restriction
+
+# Script body: write the source of models.py to stdout, one model class per
+# .xsd file named on the command line.
+print('from django.db import models')
+for path in sys.argv[1:]:
+    if not path.endswith('.xsd'):
+        continue
+    # The table name is the bare file name.  Using the whole argument put the
+    # directory part into the class name when the script was called with a
+    # path such as SE_DUMP/xsd/Posts.xsd.
+    tname = os.path.splitext(os.path.basename(path))[0]
+    tree = et.parse(path)
+
+    model = DjangoModel(tname)
+
+    row = tree.find('.//sequence')
+    if row is None:
+        raise ValueError('no sequence element found in %s' % path)
+    for col in row:
+        name = col.get('name')
+        if name in ('Id', 'LinkedVoteId'):#second one is not used
+            continue
+        elif name.endswith('Id') and name not in ('OpenId','PasswordId'):
+            field = DjangoFK(name)
+        elif name.endswith('GUID'):
+            field = DjangoField(name, 'string', 64)
+        else:
+            col_type, restriction = get_col_type(col)
+            field = DjangoField(name, col_type, restriction)
+        model.add_field(field)
+    print(model)