summaryrefslogtreecommitdiffstats
path: root/stackexchange
diff options
context:
space:
mode:
authorEvgeny Fadeev <evgeny.fadeev@gmail.com>2010-04-24 19:11:05 -0400
committerEvgeny Fadeev <evgeny.fadeev@gmail.com>2010-04-24 19:11:05 -0400
commit3b8821b7e6c00a34fe71b22f011841786bb9d7e5 (patch)
treed0216d50ede25086efe89aa41e9a4aeac07b1094 /stackexchange
parent9dac785a3416e3472a69155247ad5d5af58636db (diff)
downloadaskbot-3b8821b7e6c00a34fe71b22f011841786bb9d7e5.tar.gz
askbot-3b8821b7e6c00a34fe71b22f011841786bb9d7e5.tar.bz2
askbot-3b8821b7e6c00a34fe71b22f011841786bb9d7e5.zip
moved some files and directories inside forum from root
Diffstat (limited to 'stackexchange')
-rw-r--r--stackexchange/ANOMALIES14
-rw-r--r--stackexchange/README62
-rw-r--r--stackexchange/__init__.py0
-rw-r--r--stackexchange/management/__init__.py0
-rw-r--r--stackexchange/management/commands/__init__.py0
-rw-r--r--stackexchange/management/commands/load_stackexchange.py804
-rw-r--r--stackexchange/models.py266
-rw-r--r--stackexchange/parse_models.py225
8 files changed, 0 insertions, 1371 deletions
diff --git a/stackexchange/ANOMALIES b/stackexchange/ANOMALIES
deleted file mode 100644
index 05a7dbdb..00000000
--- a/stackexchange/ANOMALIES
+++ /dev/null
@@ -1,14 +0,0 @@
-* several user accounts with same email
-* users with no openid
-* users with no email (hack: gravatar set to settings.ANONYMOUS_USER_EMAIL)
-* users with no screen name
-* users with no email and no screen name (25% in homeschool)
-* tag preferences are not stored explicitly (interesting/ignored)
- maybe they are in se.User.preferences_raw
- but the data there is not marked up and is kind of cryptic
-* we don't have Community user. SE has one with id=-1
- this id may break the load script
- potential break places are anywhere where is X.get_user() call
- issues may happen with larger data sets where activity
- of user "Community" is somehow reflected in a way
- that load_stackexchange does not take care of
diff --git a/stackexchange/README b/stackexchange/README
deleted file mode 100644
index 64d8f5fb..00000000
--- a/stackexchange/README
+++ /dev/null
@@ -1,62 +0,0 @@
-this app's function will be to:
-
-* install it's own tables (#todo: not yet automated)
-* read SE xml dump into DjangoDB (automated)
-* populate osqa database (automated)
-* remove SE tables (#todo: not done yet)
-
-Current process to load SE data into OSQA is:
-==============================================
-
-1) backup database
-
-2) unzip SE dump into dump_dir (any directory name)
- you may want to make sure that your dump directory in .gitignore file
- so that you don't publish it by mistake
-
-3) enable 'stackexchange' in the list of installed apps (probably aready in settings.py)
-
-4) (optional - create models.py for SE, which is included anyway) run:
-
- #a) run in-place removal of xml namspace prefix to make parsing easier
- perl -pi -w -e 's/xs://g' $SE_DUMP_PATH/xsd/*.xsd
- cd stackexchange
- python parse_models.py $SE_DUMP_PATH/xsd/*.xsd > models.py
-
-5) Install stackexchange models (as well as any other missing models)
- python manage.py syncdb
-
-6) make sure that osqa badges are installed
- if not, run (example for mysql):
-
- mysql -u user -p dbname < sql_scripts/badges.sql
-
-7) load SE data:
-
- python manage.py load_stackexchange dump_dir
-
- if anything doesn't go right - run 'python manage.py flush' and repeat
- steps 6 and 7
-
-NOTES:
-============
-
-Here is the load script that I used for the testing
-it assumes that SE dump has been unzipped inside the tmp directory
-
- #!/bin/sh$
- python manage.py flush
- #delete all data
- mysql -u osqa -p osqa < sql_scripts/badges.sql
- python manage.py load_stackexchange tmp
-
-Untested parts are tagged with comments starting with
-#todo:
-
-The test set did not have all the usage cases of StackExchange represented so
-it may break with other sets.
-
-The job takes some time to run, especially
-content revisions and votes - may be optimized
-
-Some of the fringe cases are described in file stackexchange/ANOMALIES
diff --git a/stackexchange/__init__.py b/stackexchange/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/stackexchange/__init__.py
+++ /dev/null
diff --git a/stackexchange/management/__init__.py b/stackexchange/management/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/stackexchange/management/__init__.py
+++ /dev/null
diff --git a/stackexchange/management/commands/__init__.py b/stackexchange/management/commands/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/stackexchange/management/commands/__init__.py
+++ /dev/null
diff --git a/stackexchange/management/commands/load_stackexchange.py b/stackexchange/management/commands/load_stackexchange.py
deleted file mode 100644
index 9cfc7192..00000000
--- a/stackexchange/management/commands/load_stackexchange.py
+++ /dev/null
@@ -1,804 +0,0 @@
-from django.core.management.base import BaseCommand
-from django.template.defaultfilters import slugify #todo: adopt unicode-aware slugify
-#todo: http://stackoverflow.com/questions/837828/how-to-use-a-slug-in-django
-import os
-import re
-import sys
-import stackexchange.parse_models as se_parser
-from xml.etree import ElementTree as et
-from django.db import models
-import forum.models as askbot
-import stackexchange.models as se
-from forum.forms import EditUserEmailFeedsForm
-from forum.utils.html import sanitize_html
-from django.conf import settings
-from django.contrib.auth.models import Message as DjangoMessage
-from django.utils.translation import ugettext as _
-#from markdown2 import Markdown
-#markdowner = Markdown(html4tags=True)
-
-xml_read_order = (
- 'VoteTypes','UserTypes','Users','Users2Votes',
- 'Badges','Users2Badges','CloseReasons','FlatPages',
- 'MessageTypes','PostHistoryTypes','PostTypes','SchemaVersion',
- 'Settings','SystemMessages','ThemeResources','ThemeTextResources',
- 'ThrottleBucket','UserHistoryTypes','UserHistory',
- 'Users2Badges','VoteTypes','Users2Votes','MessageTypes',
- 'Posts','Posts2Votes','PostHistory','PostComments',
- 'ModeratorMessages','Messages','Comments2Votes',
- )
-
-#association tables SE item id --> ASKBOT item id
-#table associations are implied
-#todo: there is an issue that these may be inconsistent with the database
-USER = {}#SE User.id --> django(ASKBOT) User.id
-QUESTION = {}
-ANSWER = {}
-NAMESAKE_COUNT = {}# number of times user name was used - for X.get_screen_name
-
-class X(object):#
- """class with methods for handling some details
- of SE --> ASKBOT mapping
- """
- badge_type_map = {'1':'gold','2':'silver','3':'bronze'}
-
- askbot_supported_id_providers = (
- 'google','yahoo','aol','myopenid',
- 'flickr','technorati',
- 'wordpress','blogger','livejournal',
- 'claimid','vidoop','verisign',
- 'openidurl','facebook','local',
- 'twitter' #oauth is not on this list, b/c it has no own url
- )
-
- #map SE VoteType -> askbot.User vote method
- #created methods with the same call structure in askbot.User
- #User.<vote_method>(post, timestamp=None, cancel=False)
- vote_actions = {
- 'UpMod':'upvote',
- 'DownMod':'downvote',
- 'AcceptedByOriginator':'accept_answer',
- 'Offensive':'flag_post',
- 'Favorite':'toggle_favorite_question',
- }
-
- #these modes cannot be mixed
- #only wiki is assumed to be mixable
- exclusive_revision_modes = (
- 'initial','edit','rollback','lock',
- 'migrate','close','merge','delete',
- )
-
- #badges whose names don't match exactly, but
- #present in both SE and ASKBOT
- badge_exceptions = {# SE --> ASKBOT
- 'Citizen Patrol':'Citizen patrol',#single #todo: why sentence case?
- 'Strunk &amp; White':'Strunk & White',#single
- 'Civic Duty':'Civic duty',
- }
-
- revision_type_map = {
- 'Initial Title':'initial',
- 'Initial Body':'initial',
- 'Initial Tags':'initial',
- 'Edit Title':'edit',
- 'Edit Body':'edit',
- 'Edit Tags':'edit',
- 'Rollback Title':'rollback',
- 'Rollback Body':'rollback',
- 'Rollback Tags':'rollback',
- 'Post Closed':'close',
- 'Post Reopened':'close',
- 'Post Deleted':'delete',
- 'Post Undeleted':'delete',
- 'Post Locked':'lock',
- 'Post Unlocked':'lock',
- 'Community Owned':'wiki',
- 'Post Migrated':'migrate',
- 'Question Merged':'merge',
- }
-
- close_reason_map = {
- 1:1,#duplicate
- 2:2,#off-topic
- 3:3,#subjective and argumentative
- 4:4,#not a real question
- 5:7,#offensive
- 6:6,#irrelevant or outdated question
- 7:9,#too localized
- 10:8,#spam
- }
-
- @classmethod
- def get_message_text(cls, se_m):
- """try to intelligently translate
- SE message to ASKBOT so that it makese sense in
- our context
- """
- #todo: properly translate messages
- #todo: maybe work through more instances of messages
- if se_m.message_type.name == 'Badge Notification':
- return se_m.text
- else:
- if 'you are now an administrator' in se_m.text:
- return _('Congratulations, you are now an Administrator')
- elif re.search(r'^You have \d+ new',se_m.text):
- bits = se_m.text.split('.')
- text = bits[0]
- if se_m.user.id == -1:
- return None
- url = cls.get_user(se_m.user).get_profile_url()
- return '<a href="%s?sort=responses">%s</a>' % (url,text)
- return None
-
- @classmethod
- def get_post(cls, se_post):
- #todo: fix this hack - either in-memory id association table
- #or use database to store these associations
- post_type = se_post.post_type.name
- if post_type == 'Question':
- return askbot.Question.objects.get(id=QUESTION[se_post.id].id)
- elif post_type == 'Answer':
- return askbot.Answer.objects.get(id=ANSWER[se_post.id].id)
- else:
- raise Exception('unknown post type %s' % post_type)
-
- @classmethod
- def get_close_reason(cls, se_reason):
- #todo: this is a guess - have not seen real data
- se_reason = int(se_reason)
- return cls.close_reason_map[se_reason]
-
- @classmethod
- def get_user(cls, se_user):
- #todo: same as get_post
- return askbot.User.objects.get(id=USER[se_user.id].id)
-
- @classmethod
- def get_post_revision_group_types(cls, rev_group):
- rev_types = {}
- for rev in rev_group:
- rev_type = cls.get_post_revision_type(rev)
- rev_types[rev_type] = 1
- rev_types = rev_types.keys()
-
- #make sure that exclusive rev modes are not mixed
- exclusive = cls.exclusive_revision_modes
- if len(rev_types) > 1 and all([t in exclusive for t in rev_types]):
- tstr = ','.join(rev_types)
- gstr = ','.join([str(rev.id) for rev in rev_group])
- msg = 'incompatible revision types %s in PostHistory %s' % (tstr,gstr)
- raise Exception(msg)
- return rev_types
-
- @classmethod
- def clean_tags(cls, tags):
- tags = re.subn(r'\s+',' ',tags.strip())[0]
- bits = tags.split(' ')
- tags = ' '.join([bit[1:-1] for bit in bits])
- tags = re.subn(r'\xf6','-',tags)[0]
- return tags
-
- @classmethod
- def get_screen_name(cls, name):
- """always returns unique screen name
- even if there are multiple users in SE
- with the same exact screen name
- """
- if name is None:
- name = 'anonymous'
- name = name.strip()
- name = re.subn(r'\s+',' ',name)[0]#remove repeating spaces
-
- try:
- u = askbot.User.objects.get(username = name)
- try:
- if u.location:
- name += ', %s' % u.location
- if name in NAMESAKE_COUNT:
- NAMESAKE_COUNT[name] += 1
- name += ' %d' % NAMESAKE_COUNT[name]
- else:
- NAMESAKE_COUNT[name] = 1
- except askbot.User.DoesNotExist:
- pass
- except askbot.User.DoesNotExist:
- NAMESAKE_COUNT[name] = 1
- return name
-
- @classmethod
- def get_email(cls, email):#todo: fix fringe case - user did not give email!
- if email is None:
- return settings.ANONYMOUS_USER_EMAIL
- else:
- assert(email != '')
- return email
-
- @classmethod
- def get_post_revision_type(cls, rev):
- rev_name = rev.post_history_type.name
- rev_type = cls.revision_type_map.get(rev_name, None)
- if rev_type is None:
- raise Exception('dont understand revision type %s' % rev)
- return rev_type
-
- #crude method of getting id provider name from the url
- @classmethod
- def get_openid_provider_name(cls, openid_url):
- openid_str = str(openid_url)
- bits = openid_str.split('/')
- base_url = bits[2] #assume this is base url
- url_bits = base_url.split('.')
- provider_name = url_bits[-2].lower()
- if provider_name not in cls.askbot_supported_id_providers:
- raise Exception('could not determine login provider for %s' % openid_url)
- return provider_name
-
- @staticmethod
- def blankable(input):
- if input is None:
- return ''
- else:
- return input
-
- @classmethod
- def parse_badge_summary(cls, badge_summary):
- (gold,silver,bronze) = (0,0,0)
- if badge_summary:
- if len(badge_summary) > 3:
- print 'warning: guessing that badge summary is comma separated'
- print 'have %s' % badge_summary
- bits = badge_summary.split(',')
- else:
- bits = [badge_summary]
- for bit in bits:
- m = re.search(r'^(?P<type>[1-3])=(?P<count>\d+)$', bit)
- if not m:
- raise Exception('could not parse badge summary: %s' % badge_summary)
- else:
- badge_type = cls.badge_type_map[m.groupdict()['type']]
- locals()[badge_type] = int(m.groupdict()['count'])
- return (gold,silver,bronze)
-
- @classmethod
- def get_badge_name(cls, name):
- return cls.badge_exceptions.get(name, name)
-
-class Command(BaseCommand):
- help = 'Loads StackExchange data from unzipped directory of XML files into the ASKBOT database'
- args = 'se_dump_dir'
-
- def handle(self, *arg, **kwarg):
- if len(arg) < 1 or not os.path.isdir(arg[0]):
- print 'Error: first argument must be a directory with all the SE *.xml files'
- sys.exit(1)
-
- self.dump_path = arg[0]
- #read the data into SE tables
- for xml in xml_read_order:
- xml_path = self.get_xml_path(xml)
- table_name = self.get_table_name(xml)
- self.load_xml_file(xml_path, table_name)
-
- #this is important so that when we clean up messages
- #automatically generated by the procedures below
- #we do not delete old messages
- #todo: unfortunately this may need to be redone
- #when we upgrade to django 1.2 and definitely by 1.4 when
- #the current message system will be replaced with the
- #django messages framework
- self.save_askbot_message_id_list()
-
- #transfer data into ASKBOT tables
- print 'Transferring users...',
- sys.stdout.flush()
- self.transfer_users()
- print 'done.'
- print 'Transferring content edits...',
- sys.stdout.flush()
- self.transfer_question_and_answer_activity()
- print 'done.'
- print 'Transferring view counts...',
- sys.stdout.flush()
- self.transfer_question_view_counts()
- print 'done.'
- print 'Transferring comments...',
- sys.stdout.flush()
- self.transfer_comments()
- print 'done.'
- print 'Transferring badges and badge awards...',
- sys.stdout.flush()
- self.transfer_badges()
- print 'done.'
- print 'Transferring votes...',
- sys.stdout.flush()
- self.transfer_votes()#includes favorites, accepts and flags
- print 'done.'
-
- self.cleanup_messages()#delete autogenerated messages
- self.transfer_messages()
-
- #todo: these are not clear how to go about
- self.transfer_update_subscriptions()
- self.transfer_tag_preferences()
- self.transfer_meta_pages()
-
- def save_askbot_message_id_list(self):
- id_list = list(DjangoMessage.objects.all().values('id'))
- self._askbot_message_id_list = id_list
-
- def cleanup_messages(self):
- """deletes messages generated by the load process
- """
- id_list = self._askbot_message_id_list
- mset = DjangoMessage.objects.all().exclude(id__in=id_list)
- mset.delete()
-
- def transfer_messages(self):
- """transfers some messages from
- SE to ASKBOT
- """
- for m in se.Message.objects.all():
- if m.is_read:
- continue
- if m.user.id == -1:
- continue
- u = X.get_user(m.user)
- text = X.get_message_text(m)
- if text:
- u.message_set.create(
- message=text,
- )
-
- def _process_post_initial_revision_group(self, rev_group):
-
- title = None
- text = None
- tags = None
- wiki = False
- author = USER[rev_group[0].user.id]
- added_at = rev_group[0].creation_date
-
- for rev in rev_group:
- rev_type = rev.post_history_type.name
- if rev_type == 'Initial Title':
- title = rev.text
- elif rev_type == 'Initial Body':
- text = rev.text
- elif rev_type == 'Initial Tags':
- tags = X.clean_tags(rev.text)
- elif rev_type == 'Community Owned':
- wiki = True
- else:
- raise Exception('unexpected revision type %s' % rev_type)
-
- post_type = rev_group[0].post.post_type.name
- if post_type == 'Question':
- q = askbot.Question.objects.create_new(
- title = title,
- author = author,
- added_at = added_at,
- wiki = wiki,
- tagnames = tags,
- text = text
- )
- QUESTION[rev_group[0].post.id] = q
- elif post_type == 'Answer':
- q = X.get_post(rev_group[0].post.parent)
- a = askbot.Answer.objects.create_new(
- question = q,
- author = author,
- added_at = added_at,
- wiki = wiki,
- text = text,
- )
- ANSWER[rev_group[0].post.id] = a
- else:
- post_id = rev_group[0].post.id
- raise Exception('unknown post type %s for id=%d' % (post_type, post_id))
-
- def _process_post_edit_revision_group(self, rev_group):
- #question apply edit
- (title, text, tags) = (None, None, None)
- for rev in rev_group:
- rev_type = rev.post_history_type.name
- if rev_type == 'Edit Title':
- title = rev.text
- elif rev_type == 'Edit Body':
- text = rev.text
- elif rev_type == 'Edit Tags':
- tags = X.clean_tags(rev.text)
- elif rev_type == 'Community Owned':
- pass
- else:
- raise Exception('unexpected revision type %s' % rev_type)
-
- rev0 = rev_group[0]
- edited_by = USER[rev0.user.id]
- edited_at = rev0.creation_date
- comment = ';'.join([rev.comment for rev in rev_group if rev.comment])
- post_type = rev0.post.post_type.name
-
- if post_type == 'Question':
- q = X.get_post(rev0.post)
- q.apply_edit(
- edited_at = edited_at,
- edited_by = edited_by,
- title = title,
- text = text,
- comment = comment,
- tags = tags,
- )
- elif post_type == 'Answer':
- a = X.get_post(rev0.post)
- a.apply_edit(
- edited_at = edited_at,
- edited_by = edited_by,
- text = text,
- comment = comment,
- )
-
- def _make_post_wiki(self, rev_group):
- #todo: untested
- for rev in rev_group:
- if rev.post_history_type.name == 'Community Owned':
- p = X.get_post(rev.post)
- u = X.get_user(rev.user)
- t = rev.creation_date
- p.wiki = True
- p.wikified_at = t
- p.wikified_by = u
- self.mark_activity(p,u,t)
- p.save()
- return
-
- def mark_activity(self,p,u,t):
- """p,u,t - post, user, timestamp
- """
- if isinstance(p, askbot.Question):
- p.last_activity_by = u
- p.last_activity_at = t
- elif isinstance(p, askbot.Answer):
- p.question.last_activity_by = u
- p.question.last_activity_at = t
- p.question.save()
-
- def _process_post_rollback_revision_group(self, rev_group):
- #todo: don't know what to do here as there were no
- #such data available
- pass
-
- def _process_post_lock_revision_group(self, rev_group):
- #todo: untested
- for rev in rev_group:
- rev_type = rev.post_history_type.name
- if rev_type.endswith('ocked'):
- t = rev.creation_date
- u = X.get_user(rev.user)
- p = X.get_post(rev.post)
- if rev_type == 'Post Locked':
- p.locked = True
- p.locked_by = u
- p.locked_at = t
- elif rev_type == 'Post Unlocked':
- p.locked = False
- p.locked_by = None
- p.locked_at = None
- else:
- return
- self.mark_activity(p,u,t)
- p.save()
- return
-
- def _process_post_close_revision_group(self, rev_group):
- #todo: untested
- for rev in rev_group:
- if rev.post.post_type.name != 'Question':
- return
- rev_type = rev.post_history_type.name
- if rev_type in ('Post Closed', 'Post Reopened'):
- t = rev.creation_date
- u = X.get_user(rev.user)
- p = X.get_post(rev.post)
- if rev_type == 'Post Closed':
- p.closed = True
- p.closed_at = t
- p.closed_by = u
- p.close_reason = X.get_close_reason(rev.text)
- elif rev_type == 'Post Reopened':
- p.closed = False
- p.closed_at = None
- p.closed_by = None
- p.close_reason = None
- self.mark_activity(p,u,t)
- p.save()
- return
-
- def _process_post_delete_revision_group(self, rev_group):
- #todo: untested
- for rev in rev_group:
- rev_type = rev.post_history_type.name
- if rev_type.endswith('eleted'):
- t = rev.creation_date
- u = X.get_user(rev.user)
- p = X.get_post(rev.post)
- if rev_type == 'Post Deleted':
- p.deleted = True
- p.deleted_at = t
- p.deleted_by = u
- elif rev_type == 'Post Undeleted':
- p.deleted = False
- p.deleted_at = None
- p.deleted_by = None
- self.mark_activity(p,u,t)
- p.save()
- return
-
- def _process_post_revision_group(self, rev_group):
- #determine revision type
- #'initial','edit','rollback','lock',
- #'migrate','close','merge','delete',
- rev_types = X.get_post_revision_group_types(rev_group)
- if 'initial' in rev_types:
- self._process_post_initial_revision_group(rev_group)
- elif 'edit' in rev_types:
- self._process_post_edit_revision_group(rev_group)
- elif 'rollback' in rev_types:
- self._process_post_rollback_revision_group(rev_group)
- elif 'lock' in rev_types:
- self._process_post_lock_revision_group(rev_group)
- elif 'close' in rev_types:
- self._process_post_close_revision_group(rev_group)
- elif 'delete' in rev_types:
- self._process_post_delete_revision_group(rev_group)
- else:
- pass
- #todo: rollback, lock, close and delete are
- #not tested
- #merge and migrate actions are ignored
- #wiki is mixable with other groups, so process it in addition
- if 'wiki' in rev_types:
- self._make_post_wiki(rev_group)
-
- def transfer_tag_preferences(self):
- #todo: figure out where these are stored in SE
- #maybe in se.User.preferences_raw?
- pass
-
- def transfer_question_and_answer_activity(self):
- """transfers all question and answer
- edits and related status changes
- """
- #assuming that there are only two post types
- se_revs = se.PostHistory.objects.all()
- #assuming that chronologial order is correct and there
- #will be no problems of data integrity upon insertion of records
- se_revs = se_revs.order_by('creation_date','revision_guid')
- #todo: ignored fringe case - no revisions
- c_guid = se_revs[0].revision_guid
- c_group = []
- #this loop groups revisions by revision id, then calls process function
- #for the revision grup (elementary revisions posted at once)
- for se_rev in se_revs:
- if se_rev.revision_guid == c_guid:
- c_group.append(se_rev)
- else:
- self._process_post_revision_group(c_group)
- c_group = []
- c_group.append(se_rev)
- c_guid = se_rev.revision_guid
- if len(c_group) != 0:
- self._process_post_revision_group(c_group)
-
- def transfer_comments(self):
- for se_c in se.PostComment.objects.all():
- if se_c.deletion_date:
- print 'Warning deleted comment %d dropped' % se_c.id
- continue
- se_post = se_c.post
- if se_post.post_type.name == 'Question':
- askbot_post = QUESTION[se_post.id]
- elif se_post.post_type.name == 'Answer':
- askbot_post = ANSWER[se_post.id]
-
- askbot_post.add_comment(
- comment = se_c.text,
- added_at = se_c.creation_date,
- user = USER[se_c.user.id]
- )
-
- def _install_missing_badges(self):
- self._missing_badges = {}
- for se_b in se.Badge.objects.all():
- name = X.get_badge_name(se_b.name)
- try:
- askbot.Badge.objects.get(name=name)
- except:
- self._missing_badges[name] = 0
- if len(se_b.description) > 300:
- print 'Warning truncated description for badge %d' % se_b.id
- askbot.Badge.objects.create(
- name = name,
- slug = slugify(name),
- description = se_b.description,
- multiple = (not se_b.single),
- type = se_b.class_type
- )
-
- def _award_badges(self):
- #note: SE does not keep information on
- #content-related badges like askbot does
- for se_a in se.User2Badge.objects.all():
- if se_a.user.id == -1:
- continue #skip community user
- u = USER[se_a.user.id]
- badge_name = X.get_badge_name(se_a.badge.name)
- b = askbot.Badge.objects.get(name=badge_name)
- if b.multiple == False:
- if b.award_badge.count() > 0:
- continue
- #todo: fake content object here b/c SE does not support this
- #todo: but askbot requires related content object
- askbot.Award.objects.create(
- user=u,
- badge=b,
- awarded_at=se_a.date,
- content_object=u,
- )
- if b.name in self._missing_badges:
- self._missing_badges[b.name] += 1
-
- def _cleanup_badges(self):
- d = self._missing_badges
- unused = [name for name in d.keys() if d[name] == 0]
- askbot.Badge.objects.filter(name__in=unused).delete()
- installed = [name for name in d.keys() if d[name] > 0]
- print 'Warning - following unsupported badges were installed:'
- print ', '.join(installed)
-
- def transfer_badges(self):
- #note: badge level is neglected
- #1) install missing badges
- self._install_missing_badges()
- #2) award badges
- self._award_badges()
- #3) delete unused newly installed badges
- self._cleanup_badges()
- pass
-
- def transfer_question_view_counts(self):
- for se_q in se.Post.objects.filter(post_type__name='Question'):
- q = X.get_post(se_q)
- q.view_count = se_q.view_count
- q.save()
-
-
- def transfer_votes(self):
- for v in se.Post2Vote.objects.all():
- vote_type = v.vote_type.name
- if not vote_type in X.vote_actions:
- continue
-
- u = X.get_user(v.user)
- p = X.get_post(v.post)
- m = X.vote_actions[vote_type]
- vote_method = getattr(askbot.User, m)
- vote_method(u, p, timestamp = v.creation_date)
- if v.deletion_date:
- vote_method(u, p, timestamp = v.deletion_date, cancel=True)
-
- def transfer_update_subscriptions(self):
- #todo: not clear where this is stored in SE
- #maybe in se.User.preferences_raw?
- pass
-
- def transfer_meta_pages(self):
- #here we actually don't have anything in the database yet
- #so we can't do this
- pass
-
- def load_xml_file(self, xml_path, table_name):
- tree = et.parse(xml_path)
- print 'loading from %s to %s' % (xml_path, table_name) ,
- model = models.get_model('stackexchange', table_name)
- i = 0
- for row in tree.findall('.//row'):
- model_entry = model()
- i += 1
- for col in row.getchildren():
- field_name = se_parser.parse_field_name(col.tag)
- field_type = model._meta.get_field(field_name)
- field_value = se_parser.parse_value(col.text, field_type)
- setattr(model_entry, field_name, field_value)
- model_entry.save()
- print '... %d objects saved' % i
-
- def get_table_name(self,xml):
- return se_parser.get_table_name(xml)
-
- def get_xml_path(self, xml):
- xml_path = os.path.join(self.dump_path, xml + '.xml')
- if not os.path.isfile(xml_path):
- print 'Error: file %s not found' % xml_path
- sys.exit(1)
- return xml_path
-
- def transfer_users(self):
- for se_u in se.User.objects.all():
- if se_u.id < 1:#skip the Community user
- continue
- u = askbot.User()
- u_type = se_u.user_type.name
- if u_type == 'Administrator':
- u.is_superuser = True
- elif u_type == 'Moderator':
- u.is_staff = True
- elif u_type not in ('Unregistered', 'Registered'):
- raise Exception('unknown user type %s' % u_type)
-
- #if user is not registered, no association record created
- #we do not allow posting by users who are not authenticated
- #probably they'll just have to "recover" their account by email
- if u_type != 'Unregistered':
- assert(se_u.open_id)#everybody must have open_id
- u_auth = askbot.AuthKeyUserAssociation()
- u_auth.key = se_u.open_id
- u_auth.provider = X.get_openid_provider_name(se_u.open_id)
- u_auth.added_at = se_u.creation_date
-
- if se_u.open_id is None and se_u.email is None:
- print 'Warning: SE user %d is not recoverable (no email or openid)'
-
- u.reputation = 1#se_u.reputation, it's actually re-computed
- u.last_seen = se_u.last_access_date
- u.email = X.get_email(se_u.email)
- u.location = X.blankable(se_u.location)
- u.date_of_birth = se_u.birthday #dattime -> date
- u.website = X.blankable(se_u.website_url)
- u.about = X.blankable(se_u.about_me)
- u.last_login = se_u.last_login_date
- u.date_joined = se_u.creation_date
- u.is_active = True #todo: this may not be the case
-
- u.username = X.get_screen_name(se_u.display_name)
- u.real_name = X.blankable(se_u.real_name)
-
- (gold,silver,bronze) = X.parse_badge_summary(se_u.badge_summary)
- u.gold = gold
- u.silver = silver
- u.bronze = bronze
-
- #todo: we don't have these fields
- #views - number of profile views?
- #has_replies
- #has_message
- #opt_in_recruit
- #last_login_ip
- #open_id_alt - ??
- #preferences_raw - not clear how to use
- #display_name_cleaned - lowercased, srtipped name
- #timed_penalty_date
- #phone
-
- #don't know how to handle these - there was no usage example
- #password_id
- #guid
-
- #ignored
- #last_email_date - this translates directly to EmailFeedSetting.reported_at
-
- #save the data
- u.save()
- form = EditUserEmailFeedsForm()
- form.reset()
- if se_u.opt_in_email == True:#set up daily subscription on "own" items
- form.initial['individually_selected'] = 'd'
- form.initial['asked_by_me'] = 'd'
- form.initial['answered_by_me'] = 'd'
- #
- form.save(user=u, save_unbound=True)
-
- if 'u_auth' in locals():
- u_auth.user = u
- u_auth.save()
- USER[se_u.id] = u
diff --git a/stackexchange/models.py b/stackexchange/models.py
deleted file mode 100644
index a30a9859..00000000
--- a/stackexchange/models.py
+++ /dev/null
@@ -1,266 +0,0 @@
-from django.db import models
-class Badge(models.Model):
- id = models.IntegerField(primary_key=True)
- class_type = models.IntegerField(null=True)
- name = models.CharField(max_length=50, null=True)
- description = models.TextField(null=True)
- single = models.NullBooleanField(null=True)
- secret = models.NullBooleanField(null=True)
- tag_based = models.NullBooleanField(null=True)
- command = models.TextField(null=True)
- award_frequency = models.IntegerField(null=True)
-
-class CloseReason(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=200, null=True)
- description = models.CharField(max_length=256, null=True)
- display_order = models.IntegerField(null=True)
-
-class Comment2Vote(models.Model):
- id = models.IntegerField(primary_key=True)
- post_comment = models.ForeignKey('PostComment', related_name='Comment2Vote_by_post_comment_set', null=True)
- vote_type = models.ForeignKey('VoteType', related_name='Comment2Vote_by_vote_type_set', null=True)
- creation_date = models.DateTimeField(null=True)
- user = models.ForeignKey('User', related_name='Comment2Vote_by_user_set', null=True)
- ip_address = models.CharField(max_length=40, null=True)
- user_display_name = models.CharField(max_length=40, null=True)
- deletion_date = models.DateTimeField(null=True)
-
-class FlatPage(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- url = models.CharField(max_length=128, null=True)
- value = models.TextField(null=True)
- content_type = models.CharField(max_length=50, null=True)
- active = models.NullBooleanField(null=True)
- use_master = models.NullBooleanField(null=True)
-
-class Message(models.Model):
- id = models.IntegerField(primary_key=True)
- user = models.ForeignKey('User', related_name='Message_by_user_set', null=True)
- message_type = models.ForeignKey('MessageType', related_name='Message_by_message_type_set', null=True)
- is_read = models.NullBooleanField(null=True)
- creation_date = models.DateTimeField(null=True)
- text = models.TextField(null=True)
- post = models.ForeignKey('Post', related_name='Message_by_post_set', null=True)
-
-class MessageType(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- description = models.CharField(max_length=300, null=True)
-
-class ModeratorMessage(models.Model):
- id = models.IntegerField(primary_key=True)
- message_type = models.ForeignKey('MessageType', related_name='ModeratorMessage_by_message_type_set', null=True)
- creation_date = models.DateTimeField(null=True)
- creation_ip_address = models.CharField(max_length=40, null=True)
- text = models.TextField(null=True)
- user = models.ForeignKey('User', related_name='ModeratorMessage_by_user_set', null=True)
- post = models.ForeignKey('Post', related_name='ModeratorMessage_by_post_set', null=True)
- deletion_date = models.DateTimeField(null=True)
- deletion_user = models.ForeignKey('User', related_name='ModeratorMessage_by_deletion_user_set', null=True)
- deletion_ip_address = models.CharField(max_length=40, null=True)
- user_display_name = models.CharField(max_length=40, null=True)
-
-class PostComment(models.Model):
- id = models.IntegerField(primary_key=True)
- post = models.ForeignKey('Post', related_name='PostComment_by_post_set', null=True)
- text = models.TextField(null=True)
- creation_date = models.DateTimeField(null=True)
- ip_address = models.CharField(max_length=15, null=True)
- user = models.ForeignKey('User', related_name='PostComment_by_user_set', null=True)
- user_display_name = models.CharField(max_length=30, null=True)
- deletion_date = models.DateTimeField(null=True)
- deletion_user = models.ForeignKey('User', related_name='PostComment_by_deletion_user_set', null=True)
- score = models.IntegerField(null=True)
-
-class PostHistoryType(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- description = models.CharField(max_length=300, null=True)
-
-class PostHistory(models.Model):
- id = models.IntegerField(primary_key=True)
- post_history_type = models.ForeignKey('PostHistoryType', related_name='PostHistory_by_post_history_type_set', null=True)
- post = models.ForeignKey('Post', related_name='PostHistory_by_post_set', null=True)
- revision_guid = models.CharField(max_length=64, null=True)
- creation_date = models.DateTimeField(null=True)
- ip_address = models.CharField(max_length=40, null=True)
- user = models.ForeignKey('User', related_name='PostHistory_by_user_set', null=True)
- comment = models.CharField(max_length=400, null=True)
- text = models.TextField(null=True)
- user_display_name = models.CharField(max_length=40, null=True)
- user_email = models.CharField(max_length=100, null=True)
- user_website_url = models.CharField(max_length=200, null=True)
-
-class Post2Vote(models.Model):
- id = models.IntegerField(primary_key=True)
- post = models.ForeignKey('Post', related_name='Post2Vote_by_post_set', null=True)
- user = models.ForeignKey('User', related_name='Post2Vote_by_user_set', null=True)
- vote_type = models.ForeignKey('VoteType', related_name='Post2Vote_by_vote_type_set', null=True)
- creation_date = models.DateTimeField(null=True)
- deletion_date = models.DateTimeField(null=True)
- target_user = models.ForeignKey('User', related_name='Post2Vote_by_target_user_set', null=True)
- target_rep_change = models.IntegerField(null=True)
- voter_rep_change = models.IntegerField(null=True)
- comment = models.CharField(max_length=150, null=True)
- ip_address = models.CharField(max_length=40, null=True)
- linked_post = models.ForeignKey('Post', related_name='Post2Vote_by_linked_post_set', null=True)
-
-class Post(models.Model):
- id = models.IntegerField(primary_key=True)
- post_type = models.ForeignKey('PostType', related_name='Post_by_post_type_set', null=True)
- creation_date = models.DateTimeField(null=True)
- score = models.IntegerField(null=True)
- view_count = models.IntegerField(null=True)
- body = models.TextField(null=True)
- owner_user = models.ForeignKey('User', related_name='Post_by_owner_user_set', null=True)
- last_editor_user = models.ForeignKey('User', related_name='Post_by_last_editor_user_set', null=True)
- last_edit_date = models.DateTimeField(null=True)
- last_activity_date = models.DateTimeField(null=True)
- last_activity_user = models.ForeignKey('User', related_name='Post_by_last_activity_user_set', null=True)
- parent = models.ForeignKey('self', related_name='Post_by_parent_set', null=True)
- accepted_answer = models.ForeignKey('self', related_name='Post_by_accepted_answer_set', null=True)
- title = models.CharField(max_length=250, null=True)
- tags = models.CharField(max_length=150, null=True)
- community_owned_date = models.DateTimeField(null=True)
- history_summary = models.CharField(max_length=150, null=True)
- answer_score = models.IntegerField(null=True)
- answer_count = models.IntegerField(null=True)
- comment_count = models.IntegerField(null=True)
- favorite_count = models.IntegerField(null=True)
- deletion_date = models.DateTimeField(null=True)
- closed_date = models.DateTimeField(null=True)
- locked_date = models.DateTimeField(null=True)
- locked_duration = models.IntegerField(null=True)
- owner_display_name = models.CharField(max_length=40, null=True)
- last_editor_display_name = models.CharField(max_length=40, null=True)
- bounty_amount = models.IntegerField(null=True)
- bounty_closes = models.DateTimeField(null=True)
- bounty_closed = models.DateTimeField(null=True)
- last_owner_email_date = models.DateTimeField(null=True)
-
-class PostType(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- description = models.CharField(max_length=300, null=True)
-
-class SchemaVersion(models.Model):
- version = models.IntegerField(null=True)
-
-class Setting(models.Model):
- id = models.IntegerField(primary_key=True)
- key = models.CharField(max_length=256, null=True)
- value = models.TextField(null=True)
-
-class SystemMessage(models.Model):
- id = models.IntegerField(primary_key=True)
- user = models.ForeignKey('User', related_name='SystemMessage_by_user_set', null=True)
- creation_date = models.DateTimeField(null=True)
- text = models.TextField(null=True)
- deletion_date = models.DateTimeField(null=True)
- deletion_user = models.ForeignKey('User', related_name='SystemMessage_by_deletion_user_set', null=True)
-
-class Tag(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- count = models.IntegerField(null=True)
- user = models.ForeignKey('User', related_name='Tag_by_user_set', null=True)
- creation_date = models.DateTimeField(null=True)
- is_moderator_only = models.NullBooleanField(null=True)
- is_required = models.NullBooleanField(null=True)
- aliases = models.CharField(max_length=200, null=True)
-
-class ThemeResource(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- value = models.TextField(null=True)
- content_type = models.CharField(max_length=50, null=True)
- version = models.CharField(max_length=6, null=True)
-
-class ThemeTextResource(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- value = models.TextField(null=True)
- content_type = models.CharField(max_length=50, null=True)
-
-class ThrottleBucket(models.Model):
- id = models.IntegerField(primary_key=True)
- type = models.CharField(max_length=256, null=True)
- ip_address = models.CharField(max_length=64, null=True)
- tokens = models.IntegerField(null=True)
- last_update = models.DateTimeField(null=True)
-
-class UserHistoryType(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- description = models.CharField(max_length=300, null=True)
-
-class UserHistory(models.Model):
- id = models.IntegerField(primary_key=True)
- user_history_type = models.ForeignKey('UserHistoryType', related_name='UserHistory_by_user_history_type_set', null=True)
- creation_date = models.DateTimeField(null=True)
- ip_address = models.CharField(max_length=40, null=True)
- user = models.ForeignKey('User', related_name='UserHistory_by_user_set', null=True)
- comment = models.CharField(max_length=400, null=True)
- user_display_name = models.CharField(max_length=40, null=True)
- moderator_user = models.ForeignKey('User', related_name='UserHistory_by_moderator_user_set', null=True)
- reputation = models.IntegerField(null=True)
-
-class User2Badge(models.Model):
- id = models.IntegerField(primary_key=True)
- user = models.ForeignKey('User', related_name='User2Badge_by_user_set', null=True)
- badge = models.ForeignKey('Badge', related_name='User2Badge_by_badge_set', null=True)
- date = models.DateTimeField(null=True)
- comment = models.CharField(max_length=50, null=True)
-
-class User2Vote(models.Model):
- id = models.IntegerField(primary_key=True)
- user = models.ForeignKey('User', related_name='User2Vote_by_user_set', null=True)
- vote_type = models.ForeignKey('VoteType', related_name='User2Vote_by_vote_type_set', null=True)
- target_user = models.ForeignKey('User', related_name='User2Vote_by_target_user_set', null=True)
- creation_date = models.DateTimeField(null=True)
- deletion_date = models.DateTimeField(null=True)
- ip_address = models.CharField(max_length=40, null=True)
-
-class User(models.Model):
- id = models.IntegerField(primary_key=True)
- user_type = models.ForeignKey('UserType', related_name='User_by_user_type_set', null=True)
- open_id = models.CharField(max_length=200, null=True)
- reputation = models.IntegerField(null=True)
- views = models.IntegerField(null=True)
- creation_date = models.DateTimeField(null=True)
- last_access_date = models.DateTimeField(null=True)
- has_replies = models.NullBooleanField(null=True)
- has_message = models.NullBooleanField(null=True)
- opt_in_email = models.NullBooleanField(null=True)
- opt_in_recruit = models.NullBooleanField(null=True)
- last_login_date = models.DateTimeField(null=True)
- last_email_date = models.DateTimeField(null=True)
- last_login_ip = models.CharField(max_length=15, null=True)
- open_id_alt = models.CharField(max_length=200, null=True)
- email = models.CharField(max_length=100, null=True)
- display_name = models.CharField(max_length=40, null=True)
- display_name_cleaned = models.CharField(max_length=40, null=True)
- website_url = models.CharField(max_length=200, null=True)
- real_name = models.CharField(max_length=100, null=True)
- location = models.CharField(max_length=100, null=True)
- birthday = models.DateTimeField(null=True)
- badge_summary = models.CharField(max_length=50, null=True)
- about_me = models.TextField(null=True)
- preferences_raw = models.TextField(null=True)
- timed_penalty_date = models.DateTimeField(null=True)
- guid = models.CharField(max_length=64, null=True)
- phone = models.CharField(max_length=20, null=True)
- password_id = models.IntegerField(null=True)
-
-class UserType(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- description = models.CharField(max_length=300, null=True)
-
-class VoteType(models.Model):
- id = models.IntegerField(primary_key=True)
- name = models.CharField(max_length=50, null=True)
- description = models.CharField(max_length=300, null=True)
-
diff --git a/stackexchange/parse_models.py b/stackexchange/parse_models.py
deleted file mode 100644
index 64796e57..00000000
--- a/stackexchange/parse_models.py
+++ /dev/null
@@ -1,225 +0,0 @@
-from xml.etree import ElementTree as et
-import sys
-import re
-import os
-if __name__ != '__main__':#hack do not import models if run as script
- from django.db import models
-from datetime import datetime
-
-table_prefix = ''#StackExchange or something, if needed
-date_time_format = '%Y-%m-%dT%H:%M:%S' #note that fractional part of second is lost
-time_re = re.compile(r'(\.[\d]+)?$')
-loader_app_name = os.path.dirname(__file__)
-
-types = {
- 'unsignedByte':'models.IntegerField',
- 'FK':'models.ForeignKey',
- 'PK':'models.IntegerField',
- 'string':'models.CharField',
- 'text':'models.TextField',
- 'int':'models.IntegerField',
- 'boolean':'models.NullBooleanField',
- 'dateTime':'models.DateTimeField',
- 'base64Binary':'models.TextField',
- 'double':'models.IntegerField',
-}
-
-def camel_to_python(camel):
- """http://stackoverflow.com/questions/1175208/
- """
- s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel)
- return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
-
-def singular(word):
- if word.endswith('s'):
- return word[:-1]
- else:
- return word
-
-def get_table_name(name):
- """Determine db table name
- from the basename of the .xml file
- """
- out = table_prefix
- if name.find('2') == -1:
- out += singular(name)
- else:
- bits = name.split('2')
- bits = map(singular, bits)
- out += '2'.join(bits)
- return out
-
-class DjangoModel(object):
- def __init__(self, name):
- self.name = get_table_name(name)
- self.fields = []
- def add_field(self,field):
- field.table = self
- self.fields.append(field)
- def __str__(self):
- out = 'class %s(models.Model):\n' % self.name
- for f in self.fields:
- out += ' %s\n' % str(f)
- return out
-
-class DjangoField(object):
- def __init__(self, name, type, restriction = None):
- self.name = camel_to_python(name)
- if self.name == 'class':
- self.name = 'class_type'#work around python keyword
- self.type = type
- self.table = None
- self.restriction = restriction
- self.relation = None
-
- def __str__(self):
- out = '%s = %s(' % (self.name, types[self.type])
- if self.type == 'FK':
- out += "'%s'" % self.relation
- out += ", related_name='%s_by_%s_set'" % (self.table.name, self.name)
- out += ', null=True'#nullable to make life easier
- elif self.type == 'PK':
- out += 'primary_key=True'
- elif self.restriction != -1:
- if self.type == 'string':
- out += 'max_length=%s' % self.restriction
- out += ', null=True'
- else:
- raise Exception('restriction (max_length) supported only for string type')
- else:
- out += 'null=True'
- out += ')'
- return out
-
- def get_type(self):
- return self.type
-
-class DjangoPK(DjangoField):
- def __init__(self):
- self.name = 'id'
- self.type = 'PK'
-
-class DjangoFK(DjangoField):
- def __init__(self, source_name):
- bits = source_name.split('Id')
- if len(bits) == 2 and bits[1] == '':
- name = bits[0]
- super(DjangoFK, self).__init__(name, 'FK')
- self.set_relation(name)
-
- def set_relation(self, name):
- """some relations need to be mapped
- to actual tables
- """
- self.relation = table_prefix
- if name.endswith('User'):
- self.relation += 'User'
- elif name.endswith('Post'):
- self.relation += 'Post'
- elif name in ('AcceptedAnswer','Parent'):
- self.relation = 'self' #self-referential Post model
- else:
- self.relation += name
- def get_relation(self):
- return self.relation
-
-def get_col_type(col):
- type = col.get('type')
- restriction = -1
- if type == None:
- type_e = col.find('.//simpleType/restriction')
- type = type_e.get('base')
- try:
- restriction = int(type_e.getchildren()[0].get('value'))
- except:
- restriction = -1
- if restriction > 400:
- type = 'text'
- restriction = -1
- return type, restriction
-
-def make_field_from_xml_tree(xml_element):
- """used by the model parser
- here we need to be detailed about field types
- because this defines the database schema
- """
- name = xml_element.get('name')
- if name == 'LinkedVoteId':#not used
- return None
- if name == 'Id':
- field = DjangoPK()
- elif name.endswith('Id') and name not in ('OpenId','PasswordId'):
- field = DjangoFK(name)
- elif name.endswith('GUID'):
- field = DjangoField(name, 'string', 64)
- else:
- type, restriction = get_col_type(xml_element)
- field = DjangoField(name, type, restriction)
- return field
-
-def parse_field_name(input):
- """used by the data reader
-
- The problem is that I've scattered
- code for determination of field name over three classes:
- DjangoField, DjangoPK and DjangoFK
- so the function actually cretes fake field objects
- many time over
- """
- if input == 'Id':
- return DjangoPK().name
- elif input in ('OpenId', 'PasswordId'):
- return DjangoField(input, 'string', 7).name#happy fake field
- elif input.endswith('Id'):
- return DjangoFK(input).name#real FK field
- else:
- return DjangoField(input, 'string', 7).name#happy fake field
-
-def parse_value(input, field_object):
- if isinstance(field_object, models.ForeignKey):
- try:
- id = int(input)
- except:
- raise Exception('non-numeric foreign key %s' % input)
- related_model = field_object.rel.to
- try:
- return related_model.objects.get(id=id)
- except related_model.DoesNotExist:
- obj = related_model(id=id)
- obj.save()#save fake empty object
- return obj
- elif isinstance(field_object, models.IntegerField):
- try:
- return int(input)
- except:
- raise Exception('expected integer, found %s' % input)
- elif isinstance(field_object, models.CharField):
- return input
- elif isinstance(field_object, models.TextField):
- return input
- elif isinstance(field_object, models.BooleanField):
- try:
- return bool(input)
- except:
- raise Exception('boolean value expected %s found' % input)
- elif isinstance(field_object, models.DateTimeField):
- input = time_re.sub('', input)
- try:
- return datetime.strptime(input, date_time_format)
- except:
- raise Exception('datetime expected "%s" found' % input)
-
-print 'from django.db import models'
-for file in sys.argv:
- if '.xsd' in file:
- tname = os.path.basename(file).replace('.xsd','')
- tree = et.parse(file)
-
- model = DjangoModel(tname)
-
- row = tree.find('.//sequence')
- for col in row.getchildren():
- field = make_field_from_xml_tree(col)
- if field:
- model.add_field(field)
- print model