diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-03-06 16:22:44 -0500 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-03-06 16:22:44 -0500 |
commit | f040951ff0ea5bfa583972299be759ff6de55a1f (patch) | |
tree | f188744dc71699e5632d5147ebdddfcf325c6350 | |
parent | 1397d1996d4d69803943ed02bbdaf8eae5af659d (diff) | |
download | askbot-f040951ff0ea5bfa583972299be759ff6de55a1f.tar.gz askbot-f040951ff0ea5bfa583972299be759ff6de55a1f.tar.bz2 askbot-f040951ff0ea5bfa583972299be759ff6de55a1f.zip |
wrote code to import users, untested
-rwxr-xr-x | forum/authentication/__init__.py | 4 | ||||
-rwxr-xr-x | forum/models/user.py | 2 | ||||
-rw-r--r-- | stackexchange/management/commands/load_stackexchange.py | 155 |
3 files changed, 158 insertions, 3 deletions
diff --git a/forum/authentication/__init__.py b/forum/authentication/__init__.py index 5326c45c..75099303 100755 --- a/forum/authentication/__init__.py +++ b/forum/authentication/__init__.py @@ -7,7 +7,7 @@ class ConsumerAndContext(): self.id = id self.consumer = consumer() - context.id = id + context.id = id #add extra field to context self.context = context consumers = dict([ @@ -24,4 +24,4 @@ contexts = dict([ AUTH_PROVIDERS = dict([ (name, ConsumerAndContext(name, consumers[name], contexts[name])) for name in consumers.keys() if name in contexts - ])
\ No newline at end of file + ]) diff --git a/forum/models/user.py b/forum/models/user.py index bed6f55e..8db119e0 100755 --- a/forum/models/user.py +++ b/forum/models/user.py @@ -68,7 +68,7 @@ class AnonymousEmail(models.Model): class AuthKeyUserAssociation(models.Model): key = models.CharField(max_length=255,null=False,unique=True) - provider = models.CharField(max_length=64) + provider = models.CharField(max_length=64)#string 'yahoo', 'google', etc. user = models.ForeignKey(User) added_at = models.DateTimeField(default=datetime.datetime.now) diff --git a/stackexchange/management/commands/load_stackexchange.py b/stackexchange/management/commands/load_stackexchange.py index 42ccc9f9..39966bef 100644 --- a/stackexchange/management/commands/load_stackexchange.py +++ b/stackexchange/management/commands/load_stackexchange.py @@ -1,9 +1,13 @@ from django.core.management.base import BaseCommand import os +import re import sys import stackexchange.parse_models as se_parser from xml.etree import ElementTree as et from django.db import models +import forum.models as osqa +import stackexchange.models as se +from forum.forms import EditUserEmailFeedsForm xml_read_order = ( 'VoteTypes','UserTypes','Users','Users2Votes', @@ -16,6 +20,78 @@ xml_read_order = ( 'ModeratorMessages','Messages','Comments2Votes', ) +#association tables SE item id --> OSQA item id +#table associations are implied +USER = {}#SE User.id --> django(OSQA) User.id +NAMESAKE_COUNT = {}# number of times user name was used - for X.get_screen_name + +class X(object):# + """class with methods for handling some details + of SE --> OSQA mapping + """ + osqa_supported_id_providers = ( + 'google','yahoo','aol','myopenid', + 'flickr','technorati',#todo: typo in code openidauth/authentication.py ! + 'wordpress','blogger','livejournal', + 'claimid','vidoop','verisign', + 'openidurl','facebook','local', + 'twitter' #oauth is not on this list, b/c it has no own url + ) + badge_type_map = {'1':'gold','2':'silver','3':'bronze'} + + @classmethod + def get_screen_name(cls, se_display_name): + """always returns unique screen name + even if there are multiple users in SE + with the same exact screen name + """ + name = se_display_name.strip() + name = re.subn(r'\s+',' ',name)#remove repeating spaces + + try: + osqa.User.objects.get(username = name) + try: + name += ', %' % u.location + if name in NAMESAKE_COUNT: + NAMESAKE_COUNT[name] += 1 + name += ' %d' % NAMESAKE_COUNT[name] + except osqa.User.DoesNotExist: + NAMESAKE_COUNT[name] = 1 + return name + except osqa.User.DoesNotExist: + return name + except osqa.User.DoesNotExist: + return name + + #crude method of getting id provider name from the url + @classmethod + def get_openid_provider_name(cls, openid_url): + openid_str = str(openid_url) + bits = openid_str.split('/') + base_url = bits[2] #assume this is base url + url_bits = base_url.split('.') + provider_name = url_bits[-2].lower() + if provider_name not in cls.osqa_supported_id_providers: + raise Exception('could not determine login provider for %s' % openid_url) + return provider_name + + @classmethod + def parse_badge_summary(cls, badge_summary): + (gold,silver,bronze) = (0,0,0) + if len(badge_summary) > 3: + print 'warning: guessing that badge summary is comma separated' + print 'have %s' % badge_summary + bits = badge_summary.split(',') + else: + bits = [badge_summary] + for bit in bits: + m = re.search(r'^(?P<type>[1-3])=(?P<count>\d+)$', bit) + if not m: + raise Exception('could not parse badge summary: %s' % badge_summary) + else: + badge_type = cls.badge_type_map[m.groupdict['type']] + locals()[badge_type] = int(m.groupdict['count']) + return (gold,silver,bronze) class Command(BaseCommand): help = 'Loads StackExchange data from unzipped directory of XML files into the OSQA database' @@ -27,11 +103,15 @@ class Command(BaseCommand): sys.exit(1) self.dump_path = arg[0] + #read the data into SE tables for xml in xml_read_order: xml_path = self.get_xml_path(xml) table_name = self.get_table_name(xml) self.load_xml_file(xml_path, table_name) + #transfer data into OSQA tables + self.transfer_users() + def load_xml_file(self, xml_path, table_name): tree = et.parse(xml_path) print 'loading from %s to %s' % (xml_path, table_name) , @@ -57,3 +137,78 @@ class Command(BaseCommand): print 'Error: file %s not found' % xml_path sys.exit(1) return xml_path + + def transfer_users(self): + for se_u in se.User.objects.all(): + if se_u.id < 1:#skip the Community user + continue + u = osqa.User() + u_type = se_u.user_type.name + if u_type == 'Administrator': + u.is_superuser = True + elif u_type == 'Moderator': + u.is_staff = True + elif u_type == 'Registered': + assert(se_u.open_id)#this must be true by definition of Registered user + u_auth = osqa.AuthKeyUserAssociation() + u_auth.key = se_u.open_id + u_auth.provider = X.get_openid_provider_name(se.open_id) + u_auth.added_at = se_u.creation_date + + elif u_type == 'Unregistered': + raise Exception('dont know what to do with unregistered users') + else: + raise Exception('unknown SE user type %s' % u_type) + u.reputation = se_u.reputation + u.last_seen = se_u.last_access_date + u.email = se_u.email + u.location = se_u.location + u.date_of_birth = se_u.birthday #dattime -> date + u.website = se_u.website_url + u.about = se_u.about_me + u.last_login = se_u.last_login_date + u.date_joined = se_u.creation_date + u.is_active = True #todo: this may not be the case + + u.username = X.get_screen_name(se_u.display_name) + u.real_name = se_u.real_name + + (gold,silver,bronze) = X.parse_badge_summary(se_u.badge_summary) + u.gold = gold + u.silver = silver + u.bronze = bronze + + #todo: we don't have these fields + #views - number of profile views? + #has_replies + #has_message + #opt_in_recruit + #last_login_ip + #open_id_alt - ?? + #preferences_raw - not clear how to use + #display_name_cleaned - lowercased, srtipped name + #timed_penalty_date + #phone + + #don't know how to handle these - there was no usage example + #password_id + #guid + + #ignored + #last_email_date - this translates directly to EmailFeedSetting.reported_at + + #save the data + u.save() + form = EditUserEmailFeedsForm() + form.reset() + if se_u.opt_in_email == True:#set up daily subscription on "own" items + form.cleaned_data['individually_selected'] = 'd' + form.cleaned_data['asked_by_me'] = 'd' + form.cleaned_data['answered_by_me'] = 'd' + # + form.save(user=u, save_unbound=True) + + if 'u_auth' in locals(): + u_auth.user = u + u_auth.save() + USER[se_u.id] = u.id |