1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
from django.core.management.base import LabelCommand
from zipfile import ZipFile
from xml.dom import minidom as dom
import datetime
from forum.models import User
class Command(LabelCommand):
    """Management command that imports a zipped XML data dump.

    Each label given on the command line is treated as the path of a zip
    archive containing ``Users.xml`` and ``Posts.xml``.
    """

    def handle_label(self, label, **options):
        """Import the archive located at *label*.

        Users are imported first; the per-entity key maps built so far are
        then handed to ``import_posts``.
        """
        id_maps = {}
        # The context manager guarantees the archive is closed even when an
        # import step raises.
        with ZipFile(label) as archive:
            id_maps['users'] = self.import_users(archive.open("Users.xml"))
            id_maps['questions'], id_maps['answers'] = self.import_posts(
                archive.open("Posts.xml"), id_maps)

    def row_to_dic(self, row):
        """Convert a ``row`` DOM element into a plain dict.

        Keys are the lower-cased local names of the row's child elements;
        values are the child's direct text nodes joined with a single space
        (an element without text therefore maps to ``""``).
        """
        return dict(
            (child.localName.lower(),
             " ".join(node.nodeValue
                      for node in child.childNodes
                      if node.nodeType == node.TEXT_NODE))
            for child in row.childNodes
            if child.nodeType == child.ELEMENT_NODE
        )

    def from_sx_time(self, timestring):
        """Parse a dump timestamp into a ``datetime.datetime``.

        Accepts ``YYYY-MM-DDTHH:MM:SS`` with or without fractional seconds.
        Returns ``None`` when *timestring* is ``None`` or empty (an empty
        element in the dump yields ``""`` from ``row_to_dic``).

        Raises ``ValueError`` if the value matches neither format.
        """
        if not timestring:
            return None
        try:
            return datetime.datetime.strptime(timestring, '%Y-%m-%dT%H:%M:%S')
        except ValueError:
            # Some timestamps carry fractional seconds.
            return datetime.datetime.strptime(timestring,
                                              '%Y-%m-%dT%H:%M:%S.%f')

    def import_users(self, users):
        """Create and save one ``User`` per ``row`` element in *users*.

        *users* is a file-like object (or filename) holding the users XML.
        Returns a dict mapping each row's original ``id`` value (a string)
        to the saved ``User`` instance.
        """
        pkey_map = {}
        doc = dom.parse(users)
        unknown_count = 0
        # A set keeps the uniqueness test O(1) per user.
        taken_names = set()

        for row in doc.getElementsByTagName('row'):
            values = self.row_to_dic(row)

            # Prefer the display name, then the real name, then the email.
            username = values.get('displayname',
                                  values.get('realname',
                                             values.get('email', None)))
            if username is None:
                unknown_count += 1
                username = 'Unknown User %d' % unknown_count

            # Disambiguate duplicates by appending the first free counter.
            if username in taken_names:
                cnt = 1
                candidate = "%s %d" % (username, cnt)
                while candidate in taken_names:
                    cnt += 1
                    candidate = "%s %d" % (username, cnt)
                username = candidate
            taken_names.add(username)

            user = User(username=username, email=values.get('email', ''))
            user.reputation = values['reputation']
            user.last_seen = self.from_sx_time(values['lastaccessdate'])
            user.real_name = values.get('realname', '')
            user.about = values.get('aboutme', '')
            user.website = values.get('websiteurl', '')
            user.date_of_birth = self.from_sx_time(values.get('birthday', None))
            user.location = values.get('location', '')
            user.is_active = True
            user.email_isvalid = True

            # NOTE(review): user type 5 is presumably the administrator
            # type -- confirm against the dump schema.
            if int(values['usertypeid']) == 5:
                user.is_superuser = True
                user.is_staff = True

            user.save()
            pkey_map[values['id']] = user

        return pkey_map

    def import_posts(self, posts, map):
        """Parse the posts XML and return ``(questions, answers)`` key maps.

        *posts* is a file-like object (or filename) holding the posts XML;
        *map* holds the key maps built by earlier import steps.

        NOTE(review): importing posts is not implemented yet. The document
        is parsed, so a malformed dump is still reported, but no posts are
        created and both returned maps are empty.
        """
        questions = {}
        answers = {}
        dom.parse(posts)
        # TODO: create questions/answers from the parsed rows and record
        # their original ids in the maps returned below.
        return questions, answers
|