1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
from xml.etree import ElementTree as et
import sys
import re
import os
if __name__ != '__main__':#hack do not import models if run as script
from django.db import models
from datetime import datetime
# Optional prefix prepended to generated table/model names
# (e.g. 'StackExchange'); empty string means no prefix.
table_prefix = ''

# strptime format for dump timestamps; the fractional part of the
# second is not represented here and is therefore lost.
date_time_format = '%Y-%m-%dT%H:%M:%S'

# Matches an optional trailing fractional-seconds suffix (".123") so it
# can be stripped before parsing with date_time_format.
time_re = re.compile(r'(\.[\d]+)?$')

# Directory containing this file.
loader_app_name = os.path.dirname(__file__)
# Maps an XSD type name (plus the internal pseudo-types 'FK', 'PK' and
# 'text') to the dotted name of the Django field class written into the
# generated model source.
types = {
    'unsignedByte': 'models.IntegerField',
    'FK': 'models.ForeignKey',
    'PK': 'models.IntegerField',
    'string': 'models.CharField',
    'text': 'models.TextField',
    'int': 'models.IntegerField',
    'boolean': 'models.NullBooleanField',
    'dateTime': 'models.DateTimeField',
    'base64Binary': 'models.TextField',
    'double': 'models.IntegerField',
}
def camel_to_python(camel):
    """Convert a CamelCase identifier to lower-case snake_case.

    Example: 'OwnerUserId' -> 'owner_user_id'.
    Recipe from http://stackoverflow.com/questions/1175208/
    """
    # First pass: put an underscore before every capitalised word
    # (an upper-case letter followed by lower-case letters).
    words_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', camel)
    # Second pass: split the remaining lower/digit -> upper transitions.
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', words_split)
    return fully_split.lower()
def singular(word):
    """Return *word* without one trailing 's'; other words are unchanged."""
    return word[:-1] if word.endswith('s') else word
def get_table_name(name):
    """Build the db table name from the basename of an .xml/.xsd file.

    The name is split on the character '2' (as in 'Posts2Votes'), each
    part is singularised, the parts are re-joined with '2' and the
    module-level table_prefix is prepended.  A name without '2' is just
    singularised as a whole.
    """
    singular_parts = [singular(part) for part in name.split('2')]
    return table_prefix + '2'.join(singular_parts)
class DjangoModel(object):
    """Accumulates field objects and renders Django model source code."""

    def __init__(self, name):
        """Derive the model name from *name* and start with no fields."""
        self.name = get_table_name(name)
        self.fields = []

    def add_field(self,field):
        """Attach *field* to this model and record this model on the field."""
        field.table = self
        self.fields.append(field)

    def __str__(self):
        """Return the class header followed by one line per field."""
        header = 'class %s(models.Model):\n' % self.name
        body = ''.join(' %s\n' % str(f) for f in self.fields)
        return header + body
class DjangoField(object):
    """One column of a generated Django model.

    name        -- CamelCase source name; stored as snake_case
                   ('class' is renamed to 'class_type').
    type        -- key into the module-level ``types`` table.
    restriction -- max length for 'string' fields; None or -1 means
                   "no restriction".
    """

    def __init__(self, name, type, restriction = None):
        self.name = camel_to_python(name)
        if self.name == 'class':
            self.name = 'class_type'#work around python keyword
        self.type = type
        self.table = None  # set by DjangoModel.add_field
        self.restriction = restriction
        self.relation = None  # target model name, used for 'FK' only

    def __str__(self):
        """Render the field as one line of Django model source.

        Raises KeyError for a type missing from ``types`` and Exception
        when a length restriction is set on a non-string field.
        """
        # Look the class up first so an unknown type fails before
        # anything else, as it always did.
        field_class = types[self.type]
        if self.type == 'FK':
            args = "'%s'" % self.relation
            args += ", related_name='%s_by_%s_set'" % (self.table.name, self.name)
            args += ', null=True'#nullable to make life easier
        elif self.type == 'PK':
            args = 'primary_key=True'
        elif self.restriction is None or self.restriction == -1:
            # Both the constructor default (None) and the explicit -1
            # sentinel mean "unrestricted".
            args = 'null=True'
        elif self.type == 'string':
            args = 'max_length=%s' % self.restriction
            args += ', null=True'
        else:
            raise Exception('restriction (max_length) supported only for string type')
        return '%s = %s(%s)' % (self.name, field_class, args)

    def get_type(self):
        """Return the type key given at construction."""
        return self.type
class DjangoPK(DjangoField):
    """Primary-key field: always named 'id' with type 'PK'."""

    def __init__(self):
        # Go through the base initialiser so the instance also gets the
        # table/restriction/relation attributes every other field has.
        super(DjangoPK, self).__init__('id', 'PK')
class DjangoFK(DjangoField):
    """Foreign-key field derived from a source column named '<Something>Id'."""

    def __init__(self, source_name):
        """Strip the trailing 'Id' and set up the field and its relation.

        Raises ValueError when *source_name* does not end with 'Id'.
        """
        if not source_name.endswith('Id'):
            raise ValueError(
                "foreign key column name must end with 'Id': %r" % (source_name,)
            )
        # Slice off only the final 'Id'; splitting on 'Id' left the name
        # undefined when 'Id' occurred more than once.
        name = source_name[:-2]
        super(DjangoFK, self).__init__(name, 'FK')
        self.set_relation(name)

    def set_relation(self, name):
        """some relations need to be mapped
        to actual tables
        """
        self.relation = table_prefix
        if name.endswith('User'):
            self.relation += 'User'
        elif name.endswith('Post'):
            self.relation += 'Post'
        elif name in ('AcceptedAnswer','Parent'):
            self.relation = 'self' #self-referential Post model
        else:
            self.relation += name

    def get_relation(self):
        """Return the related model name computed by set_relation."""
        return self.relation
def get_col_type(col):
    """Return ``(type_name, restriction)`` for an XSD column element.

    If the element has a ``type`` attribute, that is the type and the
    restriction is -1 (none).  Otherwise the type is the ``base`` of the
    nested ``simpleType/restriction`` element and the restriction is the
    integer ``value`` of its first child, or -1 when that is missing or
    not an integer.  A restriction above 400 becomes ('text', -1).

    Raises AttributeError if there is neither a ``type`` attribute nor a
    ``simpleType/restriction`` descendant.
    """
    declared_type = col.get('type')
    if declared_type is not None:
        return declared_type, -1

    type_e = col.find('.//simpleType/restriction')
    base_type = type_e.get('base')
    try:
        # Index the element directly: getchildren() is gone in newer
        # Pythons, and a blanket except would hide that failure.
        restriction = int(type_e[0].get('value'))
    except (IndexError, TypeError, ValueError):
        restriction = -1

    if restriction > 400:
        return 'text', -1
    return base_type, restriction
def make_field_from_xml_tree(xml_element):
    """Build the field object for one XSD column element (model parser).

    The field type matters here because it defines the database schema.
    Returns None for the unused 'LinkedVoteId' column.
    """
    name = xml_element.get('name')

    if name == 'LinkedVoteId':#not used
        return None
    if name == 'Id':
        return DjangoPK()
    # '...Id' columns are foreign keys, except these two plain columns.
    if name.endswith('Id') and name not in ('OpenId','PasswordId'):
        return DjangoFK(name)
    if name.endswith('GUID'):
        return DjangoField(name, 'string', 64)

    col_type, restriction = get_col_type(xml_element)
    return DjangoField(name, col_type, restriction)
def parse_field_name(input):
    """Return the python field name for a source column name (data reader).

    The naming rules live in DjangoField, DjangoPK and DjangoFK, so this
    function builds a throw-away field object on every call just to read
    its ``name``.
    """
    if input == 'Id':
        return DjangoPK().name

    # 'OpenId' and 'PasswordId' end with 'Id' but are ordinary columns.
    is_plain_column = input in ('OpenId', 'PasswordId') or not input.endswith('Id')
    if is_plain_column:
        # type and length are placeholders; only the name is used
        return DjangoField(input, 'string', 7).name

    return DjangoFK(input).name#real FK field
def parse_value(input, field_object):
    """Convert the raw string *input* to a value for *field_object*.

    ForeignKey    -> related instance with that id; an empty placeholder
                     is saved first if none exists yet.
    IntegerField  -> int
    CharField / TextField -> *input* unchanged
    BooleanField  -> True for 'true'/'1', False for 'false'/'0'/''
                     (case-insensitive, surrounding whitespace ignored)
    DateTimeField -> datetime parsed with date_time_format after the
                     fractional seconds are stripped

    Returns None for any other field class.  Raises Exception when the
    input cannot be converted.

    NOTE(review): the ``types`` table generates NullBooleanField for
    booleans; whether that passes the BooleanField check below depends
    on the Django version -- confirm.
    """
    if isinstance(field_object, models.ForeignKey):
        try:
            pk_value = int(input)
        except (TypeError, ValueError):
            raise Exception('non-numeric foreign key %s' % input)
        related_model = field_object.rel.to
        try:
            return related_model.objects.get(id=pk_value)
        except related_model.DoesNotExist:
            obj = related_model(id=pk_value)
            obj.save()#save fake empty object
            return obj
    elif isinstance(field_object, models.IntegerField):
        try:
            return int(input)
        except (TypeError, ValueError):
            raise Exception('expected integer, found %s' % input)
    elif isinstance(field_object, models.CharField):
        return input
    elif isinstance(field_object, models.TextField):
        return input
    elif isinstance(field_object, models.BooleanField):
        if isinstance(input, bool):
            return input
        # bool('false') is True, so compare the text explicitly.
        try:
            text = input.strip().lower()
        except AttributeError:
            raise Exception('boolean value expected %s found' % input)
        if text in ('true', '1'):
            return True
        if text in ('false', '0', ''):
            return False
        raise Exception('boolean value expected %s found' % input)
    elif isinstance(field_object, models.DateTimeField):
        input = time_re.sub('', input)
        try:
            return datetime.strptime(input, date_time_format)
        except (TypeError, ValueError):
            raise Exception('datetime expected "%s" found' % input)
# Script body: write Django model source to stdout for every command-line
# argument containing '.xsd'.  The model name comes from the file's
# basename and the fields from the children of its first 'sequence'
# element.  Single-argument print(...) behaves the same on Python 2 and 3.
print('from django.db import models')
for file in sys.argv:
    if '.xsd' in file:
        tname = os.path.basename(file).replace('.xsd','')
        tree = et.parse(file)
        model = DjangoModel(tname)
        row = tree.find('.//sequence')
        # Iterate the element directly; Element.getchildren() no longer
        # exists in newer Pythons.
        for col in row:
            field = make_field_from_xml_tree(col)
            if field:
                model.add_field(field)
        print(model)
|