summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Evgeny Fadeev <evgeny.fadeev@gmail.com> 2011-06-14 02:06:45 -0400
committer Evgeny Fadeev <evgeny.fadeev@gmail.com> 2011-06-14 02:06:45 -0400
commit 41cd8bd6165669eacef20215b9ae671d55111d5d (patch)
tree ba8ce48880bf92440bab27b1a0c0530071aebefd
parent 6cb91750ff7eb7fc2e9234a1d027e0ebe047f3e0 (diff)
downloadaskbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.gz
askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.tar.bz2
askbot-41cd8bd6165669eacef20215b9ae671d55111d5d.zip
user import from zendesk kinda works
-rw-r--r-- askbot/management/commands/import_zendesk.py 133
-rw-r--r-- askbot/utils/console.py 29
-rw-r--r-- askbot/utils/html.py 27
3 files changed, 182 insertions, 7 deletions
diff --git a/askbot/management/commands/import_zendesk.py b/askbot/management/commands/import_zendesk.py
new file mode 100644
index 00000000..c4916f72
--- /dev/null
+++ b/askbot/management/commands/import_zendesk.py
@@ -0,0 +1,133 @@
+"""importer from zendesk, please note that you need an exporter in the first place
+to use this command.
+If you are interested in using it - please ask Evgeny <evgeny.fadeev@gmail.com>
+"""
+import os
+import re
+import sys
+import tarfile
+import tempfile
+from datetime import datetime, date
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from lxml import etree
+from askbot import models
+from askbot.utils import console
+from askbot.utils.html import unescape
+
+#a hack, did not know how to parse timezone offset:
+#an 'HH:MM' offset is parsed with strptime and ZERO_TIME ('00:00' parsed
+#with the same format) is subtracted from it to obtain a timedelta
+ZERO_TIME = datetime.strptime('00:00', '%H:%M')
+
+def get_unique_username(name_seed):
+    """returns a user name that is not yet taken in the database
+
+    ``name_seed`` is returned unchanged when no user has that name,
+    otherwise the numbers 1, 2, 3 ... are appended to the seed, one
+    at a time, until a name is found that does not exist yet
+    """
+    candidate = name_seed
+    suffix = 0
+    while True:
+        try:
+            models.User.objects.get(username = candidate)
+        except models.User.DoesNotExist:
+            #nobody uses this name - done
+            return candidate
+        #name is taken - try the seed with the next number appended
+        suffix += 1
+        candidate = name_seed + str(suffix)
+
+def get_val(elem, field_name):
+    """returns value of the child element ``field_name`` of ``elem``
+    converted according to the "type" attribute of that child:
+
+    * "boolean" - True or False, ValueError is raised for any text
+      other than "true" or "false"
+    * type ending with "integer" - int
+    * "datetime" - naive datetime with the timezone offset applied,
+      or None when the element is empty
+    * anything else, or no "type" attribute - the text itself,
+      or '' when the element is empty
+
+    the child element must exist, ``elem.find`` returning None
+    is not handled here
+    """
+    field = elem.find(field_name)
+    #elements without the "type" attribute are treated as plain text
+    field_type = field.attrib.get('type', '')
+    raw_val = field.text
+    if field_type == 'boolean':
+        if raw_val == 'true':
+            return True
+        elif raw_val == 'false':
+            return False
+        else:
+            raise ValueError('"true" or "false" expected, found "%s"' % raw_val)
+    elif field_type.endswith('integer'):
+        return int(raw_val)
+    elif field_type == 'datetime':
+        if not raw_val:
+            #empty element: must be checked before slicing, because
+            #None cannot be sliced and '' has no character 19
+            return None
+        #layout: 19 characters of date and time, sign, HH:MM offset
+        raw_datetime = raw_val[:19]
+        tzoffset_sign = raw_val[19]
+        raw_tzoffset = raw_val[20:]
+        dt = datetime.strptime(raw_datetime, '%Y-%m-%dT%H:%M:%S')
+        tzoffset_amt = datetime.strptime(raw_tzoffset, '%H:%M')
+        #difference with midnight turns the parsed offset into timedelta
+        tzoffset = tzoffset_amt - ZERO_TIME
+        #NOTE(review): offset is subtracted for "-" and added otherwise,
+        #confirm that this is the intended direction of the conversion
+        if tzoffset_sign == '-':
+            return dt - tzoffset
+        return dt + tzoffset
+    #plain text, empty elements give an empty string
+    return raw_val or ''
+
+class Command(BaseCommand):
+    """management command importing data from a zendesk dump,
+    at the moment only the user accounts are imported
+    """
+    def handle(self, *args, **kwargs):
+        """expects exactly one positional argument - path to the
+        tarred and gzipped dump, raises CommandError otherwise
+        """
+        if len(args) != 1:
+            raise CommandError('please provide path to tarred and gzipped zendesk dump')
+
+        self.tar = tarfile.open(args[0], 'r:gz')
+        try:
+            sys.stdout.write("Importing user accounts: ")
+            self.import_users()
+            #the steps below are disabled
+            #self.import_openid_associations()
+            #self.import_email_settings()
+
+            #self.import_question_edits()
+            #self.import_answer_edits()
+
+            #self.import_question_data()
+            #self.import_answer_data()
+
+            #self.import_comments()
+
+            #self.import_question_views()
+            #self.import_favorite_questions()
+            #self.import_marked_tags()
+
+            #self.import_votes()
+        finally:
+            #release the archive even when the import fails
+            self.tar.close()
+
+    def get_file(self, file_name):
+        """returns parsed xml tree of the file ``file_name``
+        taken from the opened archive ``self.tar``
+        """
+        first_item = self.tar.getnames()[0]
+        file_path = file_name
+        #when the first archive member is not an xml file it is
+        #used as the directory prefix for the requested file
+        if not first_item.endswith('.xml'):
+            file_path = os.path.join(first_item, file_path)
+
+        file_info = self.tar.getmember(file_path)
+        xml_file = self.tar.extractfile(file_info)
+        return etree.parse(xml_file)
+
+    @transaction.commit_manually
+    def import_users(self):
+        """creates a user for every "user" element of users.xml,
+        the transaction is committed after each saved user
+        """
+        xml = self.get_file('users.xml')
+        added_users = 0
+        for user in xml.findall('user'):
+            #a whole bunch of fields are actually dropped now
+            #see what's available in users.xml meanings of some
+            #values there is not clear
+
+            #special treatment for the user name
+            username = unescape(get_val(user, 'name'))#unescape html entities
+            username = get_unique_username(username)
+
+            ab_user = models.User(
+                email = get_val(user, 'email'),
+                email_isvalid = get_val(user, 'is-verified'),
+                date_joined = get_val(user, 'created-at'),
+                username = username,
+                is_active = get_val(user, 'is-active'),
+            )
+            ab_user.save()
+            added_users += 1
+            console.print_action(ab_user.username)
+            transaction.commit()
+        console.print_action('%d users added' % added_users, nowipe = True)
+        #manual transaction management requires a commit before returning
+        transaction.commit()
diff --git a/askbot/utils/console.py b/askbot/utils/console.py
index 041fc839..470856b5 100644
--- a/askbot/utils/console.py
+++ b/askbot/utils/console.py
@@ -49,13 +49,28 @@ def open_new_file(prompt_phrase, extension = '', hint = None):
return file_object
-def print_progress(format_string, progress):
+def print_action(action_text, nowipe = False):
+    """prints ``action_text`` to the standard output and, unless
+    ``nowipe`` is set, erases it again so that the next message
+    can be printed in the same place; with ``nowipe`` the text
+    stays and a line break is written after it
+    """
+    #for some reason sys.stdout.write does not work here
+    #when action text is unicode
+    print action_text,
+    sys.stdout.flush()
+    if nowipe != False:
+        #keep the text and move on to the next line
+        sys.stdout.write('\n')
+        return
+    width = len(action_text)
+    rewind = '\b' * width
+    #return to the beginning of the word
+    sys.stdout.write(rewind)
+    #white out the printed text
+    sys.stdout.write(' ' * width)
+    #return again
+    sys.stdout.write(rewind)
+
+def print_progress(elapsed, total, nowipe = False):
     """print dynamic output of progress of some
-    operation to the console and clear the output with
+    operation, in percent, to the console and clear the output with
     a backspace character to have the number increment
     in-place"""
-    output = format_string % progress
-    sys.stdout.write(output)
-    sys.stdout.flush()
-    sys.stdout.write('\b' * len(output))
-
+    #parentheses are required: "%" and "*" have the same precedence,
+    #without them 100 is formatted first and the resulting string
+    #is multiplied by a float, which raises TypeError
+    output = '%6.2f%%' % (100 * float(elapsed) / float(total))
+    print_action(output, nowipe)
diff --git a/askbot/utils/html.py b/askbot/utils/html.py
index aa8e24d8..f6c168fb 100644
--- a/askbot/utils/html.py
+++ b/askbot/utils/html.py
@@ -1,6 +1,7 @@
"""Utilities for working with HTML."""
import html5lib
from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
+import re, htmlentitydefs
class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin):
acceptable_elements = ('a', 'abbr', 'acronym', 'address', 'b', 'big',
@@ -49,3 +50,29 @@ def sanitize_html(html):
quote_attr_values=True)
output_generator = s.serialize(stream)
return u''.join(output_generator)
+
+def unescape(text):
+    """source: http://effbot.org/zone/re-sub.htm#unescape-html
+    Removes HTML or XML character references and entities from a text string.
+    References that cannot be converted are left in the text as they are.
+    @param text The HTML (or XML) source text.
+    @return The plain text, as a Unicode string, if necessary.
+    """
+    def fixup(m):
+        """returns replacement for one matched reference or entity"""
+        text = m.group(0)
+        if text[:2] == "&#":
+            # character reference
+            try:
+                if text[:3] == "&#x":
+                    return unichr(int(text[3:-1], 16))
+                else:
+                    return unichr(int(text[2:-1]))
+            except (ValueError, OverflowError):
+                # not a number, or the number is outside of the
+                # range accepted by unichr
+                pass
+        else:
+            # named entity
+            try:
+                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+            except KeyError:
+                pass
+        return text # leave as is
+    return re.sub(r"&#?\w+;", fixup, text)