diff options
author | KP <kp@shotgunsoftware.com> | 2013-05-22 10:52:43 -0500 |
---|---|---|
committer | KP <kp@shotgunsoftware.com> | 2013-06-13 10:23:26 -0500 |
commit | 7cfe01cdb9ee32992c966fd6eb0fea24be3cf40d (patch) | |
tree | d3dde02eda81b5457d624373d00563efcd3e6f27 | |
parent | 52bb0054fb4fe2fa1359561887e4f3611cac8c82 (diff) | |
download | askbot-7cfe01cdb9ee32992c966fd6eb0fea24be3cf40d.tar.gz askbot-7cfe01cdb9ee32992c966fd6eb0fea24be3cf40d.tar.bz2 askbot-7cfe01cdb9ee32992c966fd6eb0fea24be3cf40d.zip |
complete rewrite of Zendesk importer
-rw-r--r-- | askbot/importers/zendesk/management/commands/import_zendesk.py | 1036 | ||||
-rw-r--r-- | askbot/importers/zendesk/models.py | 190 |
2 files changed, 1095 insertions, 131 deletions
diff --git a/askbot/importers/zendesk/management/commands/import_zendesk.py b/askbot/importers/zendesk/management/commands/import_zendesk.py index 2614a51c..7472f4f7 100644 --- a/askbot/importers/zendesk/management/commands/import_zendesk.py +++ b/askbot/importers/zendesk/management/commands/import_zendesk.py @@ -1,10 +1,75 @@ -"""importer from zendesk data dump -the dump must be a tar/gzipped file, containing one directory -with all the .xml files. +""" +Zendesk XML data import + +This script will import a tar/gzipped file generated by Zendesk into Askbot. +For more info see https://support.zendesk.com/entries/23002207 (use XML +instead of CSV) + +The tgz archive must contain a single directory with the following xml +files: + accounts.xml (ignored) + categories.xml (ignored) + entries.xml + forums.xml + groups.xml (ignored) + organizations.xml + posts.xml + tickets.xml + users.xml + +You have the ability to filter choose whether to import forums, tickets, or +both. Additionally, you can specify whether you wish to filter the content +further by specific forums, tags, and date. + +FORUMS: + Importing forums will give you a list of your public forums to choose from. + Private forums will not be shown by default. Once you choose the forums + you wish to import, you can filter down the entries by tags (any entry + with any matching tag will be imported), and by date range (of created_at + datetime). + + Votes on Entries will be converted to Votes for the question in Askbot. + View counts are transferred as well. If a Post is marked as is_informative, + it will mark the answer as accepted. Note that since Zendesk supports + multiple "accepted" answers and Askbot does not, this will cause the + accepted answer in Askbot to be the most recent accepted Post. + +TICKETS: + Importing Tickets will give you an option to filter down the entries by + tags (any Ticket with any matching tag will be imported), and by date range + (of created_at datetime). + + Tickets don't have any view count or vote stats so none of that info is + transferred. Additionally, there's no easy way to determine which comment + on the Ticket may be the accepted answer so the script doesn't mark any + answer as accepted. + + Private comments are not imported. + +REQUIREMENTS: + This script requires the lxml module which is not part of the base + Askbot install. The lxml module will require your server have the + libxml2-devel and libxslt-devel packages installed in order to + install correctly. + +NOTES: + Running this import will truncate the existing zendesk_* tables in order + to ensure you don't end up re-importing existing data. + + If your site is configured to only allow a single answer per user + (LIMIT_ONE_ANSWER_PER_USER = True), you will be prompted to disable this + setting temporarily while the import proceeds. It will turn it back on + when complete. If you choose not to disable this setting, then the import + will add any additional answers from a user as comments on their original + answer. This is not ideal so it's encouraged you agree to turn this + setting off while doing the import. Run this command as:: + python manage.py import_zendesk /path/to/zendesk/archive.tgz - python manage.py import_zendesk path/to/dump.tgz +TODO: + - Use logging for more verbose output + - Add option to import Attachments from existing Zendesk installation """ import os import re @@ -12,97 +77,310 @@ import sys import tarfile import tempfile from datetime import datetime, date +from lxml import etree from django.core.management.base import BaseCommand, CommandError from django.conf import settings from django.db import transaction -from lxml import etree +from django.db import connection from askbot import models as askbot_models from askbot.utils import console from askbot.utils.html import unescape - +from askbot import exceptions as askbot_exceptions +from askbot.conf import settings as askbot_settings from askbot.importers.zendesk import models as zendesk_models -#a hack, did not know how to parse timezone offset +# a hack, did not know how to parse timezone offset +# todo: clean this up ZERO_TIME = datetime.strptime('00:00', '%H:%M') +# load admin user where a user is needed (eg. user who closed thread) +ADMIN_USER = askbot_models.User.objects.filter(is_superuser=True)[:1] +# option choices for what data to import from Zendesk +DATA_IMPORT_ALL = 0 +DATA_IMPORT_FORUMS = 1 +DATA_IMPORT_TICKETS = 2 + +# used for seeding Vote count when importing Zendesk Forum content +try: + PHANTOM_VOTER_USER = askbot_models.User.objects.get(username='phantom_voter') +except askbot_models.User.DoesNotExist: + PHANTOM_VOTER_USER = askbot_models.User( + username = 'phantom_voter', + first_name = 'Phantom', + last_name = 'Voter', + real_name = 'Phantom Voter', + date_joined = datetime.now(), + is_active = False, + about = 'Fake account for seeding vote counts during Zendesk import', + ).save() + +def ensure_unique_username(name_seed): + """Returns unique user name, by modifying the name if the same name exists + in the database until the modified name is unique. + + :param name_seed: (str) proposed user name -def get_unique_username(name_seed): - """returns unique user name, by modifying the - name if the same name exists in the database - until the modified name is unique + :returns: (str) validated unique user name """ original_name = name_seed attempt_no = 1 while True: try: askbot_models.User.objects.get(username = name_seed) - name_seed = original_name + str(attempt_no) + name_seed = original_name[:29] + str(attempt_no) attempt_no += 1 except askbot_models.User.DoesNotExist: return name_seed -def clean_username(name_seed): - """makes sure that the name is unique - and is no longer than 30 characters""" - username = get_unique_username(name_seed) - if len(username) > 30: - username = get_unique_username(username[:28]) - if len(username) > 30: - #will allow about a million extra possible unique names - username = get_unique_username(username[:24]) - return username - def create_askbot_user(zd_user): - """create askbot user from zendesk user record - return askbot user or None, if there is error - """ - #special treatment for the user name - raw_username = unescape(zd_user.name) - username = clean_username(raw_username) - if len(username) > 30:#nearly impossible skip such user - print "Warning: could not import user %s" % raw_username - return None + """Create askbot user from Zendesk User record + Zendesk User fields that are copied over or otherwise translated: + name + username + email + Organization name matching organization_id + is_verified + is_active + last_login + created_at + + :param zd_user: (obj) zendesk_models.User object to create Askbot user + from. + + :returns: (mixed) askbot user object or None if there is an error + """ if zd_user.email is None: + username = zd_user.name.replace(" ", "_").lower() email = '' else: + username = zd_user.email email = zd_user.email + username = ensure_unique_username(username[:30]) + + # last_seen cannot be null + last_seen = zd_user.last_login + if not last_seen: + last_seen = zd_user.created_at + + # lookup organization name (todo: cache this) + about = "" + if zd_user.organization_id: + try: + org = zendesk_models.Organization.objects.get(organization_id=zd_user.organization_id) + about = org.name + except zendesk_models.Organization.DoesNotExist: + pass ab_user = askbot_models.User( + username = username, + first_name = zd_user.name.rpartition(' ')[0].strip()[:30], + last_name = zd_user.name.rpartition(' ')[2].strip()[:30], + real_name = zd_user.name[:100], email = email, email_isvalid = zd_user.is_verified, date_joined = zd_user.created_at, - last_seen = zd_user.created_at,#add initial date for now - username = username, - is_active = zd_user.is_active + last_seen = last_seen, + is_active = zd_user.is_active, + about = about, ) ab_user.save() return ab_user -def post_question(zendesk_post): - """posts question to askbot, using zendesk post item""" +def seed_post_with_votes(post, votes_count): + """Seed imported Question with an initial vote count + + Votes are set in multple locations for caching. points = (vote_up_count - + vote_down_count). Since we're creating the post now and Zendesk doesn't + have down votes, we just calculate this as up votes. + + Vote objects require a user. We have created an inactive PHANTOM_VOTER_USER + above to artificially serve this purpose. + + NOTE: Vote objects are indended to be +1 (VOTE_UP) or -1 (VOTE_DOWN). + We're overriding this by adding a Vote object that is +votes_count to + create a weighted artificial Vote. This may cause problems if the votes + are recalculated for some reason later. + + :param post: (obj) the askbot.models.Post object to seed with the votes + :param votes_count: (int) number of votes to seed the Post with + """ + post.points = votes_count + post.vote_up_count = votes_count + post.save() + post.thread.points = votes_count + post.thread.save() + askbot_models.Vote(user=PHANTOM_VOTER_USER, voted_post=post, + vote=votes_count, voted_at=datetime.now()).save() + +def post_question(zendesk_entry): + """Posts question to askbot from Zendesk Entry + + Translates Zendesk Entry to an Askbot question. Links correct user, + updates the view count and vote count. Closes the question if the + Entry is locked. + + :param zendesk_entry: (obj) zendesk_models.Entry object + + :returns: (obj) askbot Post object if it succeeded. None if there was + an error. + """ try: - return zendesk_post.get_author().post_question( - title = zendesk_post.get_fake_title(), - body_text = zendesk_post.get_body_text(), - tags = zendesk_post.get_tag_name(), - timestamp = zendesk_post.created_at + askbot_post = zendesk_entry.get_author().post_question( + title = zendesk_entry.title, + body_text = zendesk_entry.get_body_text(), + tags = zendesk_entry.get_tag_names(), + timestamp = zendesk_entry.created_at, ) + # seed the views with the # hits we had on zendesk + askbot_post.thread.increase_view_count(increment=zendesk_entry.hits) + if zendesk_entry.votes_count: + seed_post_with_votes(askbot_post, zendesk_entry.votes_count) + + # close threads that were locked in Zendesk and assign a default + # reason of "question answered". Set default user to admin. + if zendesk_entry.is_locked: + askbot_post.thread.set_closed_status( + closed=True, + closed_by=ADMIN_USER, + closed_at=datetime.now(), + close_reason=5) + askbot_post.thread.save() + return askbot_post except Exception, e: msg = unicode(e) - print "Warning: post %d dropped: %s" % (zendesk_post.post_id, msg) + print "Warning: entry %d skipped: %s" % (zendesk_entry.entry_id, msg) + +def post_question_from_ticket(zendesk_ticket): + """Posts question to Askbot from Zendesk Ticket -def post_answer(zendesk_post, question = None): + Translates Zendesk Ticket to an Askbot question. View count and votes + aren't relevant on Tickets in Zendesk so we don't seed any of that info + (like we do on post_question()). + + :param zendesk_ticket: (obj) zendesk_models.Ticket object + + :returns: (obj) askbot Post object if it succeeded. None if there was + an error. + + :todo: wrap this into post_question() + """ try: - zendesk_post.get_author().post_answer( + askbot_post = zendesk_ticket.get_author().post_question( + title = zendesk_ticket.subject, + body_text = zendesk_ticket.get_body_text(), + tags = zendesk_ticket.get_tag_names(), + timestamp = zendesk_ticket.created_at + ) + return askbot_post + except Exception, e: + msg = unicode(e) + print "Warning: ticket %d skipped: %s" % (zendesk_ticket.ticket_id, msg) + +def post_comment(source_post, parent): + """Post comment on an answer from a Zendesk Post or Comment. + + :param source_post: (obj) A zendesk_models.Post or zendesk_models.Comment + object + :param parent: (obj) Askbot Post object which will be the parent of the + comment + + :returns: (obj) Askbot Post object with post_type='comment' or None if + there was an error. + """ + try: + askbot_comment = source_post.get_author().post_comment( + parent_post = parent, + body_text = source_post.get_body_text(), + timestamp = source_post.created_at + ) + return askbot_comment + except Exception, e: + msg = unicode(e) + print "Warning: post %d skipped: %s" % (zendesk_post.post_id, msg) + +def post_answer(zendesk_post, question): + """Posts an answer to Askbot, from a Zendesk Post + + If the Post was marked as informative in Zendesk, we mark it as an accepted + answer in Askbot. Since Askbot only allows a single accepted answer and + Zendesk supports multiple answers, this will re-mark the answer + for each one and ultimately end on the most recent post. This may not be + the most relevant answer in the end. + + If Askbot is configured to only allow a single answer per user, any + additional answers from the user will be added as comments on the original + answer from the user. This will likely create some context confusion so + it's recommended you have this setting during for the import. + + :param zendesk_post: (obj) zendesk_models.Post object to create answer from + :param question: (obj) Askbot Post object with post_type='question' to post + the answer to. + + :returns: (obj) Askbot Post object with post_type='answer' or 'comment' + depending on the setting for LIMIT_ONE_ANSWER_PER_USER or None if + there was an error. + """ + try: + askbot_post = zendesk_post.get_author().post_answer( question = question, body_text = zendesk_post.get_body_text(), timestamp = zendesk_post.created_at ) + if zendesk_post.is_informative: + askbot_post.thread.accepted_answer_id = askbot_post.id + askbot_post.thread.save() + return askbot_post + except askbot_exceptions.AnswerAlreadyGiven: + answer = question.thread.get_answers_by_user(user=zendesk_post.get_author())[0] + askbot_comment = post_comment(zendesk_post, answer) + return askbot_comment + except Exception, e: + msg = unicode(e) + print "Warning: post %d skipped: %s" % (zendesk_post.post_id, msg) + +def post_answer_from_comment(zendesk_comment, question): + """Posts an answer to Askbot, from Zendesk Comment on a ticket + + If Askbot is configured to only allow a single answer per user, any + additional answers from the user will be added as comments on the original + answer from the user. This will likely create some context confusion so + it's recommended you have this setting during for the import. + + There is no reliable way to know which comment is the "accepted" answer + so we don't try and set that automatically. + + :param zendesk_comment: (obj) zendesk_models.Comment object to create + answer from. + :param question: (obj) Askbot Post object with post_type='question' to post + the answer to. + """ + if not zendesk_comment.is_public: + return + try: + askbot_post = zendesk_comment.get_author().post_answer( + question = question, + body_text = zendesk_comment.get_body_text(), + timestamp = zendesk_comment.created_at + ) + return askbot_post + except askbot_exceptions.AnswerAlreadyGiven: + answer = question.thread.get_answers_by_user(user=zendesk_comment.get_author())[0] + askbot_comment = post_comment(zendesk_comment, answer) + return askbot_comment except Exception, e: msg = unicode(e) - print "Warning: post %d dropped: %s" % (zendesk_post.post_id, msg) + print "Warning: comment %d skipped: %s" % (zendesk_comment.id, msg) + +def get_xml_element_val(elem, field_name): + """Return the value of the etree element for field_name and cast it to the + correct data type. + + :param elem: (obj) etree element object to search + :param field_name: (str) field name to search for in elem -def get_val(elem, field_name): + :returns: (mixed) value of field_name in etree object cast to the correct + native Python data type + """ field = elem.find(field_name) if field is None: return None @@ -113,7 +391,6 @@ def get_val(elem, field_name): raw_val = field.text if raw_val is None: return None - if field_type == 'boolean': if raw_val == 'true': return True @@ -126,6 +403,7 @@ def get_val(elem, field_name): elif field_type == 'datetime': if raw_val is None: return None + # todo: clean this up raw_datetime = raw_val[:19] tzoffset_sign = raw_val[19] raw_tzoffset = raw_val[20:] @@ -139,29 +417,252 @@ def get_val(elem, field_name): return dt + tzoffset else: return None + elif field_type == 'array': + # returns a list of child elements + # comments > comment + sfield_name = field_name[:-1] + return field.findall(sfield_name) else: return raw_val +def toggle_user_answer_limit_setting(val): + """Turns the Askbot live_setting for LIMIT_ONE_ANSWER_PER_USER on + or off. + + :param val: (bool) value to set LIMIT_ONE_ANSWER_PER_USER to + """ + if val: + askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', True) + else: + askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False) + print "set LIMIT_ONE_ANSWER_PER_USER to %s" % val + +def check_user_answer_limit(): + """Checks if LIMIT_ONE_ANSWER_PER_USER is True, if so, warn the user + and give them an option to turn it off temporarily for the import. + + The import is really messy if we don't allow multiple answers for a user + when translating from Zendesk to Askbot. If the user opts to turn this + off at the beginning of the import it will be turned on automatically when + we're done. + """ + if not askbot_settings.LIMIT_ONE_ANSWER_PER_USER: + return + else: + print + print "*"*64 + print "* WARNING" + print "*"*64 + print "* Your settings are currently limiting users to a single" + print "* answer per question. Zendesk doesn't translate well to" + print "* this. It's highly recommended you let us switch this" + print "* off temporarily while the import proceeds. We'll switch" + print "* it back on when we're done." + print "*" + print "* If you choose not to do this, each additional post on" + print "* a forum topic or additional comment on a ticket will be" + print "* appended as a comment on to the first answer by the user." + print "*" * 64 + prompt = "Okay to turn off the LIMIT_ONE_ANSWER_PER_USER setting?" + response = console.get_yes_or_no(prompt, 'yes') + if response == 'yes': + toggle_user_answer_limit_setting(False) + print + return True + print + class Command(BaseCommand): def handle(self, *args, **kwargs): + """Base handler for command run from command line + + Walks the user through the complete import process. + + Checks the LIMIT_ONE_ANSWER_PER_USER and prompts the user to turn it + OFF if it is currently enabled since imported data will make more sense + with it off temporarily. If it is on prior to the import and they + agree to turn it off, the importer will turn it back on when the import + is complete. + + Prompts user to choose whether they would like to import Forums & + Tickets, Forums only, or Tickets only. Users are automatically imported + as they are required to link new Posts to. Organizations are only used + for looking up the organization name to add the user's profile. + + All content of the required xml files is loaded into separate Zendesk + model tables as an intermediate step. + + When importing Forums, the user is given the choice to choose which + Forums they would like to import content from. + + Tag Filters + The user is able to specify tags to filter on (importing any Forum or + Ticket entries matching ANY of the tags specified). + + Date Filters + The user is able to specify a date range for filtering Forum Posts by + date created. + + These options allow some simple control over potentially large amounts + of data, weeding out noise and otherwise outdated or irrelevant + content. + + :param args: (mixed) positional arguments to command. We require + a single str argument here as the full path to the Zendesk tgz archive + containing the xml files. + """ if len(args) != 1: - raise CommandError('please provide path to tarred and gzipped cnprog dump') + raise CommandError('Please provide the path to the Zendesk tgz archive.') self.tar = tarfile.open(args[0], 'r:gz') - sys.stdout.write('Reading users.xml: ') + # ask what data we are importing + print + print + print "-"*64 + print "This script will attempt to import your Zendesk data into" + print "Askbot. If you are importing into an existing installation," + print "** backup your database before continuing **!" + print "-"*64 + print "You will have a chance to decide if you want to import" + print "tickets, forums, or both. Additional options are presented" + print "to filter the imported content by forum, tag, and date." + print "Users are always imported." + print "-"*64 + + user_answer_limit_reset = check_user_answer_limit() + choices = ['Forums and Tickets', 'Forums Only', 'Tickets Only'] + prompt = "What data do you wish to import from Zendesk?" + data_choice = console.numeric_choice_dialog(prompt, choices=choices) + print + + # read relevant data into temporary tables. We read everything and then + # filter when we actually import into the Askbot tablespace + sys.stdout.write("Reading organizations.xml... ") + self.read_organizations() + sys.stdout.write("Reading users.xml... ") self.read_users() - sys.stdout.write('Reading posts.xml: ') - self.read_posts() - sys.stdout.write('Reading forums.xml: ') - self.read_forums() + if data_choice in [DATA_IMPORT_ALL, DATA_IMPORT_FORUMS]: + sys.stdout.write("Reading forums.xml... ") + self.read_forums() + sys.stdout.write("Reading entries.xml... ") + self.read_entries() + sys.stdout.write("Reading posts.xml... ") + self.read_posts() + if data_choice in [DATA_IMPORT_ALL, DATA_IMPORT_TICKETS]: + sys.stdout.write("Reading tickets.xml... ") + self.read_tickets() + + # forums choices + # --------------------------------------------------------------------- + print + if data_choice in [DATA_IMPORT_ALL, DATA_IMPORT_FORUMS]: + print "="*64 + print " FORUMS" + print "="*64 + forum_choices = self.prompt_for_forums() + forum_tag_choices = self.prompt_for_tags() + (forum_date_filter) = self.prompt_for_date() + # tickets choices + # --------------------------------------------------------------------- + if data_choice in [DATA_IMPORT_ALL, DATA_IMPORT_TICKETS]: + print + print "="*64 + print " TICKETS" + print "="*64 + ticket_tag_choices = self.prompt_for_tags() + (ticket_date_filter) = self.prompt_for_date() + + # import data + # --------------------------------------------------------------------- sys.stdout.write("Importing user accounts: ") self.import_users() - sys.stdout.write("Loading threads: ") - self.import_content() + if data_choice in [DATA_IMPORT_ALL, DATA_IMPORT_FORUMS]: + self.import_forums(forum_choices, forum_tag_choices, forum_date_filter) + if data_choice in [DATA_IMPORT_ALL, DATA_IMPORT_TICKETS]: + self.import_tickets(tags=ticket_tag_choices, date_filter=ticket_date_filter) + + # cleaning up + # --------------------------------------------------------------------- + if user_answer_limit_reset: + toggle_user_answer_limit_setting(True) + print + print "Done!" + print + + + def prompt_for_forums(self): + """Prompt user to select the forums they'd like to import or choose all + of them + + :returns: (list) zendesk_models.Forum objects selected by user + """ + # special case for console.numeric_multiple_choice_dialog 0 = all + ALL_FORUMS = 0 + public_forums = zendesk_models.Forum.objects.filter(is_public='t').order_by('forum_id') + choices = [f.name for f in public_forums] + prompt = "Which forums do you want to import (separate multiple choices by a space)?" + numeric_choices = console.numeric_multiple_choice_dialog(prompt, choices=choices, all_option=True) + if ALL_FORUMS in numeric_choices: + return public_forums + return [public_forums[f-1] for f in numeric_choices] + + def prompt_for_tags(self): + """Prompt user for a space-separated list of tags to filter imported + objects by. + + Tags are case-insensitive for the import and everything is forced to + lowercase. + + :returns: (list) tags specified by user + """ + prompt = "Enter tags separated by spaces to filter by (leave blank for all):" + tags = console.simple_dialog(prompt, required=False) + return tags.split() + + def prompt_for_date(self): + """Prompt user for start and end dates in YYYY-MM-DD format for + filtering imported objects by a date range of when they were created. + + :returns: (tuple) 2 datetime objects representing the start date and + end date respectively to filter by. Either or both of the elements can + also be None indicating no filter is required for that boundary. + """ + start_date = False + end_date = False + while not start_date: + prompt = "Enter earliest date (yyyy-mm-dd) to import content from (leave blank for all):" + start = console.simple_dialog(prompt, required=False).strip() + if start: + try: + start_date = datetime.strptime(start,"%Y-%m-%d") + except ValueError: + print + print "*** Please enter a date in the format YYYY-MM-DD or leave it blank ***" + else: + start_date = None + break + while not end_date: + prompt = "Enter latest date (yyyy-mm-dd) to import content from (leave blank for all):" + end = console.simple_dialog(prompt, required=False).strip() + if end: + try: + end_date = datetime.strptime(end,"%Y-%m-%d") + except ValueError: + print + print "*** Please enter a date in the format YYYY-MM-DD or leave it blank ***" + else: + end_date = None + break + return (start_date, end_date) def get_file(self, file_name): + """Opens file and reads in xml data + + :param file_name: (str) full path to Zendesk export tgz file + + :returns: (obj) etree object for traversing xml element tree + """ first_item = self.tar.getnames()[0] file_path = file_name if not first_item.endswith('.xml'): @@ -171,29 +672,52 @@ class Command(BaseCommand): xml_file = self.tar.extractfile(file_info) return etree.parse(xml_file) - @transaction.commit_manually + @transaction.autocommit def read_xml_file(self, file_name = None, entry_name = None, model = None, fields = None, - extra_field_mappings = None + extra_field_mappings = None, + sub_entities = [] ): + """Reads xml file, parses entries into Zendesk model objects, and saves + them to the database. + + Values are cast to their correct data types. + + Sub-entities are used for extracting an embedded structure from the + element tree into a separate model and table. + + :param file_name: (str) name of xml file, + :param entry_name: (str) name of entries to read from the xml file + :param model: (obj) model where data will be stored + :param fields: (list) field names (str) in xml that will be translated + to model fields by simple substitiution of '_' for '-' + :param extra_field_mappings (tuple) list of two tuples for xml field + names have specific translation that doesn't follow the standard for + the fields parameter + :param sub_entities: (list) of dicts describing fields that should be + treated as separate models (like Ticket.comments). The structure is + similar to this method. Each dict key is the field name to be treated + as a sub-entity. The value is a tuple with (model, [sub-entity fields], + (sub-entity extra_field_mappings)). + [{'comments': ( + zendesk_models.Comment, + ['author-id', 'created-at', 'is-public', 'type', + 'value', 'via-id', 'ticket-id'], + (),) + }] + todo: support blank values vs. nulls for strings """ - * file_name - is name of xml file, - * entry_name - name of entries to read from the xml file - * model - model, which is to receive data - * fields - list of field names in xml that will be translated to model fields - by simple substitiution of '-' with '_' - * extra field mappings - list of two tuples where xml field names are - translated to model fields in a special way - """ + cursor = connection.cursor() + cursor.execute('TRUNCATE TABLE "{0}" CASCADE'.format(model._meta.db_table)) xml = self.get_file(file_name) items_saved = 0 for xml_entry in xml.findall(entry_name): instance = model() for field in fields: - value = get_val(xml_entry, field) + value = get_xml_element_val(xml_entry, field) model_field_name = field.replace('-', '_') max_length = instance._meta.get_field(model_field_name).max_length if value and max_length: @@ -201,16 +725,55 @@ class Command(BaseCommand): setattr(instance, model_field_name, value) if extra_field_mappings: for (field, model_field_name) in extra_field_mappings: - value = get_val(xml_entry, field) + value = get_xml_element_val(xml_entry, field) setattr(instance, model_field_name, value) + + sub_instances = [] + for sub_entity in sub_entities: + for sub_field_name, sub_def in sub_entity.iteritems(): + sub_list = get_xml_element_val(xml_entry, sub_field_name) + sub_model, sub_fields, sub_extra_field_mappings = sub_def + for child in sub_list: + sub_instance = sub_model() + for sub_field in sub_fields: + sub_value = get_xml_element_val(child, sub_field) + sub_model_field_name = sub_field.replace('-', '_') + sub_max_length = sub_instance._meta.get_field(sub_model_field_name).max_length + if sub_value and sub_max_length: + sub_value = sub_value[:sub_max_length] + setattr(sub_instance, sub_model_field_name, sub_value) + sub_instances.append(sub_instance) + instance.save() - transaction.commit() + for si in sub_instances: + # set the parent id + setattr(si, "%s_id" % entry_name, instance.id) + si.save() items_saved += 1 - console.print_action('%d items' % items_saved) - console.print_action('%d items' % items_saved, nowipe = True) + console.print_action('%d' % items_saved) + console.print_action('%d total' % items_saved, nowipe = True) + def read_organizations(self): + """Read Zendesk Organizations from xml file and save them as Zendesk + models + """ + self.read_xml_file( + file_name = 'organizations.xml', + entry_name = 'organization', + model = zendesk_models.Organization, + fields = ( + 'created-at', 'default', 'details', 'external-id', 'group-id', + 'is-shared', 'is-shared-comments', 'name', + 'notes', 'suspended', 'updated-at' + ), + extra_field_mappings = (('id', 'organization_id'),) + ) + def read_users(self): + """Read Zendesk Users from xml file and save them as Zendesk + models + """ self.read_xml_file( file_name = 'users.xml', entry_name = 'user', @@ -221,10 +784,38 @@ class Command(BaseCommand): 'roles', 'time-zone', 'updated-at', 'uses-12-hour-clock', 'email', 'is-verified', 'photo-url' ), - extra_field_mappings = (('id', 'user_id'),) + extra_field_mappings = (('id', 'zendesk_user_id'),) + ) + + def read_entries(self): + """Read Zendesk Entries from xml file and save them as Zendesk + models. + + Entries in Zendesk are top-level posts in a forum. + """ + self.read_xml_file( + file_name = 'entries.xml', + entry_name = 'entry', + model = zendesk_models.Entry, + fields = ( + 'body', 'created-at', 'flag-type-id', 'forum-id', + 'hits', 'entry-id', 'is-highlighted', 'is-locked', 'is-pinned', + 'is-public', 'organization-id', 'position', 'posts-count', + 'submitter-id', 'title', 'updated-at', 'votes-count' + ), + extra_field_mappings = ( + ('id', 'entry_id'), + ('current-tags', 'tags'), + ) ) def read_posts(self): + """Read Zendesk Posts from xml file and save them as Zendesk + models. + + Posts in Zendesk are children of Entries. They are like replies + on a top-level forum post. + """ self.read_xml_file( file_name = 'posts.xml', entry_name = 'post', @@ -239,6 +830,24 @@ class Command(BaseCommand): ) def read_forums(self): + """Read Zendesk Forums from xml file and save them as Zendesk + models. + + Forums in Zendesk are category groupings for forum posts. They + do not have any "posts" themselves, but have Entries. Entries + then have Posts: + - Forum + - Entry + - Post + - Post + - Entry + - Post + - Forum + - Entry + - Post + ... + ... + """ self.read_xml_file( file_name = 'forums.xml', entry_name = 'forum', @@ -256,36 +865,75 @@ class Command(BaseCommand): extra_field_mappings = (('id', 'forum_id'),) ) - @transaction.commit_manually + def read_tickets(self): + """Read Zendesk Tickets from xml file and save them as Zendesk + models. + + This is a little more complex in that we want to read the Comments + as well which are child elements on the ticket. We define this with the + sub_entities parameter. + """ + self.read_xml_file( + file_name = 'tickets.xml', + entry_name = 'ticket', + model = zendesk_models.Ticket, + fields = ( + 'assigned-at', 'assignee-id', 'base-score', 'created-at', + 'current-collaborators','current-tags','description', + 'due-date', 'entry-id', 'external-id', 'group-id', + 'initially-assigned-at', 'latest-recipients', + 'organization-id', 'original-recipient-address', 'priority-id', + 'recipient', 'requester-id', 'resolution-time', 'solved-at', + 'status-id', 'status-updated-at', 'subject', 'submitter-id', + 'ticket-type-id', 'updated-at', 'updated-by-type-id', 'via-id', + 'score', 'problem-id', 'has-incidents' + ), + extra_field_mappings = (('nice-id', 'ticket_id'),), + sub_entities = [ + {'comments': ( + zendesk_models.Comment, + ['author-id', 'created-at', 'is-public', 'type', 'value', + 'via-id', 'ticket-id'], + None + ) + } + ] + ) + + @transaction.autocommit def import_users(self): + """Creates new Askbot users for each zendesk_models.User. + + For each Zendesk user, see if there are any matching Askbot users + with the same email. If not, create a new Askbot user and copy + over any openauth id info as well. + + See create_askbot_user() for a full list of fields that are copied over + from Zendesk. + """ added_users = 0 for zd_user in zendesk_models.User.objects.all(): - #a whole bunch of fields are actually dropped now - #see what's available in users.xml meanings of some - #values there is not clear - - #if email is blank, just create a new user + # if email is blank, just create a new user if zd_user.email == '': ab_user = create_askbot_user(zd_user) - if ab_user in None: - print 'Warning: could not create user %s ' % zd_user.name + # todo: check for failure? + if ab_user is None: continue + added_users += 1 console.print_action(ab_user.username) else: - #else see if user with the same email already exists - #and only create new askbot user if email is not yet in the - #database + # create new user if no matching user email was found try: ab_user = askbot_models.User.objects.get(email = zd_user.email) except askbot_models.User.DoesNotExist: ab_user = create_askbot_user(zd_user) if ab_user is None: continue - console.print_action(ab_user.username, nowipe = True) added_users += 1 + console.print_action("%d %s" % (added_users, ab_user.username)) zd_user.askbot_user_id = ab_user.id zd_user.save() - + # save open auth info as well. if zd_user.openid_url != None and \ 'askbot.deps.django_authopenid' in settings.INSTALLED_APPS: from askbot.deps.django_authopenid.models import UserAssociation @@ -298,35 +946,197 @@ class Command(BaseCommand): ) assoc.save() except: - #drop user association + # unsupported provider pass - transaction.commit() console.print_action('%d users added' % added_users, nowipe = True) - @transaction.commit_manually - def import_content(self): - thread_ids = zendesk_models.Post.objects.values_list( - 'entry_id', - flat = True - ).distinct() - threads_posted = 0 - for thread_id in thread_ids: - thread_entries = zendesk_models.Post.objects.filter( - entry_id = thread_id - ).order_by('created_at') - question_post = thread_entries[0] - question = post_question(question_post) - question_post.is_processed = True - question_post.save() - transaction.commit() - entry_count = thread_entries.count() - threads_posted += 1 - console.print_action(str(threads_posted)) - if entry_count > 1: - for answer_post in thread_entries[1:]: - post_answer(answer_post, question = question) - answer_post.is_processed = True - answer_post.save() - transaction.commit() - console.print_action(str(threads_posted), nowipe = True) + @transaction.autocommit + def _import_posts(self, question, entry): + """Create Askbot answers from Zendesk Entries. + + :param question: (obj) Askbot Post object with post_type='question' + :param entry: (obj) Zendesk Entry object + """ + for post in zendesk_models.Post.objects.filter( + entry_id=entry.entry_id + ).order_by('created_at'): + # create answers + answer = post_answer(post, question=question) + if not answer: + continue + post.ab_id = answer.id + post.save() + + @transaction.autocommit + def _import_entry(self, entry): + """Create an Askbot question and answers from a Zendesk Entry + + :param entry: (obj) Zendesk Entry object + + :returns: (bool) True if Entry (and Posts linked to the Entry) were + posted successfully. False if not. + """ + question = post_question(entry) + if not question: + return + entry.ab_id = question.id + entry.save() + self._import_posts(question, entry) + return True + + def import_forums(self, forums, tags, date_filter): + """Import Zendesk forums into Askbot. Create questions from Zendesk + Entries and answers from Zendesk Posts. + + :param forums: (list) zendesk_models.Forum objects to import + :param tags: (list) tags (str) to filter Zendesk Forum Entries by. + Entries that match ANY of the tags will be posted as questions. Tags + are case-insensitive in this import regardless of your settings in + Askbot. + :param date_filter: (tuple) two-element tuple representing the start + date and end date to filter Zendesk Forum Entries by date range. The + tuple values are datetime objects or None. + """ + if tags: + print "Filtering forum posts by tags: %s" % tags + if date_filter: + print "Filtering forum post by dates between %s and %s" % (date_filter[0], date_filter[1]) + print "Importing forums... " + print "="*64 + for forum in forums: + thread_count = 0 + # don't import private forums, forums restricted to organizations + # or forums that require login (comment this out if you don't care, + # or modify the viewable_to_public() method for zendesk_models.Forum) + if not forum.viewable_to_public(): + console.print_action("Skipping private forum \"%s\"" % forum.name, + nowipe = True) + continue + sys.stdout.write("[#%d] %s: " % (forum.forum_id, forum.name)) + for entry in zendesk_models.Entry.objects.filter(forum_id=forum.forum_id): + # filters + # if provided, only post entries matching ANY of the tags + if not self._matches_tag_filter(entry.tags, tags): + continue + if not self._matches_date_filter(entry.created_at, date_filter): + continue + if self._import_entry(entry): + thread_count += 1 + console.print_action("%d threads" % thread_count) + console.print_action("%d total threads" % thread_count, nowipe = True) + + @transaction.autocommit + def _import_comments(self, question, ticket): + """Import Zendesk Ticket Comments into Askbot as answers. + + :param question: (obj) askbot Post object with post_type='question' to + create the answers for. + :param ticket: (obj) zendesk_models.Ticket object to pull the comments + from for creating answers. + """ + first = True + i=0 + for comment in zendesk_models.Comment.objects.filter( + ticket_id=ticket.ticket_id, is_public=True + ).order_by('created_at'): + # create answers, first comment is a copy of the one on the ticket + if first: + first = False + continue + i+=1 + answer = post_answer_from_comment(comment, question=question) + if not answer: + continue + comment.ab_id = answer.id + comment.save() + + @transaction.autocommit + def import_tickets(self, tags, date_filter): + """Import Zendesk Tickets into Askbot as questions. + + :param tags: (list) tags (str) to filter Zendesk Tickets by. + Tickets that match ANY of the tags will be posted as questions. Tags + are case-insensitive in this import regardless of your settings in + Askbot. + :param date_filter: (tuple) two-element tuple representing the start + date and end date to filter Zendesk Tickets by date range. The + tuple values are datetime objects or None. The date_filter is matched + against Ticket.created_at. + """ + # todo: optimmize with smart query + # Ticket.objects.get( + # Q(created_at__gt=date_filter[0]), + # Q(created_at__lt=date_filter[1]), + # Q(tags__icontains='foo') | Q(tags__icontains='bar') + # ) + if tags: + print "Filtering tickets by tags: %s" % tags + if date_filter: + print "Filtering tickets by dates between %s and %s" % (date_filter[0], date_filter[1]) + sys.stdout.write("Importing tickets: ") + ticket_count = 0 + for ticket in zendesk_models.Ticket.objects.all(): + # filters + # if provided, only post entries matching ANY of the tags + if not self._matches_tag_filter(ticket.current_tags, tags): + continue + if not self._matches_date_filter(ticket.created_at, date_filter): + continue + question = post_question_from_ticket(ticket) + if not question: + continue + ticket.ab_id = question.id + ticket.save() + self._import_comments(question, ticket) + ticket_count += 1 + console.print_action("%d tickets" % ticket_count) + console.print_action("%d total tickets" % ticket_count, nowipe = True) + + def _matches_tag_filter(self, item_tags, tag_filter): + """Determine if an item's tags satisfy the tag filter. The comparison + is case-insensitive. + + :param item_tags: (str) space-separated string of tags associated with + the item. + + :param filter_tags: (str) space-separated string of tags being filtered + for. + + :returns: (bool) True if ANY of the tags in item_tags match the tags + in tag_filter. False if no matches are found. + """ + if not tag_filter: + return True + if not item_tags: + return False + item_tags_list = item_tags.lower().split() + for t in tag_filter: + if t.lower() in item_tags_list: + return True + return False + + def _matches_date_filter(self, item_date, date_filter): + """determine if an item's datetime stamp satisfies the date filter. + + :param item_date: (datetime) generally the item's created_at datetime + object + + :param date_filter: (tuple) pair of datetime objects representing the + start and end dates to filter items by. If the first object is None, + then the filter implies all items before the second datetime. Conversely, + if the second datetime is None, the filter implies all items after the + first datetime. + + :returns: (bool) True if date_filter is an empty tuple OR the item_date + falls within the date_filter tuple. + """ + if not date_filter: + return True + start_date = date_filter[0] + end_date = date_filter[1] + if not start_date: + start_date = datetime.min + if not end_date: + end_date = datetime.max + return item_date > start_date and item_date < end_date diff --git a/askbot/importers/zendesk/models.py b/askbot/importers/zendesk/models.py index 6a321915..da16bb51 100644 --- a/askbot/importers/zendesk/models.py +++ b/askbot/importers/zendesk/models.py @@ -4,9 +4,64 @@ from django.contrib.auth.models import User as DjangoUser from django.utils.html import strip_tags from askbot.utils.html import unescape -TAGS = {}#internal cache for mappings forum id -> forum name +TAGS = {}#internal cache for mappings forum id _> forum name + +# todo: don't allow nulls in char fields that should just allow empty strings + +class Entry(models.Model): + """ + Top level topic posts in a forum + """ + body = models.TextField() + created_at = models.DateTimeField() + tags = models.CharField(max_length = 255, null = True) + flag_type_id = models.IntegerField() # topic type + forum_id = models.IntegerField() # forum entry is in + hits = models.IntegerField(null = True) # number of views + entry_id = models.IntegerField() + is_highlighted = models.BooleanField(default = False) # ignored + is_locked = models.BooleanField(default = False) # close + is_pinned = models.BooleanField(default = False) # ignored + is_public = models.BooleanField(default = True) + organization_id = models.IntegerField(null = True) + position = models.IntegerField(null = True) # ignored + posts_count = models.IntegerField(null = True) + submitter_id = models.IntegerField() + title = models.CharField(max_length = 300) + updated_at = models.DateTimeField() + votes_count = models.IntegerField(null = True, default = 0) + ab_id = models.IntegerField(null = True) + + def get_author(self): + """returns author of the post, from the Django user table""" + zendesk_user = User.objects.get(zendesk_user_id = self.submitter_id) + return DjangoUser.objects.get(id = zendesk_user.askbot_user_id) + + def get_body_text(self): + """unescapes html entities in the body text, + saves in the internal cache and returns the value""" + if not hasattr(self, '_body_text'): + self._body_text = unescape(self.body) + return self._body_text + + def get_tag_names(self): + """return tags on entry as well as forum title as a tag""" + # if self.forum_id not in TAGS: + # forum = Forum.objects.get(forum_id = self.forum_id) + # tag_name = re.sub(r'\s+', '_', forum.name.lower()) + # TAGS[self.forum_id] = tag_name + # tags = TAGS[self.forum_id] + # if self.tags: + # tags += " %s" % self.tags + if not self.tags: + return "forum" + else: + return "forum %s" % self.tags.lower() class Post(models.Model): + """ + comments on an Entry in a Forum + """ body = models.TextField() created_at = models.DateTimeField() updated_at = models.DateTimeField() @@ -15,11 +70,11 @@ class Post(models.Model): forum_id = models.IntegerField() user_id = models.IntegerField() is_informative = models.BooleanField() - is_processed = models.BooleanField(default = False) + ab_id = models.IntegerField(null = True) def get_author(self): """returns author of the post, from the Django user table""" - zendesk_user = User.objects.get(user_id = self.user_id) + zendesk_user = User.objects.get(zendesk_user_id = self.user_id) return DjangoUser.objects.get(id = zendesk_user.askbot_user_id) def get_body_text(self): @@ -29,31 +84,31 @@ class Post(models.Model): self._body_text = unescape(self.body) return self._body_text - def get_fake_title(self): - """extract first 10 words from the body text and strip tags""" - words = re.split(r'\s+', self.get_body_text()) - if len(words) > 10: - words = words[:10] - return strip_tags(' '.join(words)) - - def get_tag_name(self): - if self.forum_id not in TAGS: - forum = Forum.objects.get(forum_id = self.forum_id) - tag_name = re.sub(r'\s+', '-', forum.name.lower()) - TAGS[self.forum_id] = tag_name - return TAGS[self.forum_id] +class Organization(models.Model): + created_at = models.DateTimeField() + default = models.CharField(max_length = 255, null=True) + details = models.TextField(null=True) + external_id = models.IntegerField(null = True) + group_id = models.IntegerField(null = True) + organization_id = models.IntegerField(unique=True) + is_shared = models.BooleanField() + is_shared_comments = models.BooleanField() + name = models.CharField(max_length = 255) + notes = models.TextField(null=True) + suspended = models.BooleanField() + updated_at = models.DateTimeField() class User(models.Model): - user_id = models.IntegerField() + zendesk_user_id = models.IntegerField() askbot_user_id = models.IntegerField(null = True) created_at = models.DateTimeField() is_active = models.BooleanField() last_login = models.DateTimeField(null = True) name = models.CharField(max_length = 255) openid_url = models.URLField(null = True) - organization_id = models.IntegerField(null = True) phone = models.CharField(max_length = 32, null = True) restriction_id = models.IntegerField() + organization_id = models.IntegerField(null=True) roles = models.IntegerField() time_zone = models.CharField(max_length = 255) updated_at = models.DateTimeField() @@ -61,6 +116,10 @@ class User(models.Model): email = models.EmailField(null = True) is_verified = models.BooleanField() photo_url = models.URLField() + # can't use foreign keys because Zendesk doesn't necessarily remove + # the user's organization_id if it's deleted which then causes an + # integrity error when trying to import here + # organization = models.ForeignKey(Organization, to_field='organization_id', null=True) class Forum(models.Model): description = models.CharField(max_length = 255, null = True) @@ -76,3 +135,98 @@ class Forum(models.Model): use_for_suggestions = models.BooleanField() visibility_restriction_id = models.IntegerField() is_public = models.BooleanField() + + def viewable_to_public(self): + """There are two ways to restrict visibility of the forum. If is_public + is False, then it's not public, duh. But for + visibility_restriction_id: + 1=viewable to everyone + 2=viewable to logged in users only + 3=viewable to logged in agents only + organization_id: + if not null, this forum is restricted to a specific organization + on top of other restrictions + """ + if (not self.is_public or self.visibility_restriction_id != 1 or + self.organization_id): + return False + else: + return True + +class Ticket(models.Model): + """todo: custom fields""" + assigned_at = models.DateTimeField(null=True) + assignee_id = models.IntegerField(null=True) + base_score = models.IntegerField() + created_at = models.DateTimeField() + current_collaborators = models.CharField(max_length = 255, null=True) + current_tags = models.CharField(max_length = 255, null=True) + description = models.CharField(max_length = 1000, null=True) + due_date = models.DateTimeField(null=True) + entry_id = models.IntegerField(null = True) + external_id = models.IntegerField(null = True) + group_id = models.IntegerField(null = True) + initially_assigned_at = models.DateTimeField(null=True) + latest_recipients = models.CharField(max_length = 255, null = True) + ticket_id = models.IntegerField() + organization_id = models.IntegerField(null = True) + original_recipient_address = models.CharField(max_length = 255, null = True) + priority_id = models.IntegerField() + recipient = models.CharField(max_length = 255, null=True) + requester_id = models.IntegerField() + resolution_time = models.IntegerField(null = True) + solved_at = models.DateTimeField(null=True) + status_id = models.IntegerField() + status_updated_at = models.DateTimeField() + subject = models.CharField(max_length = 255, null=True) + submitter_id = models.IntegerField() + ticket_type_id = models.IntegerField() + updated_at = models.DateTimeField() + updated_by_type_id = models.IntegerField(null = True) + via_id = models.IntegerField() + score = models.IntegerField() + problem_id = models.IntegerField(null = True) + has_incidents = models.BooleanField(default = False) + ab_id = models.IntegerField(null = True) + + def get_author(self): + """returns author of the comment, from the Django user table""" + zendesk_user = User.objects.get(zendesk_user_id = self.requester_id) + return DjangoUser.objects.get(id = zendesk_user.askbot_user_id) + + def get_body_text(self): + """unescapes html entities in the body text, + saves in the internal cache and returns the value""" + if not hasattr(self, '_body_text'): + self._body_text = unescape(self.description) + return self._body_text + + def get_tag_names(self): + if not self.current_tags: + return "ticket" + else: + return "ticket %s" % self.current_tags.lower() + +class Comment(models.Model): + """todo: attachments""" + author_id = models.IntegerField() + created_at = models.DateTimeField() + is_public = models.BooleanField(default = True) + type = models.CharField(max_length = 255) + value = models.CharField(max_length = 1000) + via_id = models.IntegerField() + ticket_id = models.IntegerField() + ab_id = models.IntegerField(null = True) + + def get_author(self): + """returns author of the comment, from the Django user table""" + zendesk_user = User.objects.get(zendesk_user_id = self.author_id) + return DjangoUser.objects.get(id = zendesk_user.askbot_user_id) + + def get_body_text(self): + """unescapes html entities in the body text, + saves in the internal cache and returns the value""" + if not hasattr(self, '_body_text'): + self._body_text = unescape(self.value) + return self._body_text + |