diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-11-21 22:20:45 -0500 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-11-21 22:20:45 -0500 |
commit | ee9335d17ca228778123a1968ceccb312863c022 (patch) | |
tree | 4fdfc9364478b9ae0299826e7ad8a0fead9396f0 | |
parent | e811e9d5b152c01428a954a44bbab348fbe374e8 (diff) | |
download | askbot-ee9335d17ca228778123a1968ceccb312863c022.tar.gz askbot-ee9335d17ca228778123a1968ceccb312863c022.tar.bz2 askbot-ee9335d17ca228778123a1968ceccb312863c022.zip |
added rename_tags, rename_tags_id and delete_unused_tags management commands
-rw-r--r-- | askbot/__init__.py | 2 | ||||
-rw-r--r-- | askbot/api.py | 29 | ||||
-rw-r--r-- | askbot/const/__init__.py | 3 | ||||
-rw-r--r-- | askbot/deployment/__init__.py | 12 | ||||
-rw-r--r-- | askbot/deployment/dialogs.py | 20 | ||||
-rw-r--r-- | askbot/deployment/path_utils.py | 14 | ||||
-rw-r--r-- | askbot/importers/stackexchange/management/commands/load_stackexchange.py | 15 | ||||
-rw-r--r-- | askbot/management/commands/create_thousand_tags.py | 18 | ||||
-rw-r--r-- | askbot/management/commands/delete_unused_tags.py | 39 | ||||
-rw-r--r-- | askbot/management/commands/rename_tags.py | 151 | ||||
-rw-r--r-- | askbot/management/commands/rename_tags_id.py | 176 | ||||
-rw-r--r-- | askbot/models/__init__.py | 2 | ||||
-rw-r--r-- | askbot/models/question.py | 18 | ||||
-rw-r--r-- | askbot/utils/console.py | 23 |
14 files changed, 486 insertions, 36 deletions
diff --git a/askbot/__init__.py b/askbot/__init__.py index 823fec97..e3b1f1a2 100644 --- a/askbot/__init__.py +++ b/askbot/__init__.py @@ -19,4 +19,4 @@ def get_version(): """returns version of the askbot app this version is meaningful for pypi only """ - return '0.6.34' + return '0.6.35' diff --git a/askbot/api.py b/askbot/api.py index c79f29dd..8b788016 100644 --- a/askbot/api.py +++ b/askbot/api.py @@ -4,6 +4,7 @@ in the askbot.models module, but api must become a place to manupulate the data in the askbot application so that other implementations of the data storage could be possible """ +from django.db.models import Q from askbot import models from askbot import const @@ -32,3 +33,31 @@ def get_info_on_moderation_items(user): 'seen_count': seen_count, 'new_count': new_count } + +def get_admin(seed_user_id = None): + """returns user objects with id == seed_user_id + if the user with that id is not an administrator, + the function will try to find another admin or moderator + who has the smallest user id + + if the user is not found, or there are no moderators/admins + User.DoesNotExist will be raised + + The reason this function is here and not on a manager of + the user object is because we still patch the django-auth User table + and it's probably better not to patch the manager + """ + + if seed_user_id: + user = models.User.objects.get(id = seed_user_id)#let it raise error here + if user.is_administrator() or user.is_moderator(): + return user + try: + return models.User.objects.filter( + Q(is_superuser=True) | Q(status='m') + ).order_by('id')[0] + except IndexError: + raise models.User.DoesNotExist( + """Please add a moderator or an administrator to the forum first + there don't seem to be any""" + ) diff --git a/askbot/const/__init__.py b/askbot/const/__init__.py index 592b421e..eb1ba23d 100644 --- a/askbot/const/__init__.py +++ b/askbot/const/__init__.py @@ -80,7 +80,8 @@ UNANSWERED_QUESTION_MEANING_CHOICES = ( #however it will be hard to expect that people will type #correct regexes - plus this must be an anchored regex #to do full string match -TAG_REGEX = r'^[\w\+\.\-#]+$' +TAG_CHARS = '\w\+\.\-#' +TAG_REGEX = r'^[%s]+$' % TAG_CHARS TAG_SPLIT_REGEX = r'[ ,]+' TYPE_ACTIVITY_ASK_QUESTION=1 diff --git a/askbot/deployment/__init__.py b/askbot/deployment/__init__.py index 9f4ebaf9..59bb1a03 100644 --- a/askbot/deployment/__init__.py +++ b/askbot/deployment/__init__.py @@ -2,8 +2,8 @@ module for deploying askbot """ import os.path +from askbot.utils import console from askbot.deployment import messages -from askbot.deployment import dialogs from askbot.deployment import path_utils def startforum(): @@ -41,9 +41,10 @@ def startforum(): { 'path': directory } - should_add_app = dialogs.multiple_choice_input( + should_add_app = console.choice_dialog( message, - options = ['yes','no'] + choices = ['yes','no'], + invalid_phrase = messages.INVALID_INPUT ) if should_add_app == 'yes': assert(create_new == False) @@ -71,9 +72,10 @@ def startforum(): continue else: message = messages.format_msg_create(directory) - should_create_new = dialogs.multiple_choice_input( + should_create_new = console.choice_dialog( message, - options = ['yes','no'] + choices = ['yes','no'], + invalid_phrase = messages.INVALID_INPUT ) if should_create_new == 'yes': if path_utils.dir_name_acceptable(directory): diff --git a/askbot/deployment/dialogs.py b/askbot/deployment/dialogs.py deleted file mode 100644 index 40f0d2ee..00000000 --- a/askbot/deployment/dialogs.py +++ /dev/null @@ -1,20 +0,0 @@ -"""functions that directly handle user input -""" -from askbot.deployment import messages -import time - -def multiple_choice_input(prompt_phrase, options = None): - """prints a prompt, accepts keyboard input - and makes sure that user response is one of given - in the options argument, which is required - and must be a list - """ - assert(isinstance(options, list)) - while 1: - response = raw_input('\n%s (type %s): ' % (prompt_phrase, '/'.join(options))) - if response in options: - return response - else: - opt_string = ','.join(options) - print messages.INVALID_INPUT % {'opt_string': opt_string} - time.sleep(1) diff --git a/askbot/deployment/path_utils.py b/askbot/deployment/path_utils.py index 71e66182..eb32e4e5 100644 --- a/askbot/deployment/path_utils.py +++ b/askbot/deployment/path_utils.py @@ -115,11 +115,19 @@ def deploy_into(directory, new_project = None): assert(new_project is not None) if new_project: copy_files = ('__init__.py', 'settings.py', 'manage.py', 'urls.py') - print 'copying files: ', + blank_files = ('__init__.py', 'manage.py') + print 'Copying files: ' for file_name in copy_files: src = os.path.join(SOURCE_DIR, 'setup_templates', file_name) - print '%s ' % file_name, - shutil.copy(src, directory) + if os.path.exists(os.path.join(directory, file_name)): + if file_name in blank_files: + continue + else: + print '* %s' % file_name, + print "- you already have one, please add contents of %s" % src + else: + print '* %s ' % file_name + shutil.copy(src, directory) #copy log directory src = os.path.join(SOURCE_DIR, 'setup_templates', 'log') dst = os.path.join(directory, 'log') diff --git a/askbot/importers/stackexchange/management/commands/load_stackexchange.py b/askbot/importers/stackexchange/management/commands/load_stackexchange.py index a813716a..fb3b520f 100644 --- a/askbot/importers/stackexchange/management/commands/load_stackexchange.py +++ b/askbot/importers/stackexchange/management/commands/load_stackexchange.py @@ -5,7 +5,7 @@ import re import sys import askbot.importers.stackexchange.parse_models as se_parser from xml.etree import ElementTree as et -from django.db import models +from django.db import models, transaction import askbot.models as askbot import askbot.deps.django_authopenid.models as askbot_openid import askbot.importers.stackexchange.models as se @@ -271,6 +271,7 @@ class Command(BaseCommand): help = 'Loads StackExchange data from unzipped directory of XML files into the ASKBOT database' args = 'se_dump_dir' + @transaction.commit_manually def handle(self, *arg, **kwarg): if len(arg) < 1 or not os.path.isdir(arg[0]): print 'Error: first argument must be a directory with all the SE *.xml files' @@ -282,6 +283,7 @@ class Command(BaseCommand): xml_path = self.get_xml_path(xml) table_name = self.get_table_name(xml) self.load_xml_file(xml_path, table_name) + transaction.commit() #this is important so that when we clean up messages #automatically generated by the procedures below @@ -296,35 +298,46 @@ class Command(BaseCommand): print 'Transferring users...', sys.stdout.flush() self.transfer_users() + transaction.commit() print 'done.' print 'Transferring content edits...', sys.stdout.flush() self.transfer_question_and_answer_activity() + transaction.commit() print 'done.' print 'Transferring view counts...', sys.stdout.flush() self.transfer_question_view_counts() + transaction.commit() print 'done.' print 'Transferring comments...', sys.stdout.flush() self.transfer_comments() + transaction.commit() print 'done.' print 'Transferring badges and badge awards...', sys.stdout.flush() self.transfer_badges() + transaction.commit() print 'done.' print 'Transferring votes...', sys.stdout.flush() self.transfer_votes()#includes favorites, accepts and flags + transaction.commit() print 'done.' self.cleanup_messages()#delete autogenerated messages + transaction.commit() self.transfer_messages() + transaction.commit() #todo: these are not clear how to go about self.transfer_update_subscriptions() + transaction.commit() self.transfer_tag_preferences() + transaction.commit() self.transfer_meta_pages() + transaction.commit() def save_askbot_message_id_list(self): id_list = list(DjangoMessage.objects.all().values('id')) diff --git a/askbot/management/commands/create_thousand_tags.py b/askbot/management/commands/create_thousand_tags.py new file mode 100644 index 00000000..05c77f46 --- /dev/null +++ b/askbot/management/commands/create_thousand_tags.py @@ -0,0 +1,18 @@ +from django.core.management.base import NoArgsCommand +from django.db import transaction +from askbot import models +import sys + +class Command(NoArgsCommand): + @transaction.commit_manually + def handle_noargs(self, **options): + user = models.User.objects.get(id=2) + for i in xrange(1000): + name = 'tag' + str(i) + models.Tag.objects.create( + name = name, + created_by = user + ) + if i % 1000 == 0: + transaction.commit() + transaction.commit() diff --git a/askbot/management/commands/delete_unused_tags.py b/askbot/management/commands/delete_unused_tags.py new file mode 100644 index 00000000..e5e340d0 --- /dev/null +++ b/askbot/management/commands/delete_unused_tags.py @@ -0,0 +1,39 @@ +from django.core.management.base import NoArgsCommand +from django.db import transaction +from askbot import models +import sys + +class Command(NoArgsCommand): + @transaction.commit_manually + def handle_noargs(self, **options): + tags = models.Tag.objects.all() + count = 0 + print "Searching for unused tags:", + total = tags.count() + deleted_tags = list() + for tag in tags: + if tag.questions.all().count() == 0: + deleted_tags.append(tag.name) + tag.delete() + transaction.commit() + count += 1 + sys.stdout.write('%6.2f%%' % (100*float(count)/float(total))) + sys.stdout.flush() + sys.stdout.write('\b'*7) + sys.stdout.write('\n') + + if deleted_tags: + found_count = len(deleted_tags) + if found_count == 1: + print "Found an unused tag %s" % deleted_tags[0] + else: + sys.stdout.write("Found %d unused tags" % found_count) + if found_count > 50: + print ", first 50 are:", + print ', '.join(deleted_tags[:50]) + '.' + else: + print ": " + ', '.join(deleted_tags) + '.' + print "Deleted." + else: + print "Did not find any." + diff --git a/askbot/management/commands/rename_tags.py b/askbot/management/commands/rename_tags.py new file mode 100644 index 00000000..bb7e54e2 --- /dev/null +++ b/askbot/management/commands/rename_tags.py @@ -0,0 +1,151 @@ +"""management command that renames a tag or merges +it to another, all corresponding questions are automatically +retagged +""" +import sys +from optparse import make_option +from django.db import transaction +from django.core import management +from django.core.management.base import BaseCommand, CommandError +from askbot import api, models +from askbot.utils import console + +def get_admin(seed_user_id = None): + """requests admin with an optional seeded user id + """ + try: + admin = api.get_admin(seed_user_id = seed_user_id) + except models.User.DoesNotExist, e: + raise CommandError(e) + + if admin.id != seed_user_id: + if seed_user_id is None: + prompt = """You have not provided user id for the moderator +who to assign as the performer this operation, the default moderator is +%s, id=%s. Will that work?""" % (admin.username, admin.id) + else: + prompt = """User with id=%s is not a moderator +would you like to select default moderator %s, id=%d +to run this operation?""" % (seed_user_id, admin.username, admin.id) + choice = console.choice_dialog(prompt, choices = ('yes', 'no')) + if choice == 'no': + print 'Canceled' + sys.exit() + return admin + +def parse_tag_names(input): + decoded_input = input.decode(sys.stdin.encoding) + return set(decoded_input.strip().split(' ')) + +def format_tag_ids(tag_list): + return ' '.join([str(tag.id) for tag in tag_list]) + +class Command(BaseCommand): + "The command object itself" + + help = """Retags questions tagged with <from_names> to <to_names>. + +If in the end some tags end up being unused, they are automatically removed. +Tag names are case sensitive, non-ascii characters are also accepted. + +* if --user-id is provided, it will be used to set the user performing the operation +* The user must be either administrator or moderator +* if --user-id is not given, the earliest active site administrator will be assigned + +Both --to and --from arguments accept multiple tags, but the argument must be quoted +in that case (e.g. --from="raw material" --to="raw-material"), thus tags +can be renamed, merged or split. It is highly recommended to first inspect the +list of questions that are to be affected before running this operation. + +The tag rename operation cannot be undone, but the command will +ask you to confirm your action before making changes. + """ + option_list = BaseCommand.option_list + ( + make_option('--from', + action = 'store', + type = 'str', + dest = 'from', + default = None, + help = 'list of tag names which needs to be replaced' + ), + make_option('--to', + action = 'store', + type = 'str', + dest = 'to', + default = None, + help = 'list of tag names that are to be used instead' + ), + make_option('--user-id', + action = 'store', + type = 'int', + dest = 'user_id', + default = None, + help = 'id of the user who will be marked as a performer of this operation' + ), + ) + + #@transaction.commit_manually + def handle(self, *args, **options): + """command handle function. reads tag names, decodes + them using the standard input encoding and attempts to find + the matching tags + + If "from" tags are not resolved, command fails + if one of "to" tag is not resolved, a new tag is created + + The data of tag id's is then delegated to the command "rename_tag_id" + """ + if options['from'] is None: + raise CommandError('the --from argument is required') + if options['to'] is None: + raise CommandError('the --to argument is required') + from_tag_names = parse_tag_names(options['from']) + to_tag_names = parse_tag_names(options['to']) + + in_both = from_tag_names & to_tag_names + if in_both: + in_both_str = u' '.join(in_both) + if len(in_both) > 1: + error_message = 'Tags %s appear to be ' % in_both_str + else: + error_message = 'Tag %s appears to be ' % in_both_str + raise CommandError(error_message + 'in both --from and --to sets') + + from_tags = list() + try: + for tag_name in from_tag_names: + from_tags.append(models.Tag.objects.get(name = tag_name)) + except models.Tag.DoesNotExist: + error_message = u"""tag %s was not found. It is possible that the tag +exists but we were not able to match it's unicode value +or you may have misspelled the tag. Please remember that +tag names are case sensitive. + +Also, you can try command "rename_tag_id" +""" % tag_name + raise CommandError(error_message) + except models.Tag.MultipleObjectsReturned: + raise CommandError(u'found more than one tag named %s' % from_tag_name) + + admin = get_admin(seed_user_id = options['user_id']) + + to_tags = list() + for tag_name in to_tag_names: + try: + to_tags.append(models.Tag.objects.get(name = tag_name)) + except models.Tag.DoesNotExist: + to_tags.append( + models.Tag.objects.create( + name = tag_name, + created_by = admin + ) + ) + except models.Tag.MultipleObjectsReturned: + raise CommandError(u'found more than one tag named %s' % tag_name) + #transaction.commit() + + options['user_id'] = admin.id + options['from'] = format_tag_ids(from_tags) + options['to'] = format_tag_ids(to_tags) + + management.call_command('rename_tags_id', *args, **options) diff --git a/askbot/management/commands/rename_tags_id.py b/askbot/management/commands/rename_tags_id.py new file mode 100644 index 00000000..6e67b441 --- /dev/null +++ b/askbot/management/commands/rename_tags_id.py @@ -0,0 +1,176 @@ +"""management command that transfer tag usage data from +one tag to another and deletes the "from" tag + +both "from" and "to" tags are identified by id + +also, corresponding questions are retagged +""" +import re +import sys +from optparse import make_option +from django.core.management.base import BaseCommand, CommandError +from django.db import transaction +from askbot import const, models +from askbot.utils import console +from askbot.management.commands.rename_tags import get_admin + +def get_tags_by_ids(tag_ids): + tags = list() + for tag_id in tag_ids: + try: + tags.append(models.Tag.objects.get(id = tag_id)) + except models.Tag.DoesNotExist: + raise CommandError('tag with id=%s not found' % tag_id) + return tags + +def get_similar_tags_from_strings(tag_strings, tag_name): + """returns a list of tags, similar to tag_name from a set of questions""" + + grab_pattern = r'\b([%(ch)s]*%(nm)s[%(ch)s]*)\b' % \ + {'ch': const.TAG_CHARS, 'nm': from_tag.name} + grab_re = re.compile(grab_pattern, RE.IGNORECASE) + + similar_tags = set() + for tag_string in tag_strings: + similar_tags.update( + grab_re.findall(tag_string) + ) + return similar_tags + +def parse_tag_ids(input): + input = input.strip().split(' ') + return set([int(i) for i in input]) + +def get_tag_names(tag_list): + return set([tag.name for tag in tag_list]) + +def format_tag_name_list(tag_list): + name_list = get_tag_names(tag_list) + return u', '.join(name_list) + +class Command(BaseCommand): + "The command object itself" + + help = """Retags questions from one set of tags to another, like +rename_tags, but using tag id's + + +""" + option_list = BaseCommand.option_list + ( + make_option('--from', + action = 'store', + type = 'str', + dest = 'from', + default = None, + help = 'list of tag IDs which needs to be replaced' + ), + make_option('--to', + action = 'store', + type = 'str', + dest = 'to', + default = None, + help = 'list of tag IDs that are to be used instead' + ), + make_option('--user-id', + action = 'store', + type = 'int', + dest = 'user_id', + default = None, + help = 'id of the user who will be marked as a performer of this operation' + ), + ) + + #@transaction.commit_manually + def handle(self, *args, **options): + """command handle function. retrieves tags by id + """ + try: + from_tag_ids = parse_tag_ids(options['from']) + to_tag_ids = parse_tag_ids(options['to']) + except: + raise CommandError('Tag IDs must be integer') + + in_both = from_tag_ids & to_tag_ids + if in_both: + tag_str = ', '.join([str(i) for i in in_both]) + if len(in_both) > 1: + error_message = 'Tags with IDs %s appear ' % tag_str + else: + error_message = 'Tag with ID %s appears ' % tag_str + raise CommandError(error_message + 'in both --from and --to sets') + + from_tags = get_tags_by_ids(from_tag_ids) + to_tags = get_tags_by_ids(to_tag_ids) + admin = get_admin(options['user_id']) + + questions = models.Question.objects.all() + for from_tag in from_tags: + questions = questions.filter(tags = from_tag) + + #print some feedback here and give a chance to bail out + question_count = questions.count() + if question_count == 0: + print """Did not find any matching questions, +you might want to run prune_unused_tags +or repost a bug, if that does not help""" + elif question_count == 1: + print "One question matches:" + elif question_count <= 10: + print "%d questions match:" % question_count + if question_count > 10: + print "%d questions match." % question_count + print "First 10 are:" + for question in questions[:10]: + print '* %s' % question.title.strip() + + from_tag_names = format_tag_name_list(from_tags) + to_tag_names = format_tag_name_list(to_tags) + + prompt = 'Rename tags %s --> %s?' % (from_tag_names, to_tag_names) + choice = console.choice_dialog(prompt, choices=('yes', 'no')) + if choice == 'no': + print 'Canceled' + sys.exit() + else: + sys.stdout.write('Processing:') + + #actual processing stage, only after this point we start to + #modify stuff in the database, one question per transaction + from_tag_names = get_tag_names(from_tags) + to_tag_names = get_tag_names(to_tags) + i = 0 + for question in questions: + tag_names = set(question.get_tag_names()) + tag_names.update(to_tag_names) + tag_names.difference_update(from_tag_names) + + admin.retag_question( + question = question, + tags = u' '.join(tag_names), + #silent = True #do we want to timestamp activity on question + ) + i += 1 + sys.stdout.write('%6.2f%%' % (100*float(i)/float(question_count))) + sys.stdout.write('\b'*7) + sys.stdout.flush() + + sys.stdout.write('\n') + #transaction.commit() + + #may need to run assertions on that there are + #print 'Searching for similar tags...', + #leftover_questions = models.Question.objects.filter( + # icontains = from_tag.name + # ) + #if leftover_questions.count() > 0: + # tag_strings = leftover_questions.values_list('tagnames', flat=True) + # similar_tags = get_similar_tags_from_strings( + # tag_strings, + # from_tag.name + # ) + # print '%d found:' % len(similar_tags), + # print '\n*'.join(sorted(list(similar_tags))) + #else: + # print "None found." + #print "Done." + #transaction.commit() diff --git a/askbot/models/__init__.py b/askbot/models/__init__.py index 7c84cf4a..4166bd82 100644 --- a/askbot/models/__init__.py +++ b/askbot/models/__init__.py @@ -697,12 +697,14 @@ def user_retag_question( question = None, tags = None, timestamp = None, + silent = False ): self.assert_can_retag_question(question) question.retag( retagged_by = self, retagged_at = timestamp, tagnames = tags, + silent = silent ) @auto_now_timestamp diff --git a/askbot/models/question.py b/askbot/models/question.py index 9ab91688..e4a0e799 100644 --- a/askbot/models/question.py +++ b/askbot/models/question.py @@ -497,15 +497,16 @@ class Question(content.Content, DeletableContent): recipients -= set(exclude_list) return recipients - def retag(self, retagged_by=None, retagged_at=None, tagnames=None): + def retag(self, retagged_by=None, retagged_at=None, tagnames=None, silent=False): if None in (retagged_by, retagged_at, tagnames): raise Exception('arguments retagged_at, retagged_by and tagnames are required') # Update the Question itself self.tagnames = tagnames - self.last_edited_at = retagged_at - self.last_activity_at = retagged_at - self.last_edited_by = retagged_by - self.last_activity_by = retagged_by + if silent == False: + self.last_edited_at = retagged_at + self.last_activity_at = retagged_at + self.last_edited_by = retagged_by + self.last_activity_by = retagged_by self.save() # Update the Question's tag associations @@ -596,6 +597,13 @@ class Question(content.Content, DeletableContent): """Creates a list of Tag names from the ``tagnames`` attribute.""" return self.tagnames.split(u' ') + def set_tag_names(self, tag_names): + """expects some iterable of unicode string tag names + joins the names with a space and assigns to self.tagnames + does not save the object + """ + self.tagnames = u' '.join(tag_names) + def tagname_meta_generator(self): return u','.join([unicode(tag) for tag in self.get_tag_names()]) diff --git a/askbot/utils/console.py b/askbot/utils/console.py new file mode 100644 index 00000000..0a83894c --- /dev/null +++ b/askbot/utils/console.py @@ -0,0 +1,23 @@ +"""functions that directly handle user input +""" +import time + +def choice_dialog(prompt_phrase, choices = None, invalid_phrase = None): + """prints a prompt, accepts keyboard input + and makes sure that user response is one of given + in the choices argument, which is required + and must be a list + + invalid_phrase must be a string with %(opt_string)s + placeholder + """ + assert(hasattr(choices, '__iter__')) + assert(not isinstance(choices, basestring)) + while 1: + response = raw_input('\n%s (type %s): ' % (prompt_phrase, '/'.join(choices))) + if response in choices: + return response + elif invalid_phrase != None: + opt_string = ','.join(choices) + print invalid_phrase % {'opt_string': opt_string} + time.sleep(1) |