summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvgeny Fadeev <evgeny.fadeev@gmail.com>2013-05-28 02:23:46 -0400
committerEvgeny Fadeev <evgeny.fadeev@gmail.com>2013-05-28 02:23:46 -0400
commit336c7e3ae202efad4fa10e242f66f0f35b6c1a07 (patch)
treeffd6c272a9cdb7bd05d9076222a1fd354d964b32
parent4a06eed283eb00558d78dc8725e0c10e14e448a4 (diff)
downloadaskbot-336c7e3ae202efad4fa10e242f66f0f35b6c1a07.tar.gz
askbot-336c7e3ae202efad4fa10e242f66f0f35b6c1a07.tar.bz2
askbot-336c7e3ae202efad4fa10e242f66f0f35b6c1a07.zip
added management command apply_hinted_tags
-rw-r--r--askbot/doc/source/changelog.rst1
-rw-r--r--askbot/doc/source/management-commands.rst5
-rw-r--r--askbot/management/commands/apply_hinted_tags.py58
-rw-r--r--askbot/models/question.py58
4 files changed, 122 insertions, 0 deletions
diff --git a/askbot/doc/source/changelog.rst b/askbot/doc/source/changelog.rst
index 85a7015e..48afb5ea 100644
--- a/askbot/doc/source/changelog.rst
+++ b/askbot/doc/source/changelog.rst
@@ -3,6 +3,7 @@ Changes in Askbot
Development version
-------------------
+* Added management command `apply_hinted_tags` to batch-apply tags from a list
* Added hovercard on the user's karma display in the header
* Added option to hide ad blocks from logged in users
* Applied Askbot templates to the settings control panel
diff --git a/askbot/doc/source/management-commands.rst b/askbot/doc/source/management-commands.rst
index cc5e952f..da93dcb5 100644
--- a/askbot/doc/source/management-commands.rst
+++ b/askbot/doc/source/management-commands.rst
@@ -25,6 +25,11 @@ The bulk of the management commands fall into this group and will probably be th
| `add_admin <user_id>` | Turn user into an administrator |
| | `<user_id>` is a numeric user id of the account |
+---------------------------------+-------------------------------------------------------------+
+| `apply_hinted_tags | Apply tags to all questions in batch given the list of tags |
+| --tags-file <file>` | provided with a file. The file must contain tags - |
+| | one per line. If many tags match - only the most frequent |
+| | will be selected. |
++---------------------------------+-------------------------------------------------------------+
| `remove_admin <user_id>` | Remove admin status from a user account - the opposite of |
| | the `add_admin` command |
+---------------------------------+-------------------------------------------------------------+
diff --git a/askbot/management/commands/apply_hinted_tags.py b/askbot/management/commands/apply_hinted_tags.py
new file mode 100644
index 00000000..94bf2383
--- /dev/null
+++ b/askbot/management/commands/apply_hinted_tags.py
@@ -0,0 +1,58 @@
+import datetime
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
+from optparse import make_option
+from askbot.utils.console import ProgressBar
+from askbot.models import Thread
+from askbot.models import User
+
+class Command(BaseCommand):
+ help = """Adds tags to questions. Tags should be given via a file
+ with one tag per line. The tags will be matched with the words
+ found in the question title. Then, most frequently used matching tags
+ will be applied. This command respects the maximum number of tags
+ allowed per question.
+ """
+ option_list = BaseCommand.option_list + (
+ make_option('--tags-file', '-t',
+ action = 'store',
+ type = 'str',
+ dest = 'tags_file',
+ default = None,
+ help = 'file containing tag names, one per line'
+ ),
+ )
+ def handle(self, *args, **kwargs):
+ """reads the tags file, parses it,
+ then applies tags to questions by matching them
+ with the question titles and content
+ """
+ if kwargs['tags_file'] is None:
+ raise CommandError('parameter --tags-file is required')
+ try:
+ tags_input = open(kwargs['tags_file']).read()
+ except IOError:
+ raise CommandError('file "%s" not found' % kwargs['tags_file'])
+
+ tags_list = map(lambda v: v.strip(), tags_input.split('\n'))
+
+ multiword_tags = list()
+ for tag in tags_list:
+ if ' ' in tag:
+ multiword_tags.append(tag)
+
+ if len(multiword_tags):
+ message = 'multiword tags tags not allowed, have: %s' % ', '.join(multiword_tags)
+ raise CommandError(message)
+
+ threads = Thread.objects.all()
+ count = threads.count()
+ message = 'Applying tags to questions'
+
+ user = User.objects.all().order_by('-id')[0]
+ now = datetime.datetime.now()
+
+ for thread in ProgressBar(threads.iterator(), count, message):
+ thread.apply_hinted_tags(
+ tags_list, user=user, timestamp=now, silent=True
+ )
diff --git a/askbot/models/question.py b/askbot/models/question.py
index 3dd9fc6b..70060eb2 100644
--- a/askbot/models/question.py
+++ b/askbot/models/question.py
@@ -602,6 +602,64 @@ class Thread(models.Model):
self._question_cache = Post.objects.get(post_type='question', thread=self)
return self._question_cache
+ def apply_hinted_tags(self, hints=None, user=None, timestamp=None, silent=False):
+ """match words in title and body with hints
+ and apply some of the hints as tags,
+ so that total number of tags in no more
+ than the maximum allowed number of tags"""
+
+ #1) see how many tags we're missing,
+ #if we don't need more we return
+ existing_tags = self.get_tag_names()
+ tags_count = len(existing_tags)
+ if tags_count >= askbot_settings.MAX_TAGS_PER_POST:
+ return
+
+ #2) get set of words from title and body
+ post_text = self.title + ' ' + self._question_post().text
+ post_text = post_text.lower()#normalize
+ post_words = set(post_text.split())
+
+ #3) get intersection set
+ #normalize hints and tags and remember the originals
+ orig_hints = dict()
+ for hint in hints:
+ orig_hints[hint.lower()] = hint
+
+ norm_hints = orig_hints.keys()
+ norm_tags = map(lambda v: v.lower(), existing_tags)
+
+ common_words = (set(norm_hints) & post_words) - set(norm_tags)
+
+ #4) for each common word count occurances in corpus
+ counts = dict()
+ for word in common_words:
+ counts[word] = sum(map(lambda w: w.lower() == word.lower(), post_words))
+
+ #5) sort words by count
+ sorted_words = sorted(
+ common_words,
+ lambda a, b: cmp(counts[b], counts[a])
+ )
+
+ #6) extract correct number of most frequently used tags
+ need_tags = askbot_settings.MAX_TAGS_PER_POST - len(existing_tags)
+ add_tags = sorted_words[0:need_tags]
+ add_tags = map(lambda h: orig_hints[h], add_tags)
+
+ tagnames = ' '.join(existing_tags + add_tags)
+
+ if askbot_settings.FORCE_LOWERCASE_TAGS:
+ tagnames = tagnames.lower()
+
+ self.retag(
+ retagged_by=user,
+ retagged_at=timestamp or datetime.datetime.now(),
+ tagnames =' '.join(existing_tags + add_tags),
+ silent=silent
+ )
+
+
def get_absolute_url(self):
return self._question_post().get_absolute_url(thread = self)
#question_id = self._question_post().id