From 336c7e3ae202efad4fa10e242f66f0f35b6c1a07 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Tue, 28 May 2013 02:23:46 -0400 Subject: added management command apply_hinted_tags --- askbot/models/question.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'askbot/models/question.py') diff --git a/askbot/models/question.py b/askbot/models/question.py index 3dd9fc6b..70060eb2 100644 --- a/askbot/models/question.py +++ b/askbot/models/question.py @@ -602,6 +602,64 @@ class Thread(models.Model): self._question_cache = Post.objects.get(post_type='question', thread=self) return self._question_cache + def apply_hinted_tags(self, hints=None, user=None, timestamp=None, silent=False): + """match words in title and body with hints + and apply some of the hints as tags, + so that total number of tags in no more + than the maximum allowed number of tags""" + + #1) see how many tags we're missing, + #if we don't need more we return + existing_tags = self.get_tag_names() + tags_count = len(existing_tags) + if tags_count >= askbot_settings.MAX_TAGS_PER_POST: + return + + #2) get set of words from title and body + post_text = self.title + ' ' + self._question_post().text + post_text = post_text.lower()#normalize + post_words = set(post_text.split()) + + #3) get intersection set + #normalize hints and tags and remember the originals + orig_hints = dict() + for hint in hints: + orig_hints[hint.lower()] = hint + + norm_hints = orig_hints.keys() + norm_tags = map(lambda v: v.lower(), existing_tags) + + common_words = (set(norm_hints) & post_words) - set(norm_tags) + + #4) for each common word count occurances in corpus + counts = dict() + for word in common_words: + counts[word] = sum(map(lambda w: w.lower() == word.lower(), post_words)) + + #5) sort words by count + sorted_words = sorted( + common_words, + lambda a, b: cmp(counts[b], counts[a]) + ) + + #6) extract correct number of most frequently used tags + need_tags = askbot_settings.MAX_TAGS_PER_POST - len(existing_tags) + add_tags = sorted_words[0:need_tags] + add_tags = map(lambda h: orig_hints[h], add_tags) + + tagnames = ' '.join(existing_tags + add_tags) + + if askbot_settings.FORCE_LOWERCASE_TAGS: + tagnames = tagnames.lower() + + self.retag( + retagged_by=user, + retagged_at=timestamp or datetime.datetime.now(), + tagnames =' '.join(existing_tags + add_tags), + silent=silent + ) + + def get_absolute_url(self): return self._question_post().get_absolute_url(thread = self) #question_id = self._question_post().id -- cgit v1.2.3-1-g7c22