From a18560e89a80f0c7a55aa00d2976023abc39870a Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Fri, 1 Jul 2011 04:22:35 -0400 Subject: allowed adding tag, user and title parameters in the search query --- askbot/models/question.py | 26 +++++++++--- askbot/search/state_manager.py | 87 +++++++++++++++++++++++++++++++++++++- askbot/tests/search_state_tests.py | 42 +++++++++++++++++- askbot/utils/functions.py | 6 +++ 4 files changed, 153 insertions(+), 8 deletions(-) diff --git a/askbot/models/question.py b/askbot/models/question.py index 7a2be48c..1171cda2 100644 --- a/askbot/models/question.py +++ b/askbot/models/question.py @@ -189,12 +189,26 @@ class QuestionQuerySet(models.query.QuerySet): qs = qs.filter(tags__name = tag) if search_query: - qs = qs.get_by_text_query(search_query) - #a patch for postgres search sort method - if askbot.conf.should_show_sort_by_relevance(): - if sort_method == 'relevance-desc': - qs= qs.extra(order_by = ['-relevance',]) - + if search_state.stripped_query: + qs = qs.get_by_text_query(search_state.stripped_query) + #a patch for postgres search sort method + if askbot.conf.should_show_sort_by_relevance(): + if sort_method == 'relevance-desc': + qs = qs.extra(order_by = ['-relevance',]) + if search_state.query_title: + qs = qs.filter(title__icontains = search_state.query_title) + if len(search_state.query_tags) > 0: + qs = qs.filter(tags__name__in = search_state.query_tags) + if len(search_state.query_users) > 0: + query_users = list() + for username in search_state.query_users: + try: + user = User.objects.get(username__iexact = username) + query_users.append(user) + except User.DoesNotExist: + pass + if len(query_users) > 0: + qs = qs.filter(author__in = query_users) #have to import this at run time, otherwise there #a circular import dependency... diff --git a/askbot/search/state_manager.py b/askbot/search/state_manager.py index 9392190a..d441e33b 100644 --- a/askbot/search/state_manager.py +++ b/askbot/search/state_manager.py @@ -1,10 +1,13 @@ #search state manager object #that lives in the session and takes care of the state #persistece during the search session +import re +import copy import askbot import askbot.conf from askbot import const from askbot.conf import settings as askbot_settings +from askbot.utils.functions import strip_plus import logging ACTIVE_COMMANDS = ( @@ -20,10 +23,82 @@ def some_in(what, where): return True return False +def extract_matching_token(text, regexes): + """if text matches any of the regexes, + * the entire match is removed from text + * repeating spaces in the remaining string are replaced with one + * returned is a tuple of: first group from the regex, remaining text + """ + for regex in regexes: + m = regex.search(text) + if m: + text = regex.sub('', text) + extracted_match = m.group(1) + return (strip_plus(extracted_match), strip_plus(text)) + return ('', text.strip()) + +def extract_all_matching_tokens(text, regexes): + """the same as the ``extract_matching_token`` + but returns a tuple of: list of first group matches from the regexes + and the remains of the input text + """ + matching_tokens = set() + for regex in regexes: + matches = regex.findall(text) + if len(matches) > 0: + text = regex.sub('', text) + matching_tokens.update([match.strip() for match in matches]) + return ([strip_plus(token) for token in matching_tokens], strip_plus(text)) + + +def parse_query(query): + """takes hand-typed search query string as an argument + returns a dictionary with keys (and values in parens): + * stripped_query (query with the items below stripped) + * query_tags (list of tag names) + * query_users (list of user names, not validated) + * query_title (question title) + Note: the stripped_query is the actual string + against which global search will be performed + the original query will still all be shown in the search + query input box + """ + title_re1 = re.compile(r'\[title:(.+?)\]') + title_re2 = re.compile(r'title:"([^"]+?)"') + title_re3 = re.compile(r"title:'([^']+?)'") + title_regexes = (title_re1, title_re2, title_re3) + (query_title, query) = extract_matching_token(query, title_regexes) + + tag_re1 = re.compile(r'\[([^:]+?)\]') + tag_re2 = re.compile(r'\[tag:\s*([\S]+)\s*]') + tag_re3 = re.compile(r'#(\S+)') + tag_regexes = (tag_re1, tag_re2, tag_re3) + (query_tags, query) = extract_all_matching_tokens(query, tag_regexes) + + user_re1 = re.compile(r'\[user:([^\]]+?)\]') + user_re2 = re.compile(r'user:"([^"]+?)"') + user_re3 = re.compile(r"user:'([^']+?)'") + user_re4 = re.compile(r"""@([^'"\s]+)""") + user_re5 = re.compile(r'@"([^"]+)"') + user_re6 = re.compile(r"@'([^']+)'") + user_regexes = (user_re1, user_re2, user_re3, user_re4, user_re5, user_re6) + (query_users, stripped_query) = extract_all_matching_tokens(query, user_regexes) + + return { + 'stripped_query': stripped_query, + 'query_title': query_title, + 'query_tags': query_tags, + 'query_users': query_users + } + class SearchState(object): def __init__(self): self.scope = const.DEFAULT_POST_SCOPE self.query = None + self.stripped_query = None + self.query_tags = [] + self.query_users = [] + self.query_title = None self.search = None self.tags = None self.author = None @@ -151,7 +226,17 @@ class SearchState(object): self.update_value('author', input_dict) if 'query' in input_dict: - self.update_value('query', input_dict) + query_bits = parse_query(input_dict['query']) + tmp_input_dict = copy.deepcopy(input_dict) + tmp_input_dict.update(query_bits) + self.update_value('query', tmp_input_dict)#the original query + #pull out values of [title:xxx], [user:some one] + #[tag: sometag], title:'xxx', title:"xxx", @user, @'some user', + #and #tag - (hash symbol to delineate the tag + self.update_value('stripped_query', tmp_input_dict) + self.update_value('query_tags', tmp_input_dict) + self.update_value('query_users', tmp_input_dict) + self.update_value('query_title', tmp_input_dict) self.sort = 'relevance-desc' elif 'search' in input_dict: #a case of use nulling search query by hand diff --git a/askbot/tests/search_state_tests.py b/askbot/tests/search_state_tests.py index b4b66a65..b944a2e4 100644 --- a/askbot/tests/search_state_tests.py +++ b/askbot/tests/search_state_tests.py @@ -1,6 +1,8 @@ +import re +import unittest from django.test import TestCase from django.contrib.auth.models import AnonymousUser -from askbot.search.state_manager import SearchState, ViewLog +from askbot.search.state_manager import SearchState, ViewLog, parse_query from askbot import const DEFAULT_SORT = const.DEFAULT_POST_SORT_METHOD @@ -65,3 +67,41 @@ class SearchStateTests(TestCase): self.assertEquals(self.state.sort, 'age-asc') self.update({}) self.assertEquals(self.state.sort, DEFAULT_SORT) +class ParseQueryTests(unittest.TestCase): + def test_extract_users(self): + text = '@anna haha @"maria fernanda" @\'diego maradona\' hehe [user:karl marx] hoho user:\' george bush \'' + parse_results = parse_query(text) + self.assertEquals( + sorted(parse_results['query_users']), + sorted(['anna', 'maria fernanda', 'diego maradona', 'karl marx', 'george bush']) + ) + self.assertEquals(parse_results['stripped_query'], 'haha hehe hoho') + + def test_extract_tags(self): + text = '#tag1 [tag: tag2] some text [tag3] query' + parse_results = parse_query(text) + self.assertEquals(set(parse_results['query_tags']), set(['tag1', 'tag2', 'tag3'])) + self.assertEquals(parse_results['stripped_query'], 'some text query') + + def test_extract_title1(self): + text = 'some text query [title: what is this?]' + parse_results = parse_query(text) + self.assertEquals(parse_results['query_title'], 'what is this?') + self.assertEquals(parse_results['stripped_query'], 'some text query') + + def test_extract_title2(self): + text = 'some text query title:"what is this?"' + parse_results = parse_query(text) + self.assertEquals(parse_results['query_title'], 'what is this?') + self.assertEquals(parse_results['stripped_query'], 'some text query') + + def test_extract_title3(self): + text = 'some text query title:\'what is this?\'' + parse_results = parse_query(text) + self.assertEquals(parse_results['query_title'], 'what is this?') + self.assertEquals(parse_results['stripped_query'], 'some text query') + + def test_negative_match(self): + text = 'some query text' + parse_results = parse_query(text) + self.assertEquals(parse_results['stripped_query'], 'some query text') diff --git a/askbot/utils/functions.py b/askbot/utils/functions.py index 7e5ccfc4..a56ed897 100644 --- a/askbot/utils/functions.py +++ b/askbot/utils/functions.py @@ -27,6 +27,12 @@ MOBILE_REGEX = re.compile( ) +def strip_plus(text): + """returns text with redundant spaces replaced with just one, + and stripped leading and the trailing spaces""" + return re.sub('\s+', ' ', text).strip() + + def not_a_robot_request(request): if 'HTTP_ACCEPT_LANGUAGE' not in request.META: -- cgit v1.2.3-1-g7c22