allowed adding tag, user and title parameters in the search query

author: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2011-07-01 04:22:35 -0400
committer: Evgeny Fadeev <evgeny.fadeev@gmail.com> 2011-07-01 04:22:35 -0400
commit: a18560e89a80f0c7a55aa00d2976023abc39870a (patch)
tree: 00a956f62e281be692c843da00a49ffdf2a66f2a
parent: 65f2791b29c8c9fe5eb2681c0d8ab97e34e87301 (diff)
download: askbot-a18560e89a80f0c7a55aa00d2976023abc39870a.tar.gz
askbot-a18560e89a80f0c7a55aa00d2976023abc39870a.tar.bz2
askbot-a18560e89a80f0c7a55aa00d2976023abc39870a.zip
4 files changed, 153 insertions, 8 deletions
diff --git a/askbot/models/question.py b/askbot/models/question.py
index 7a2be48c..1171cda2 100644
--- a/askbot/models/question.py
+++ b/askbot/models/question.py
@@ -189,12 +189,26 @@ class QuestionQuerySet(models.query.QuerySet):
                 qs = qs.filter(tags__name = tag)
 
         if search_query:
-            qs = qs.get_by_text_query(search_query)
-            #a patch for postgres search sort method
-            if askbot.conf.should_show_sort_by_relevance():
-                if sort_method == 'relevance-desc':
-                    qs= qs.extra(order_by = ['-relevance',])
-
+            if search_state.stripped_query:
+                qs = qs.get_by_text_query(search_state.stripped_query)
+                #a patch for postgres search sort method
+                if askbot.conf.should_show_sort_by_relevance():
+                    if sort_method == 'relevance-desc':
+                        qs = qs.extra(order_by = ['-relevance',])
+            if search_state.query_title:
+                qs = qs.filter(title__icontains = search_state.query_title)
+            if len(search_state.query_tags) > 0:
+                qs = qs.filter(tags__name__in = search_state.query_tags)
+            if len(search_state.query_users) > 0:
+                query_users = list()
+                for username in search_state.query_users:
+                    try:
+                        user = User.objects.get(username__iexact = username)
+                        query_users.append(user)
+                    except User.DoesNotExist:
+                        pass
+                if len(query_users) > 0:
+                    qs = qs.filter(author__in = query_users)
 
         #have to import this at run time, otherwise there
         #a circular import dependency...
diff --git a/askbot/search/state_manager.py b/askbot/search/state_manager.py
index 9392190a..d441e33b 100644
--- a/askbot/search/state_manager.py
+++ b/askbot/search/state_manager.py
@@ -1,10 +1,13 @@
 #search state manager object
 #that lives in the session and takes care of the state
 #persistece during the search session
+import re
+import copy
 import askbot
 import askbot.conf
 from askbot import const
 from askbot.conf import settings as askbot_settings
+from askbot.utils.functions import strip_plus
 import logging
 
 ACTIVE_COMMANDS = (
@@ -20,10 +23,82 @@ def some_in(what, where):
             return True
     return False
 
+def extract_matching_token(text, regexes):
+    """if text matches any of the regexes,
+    * the entire match is removed from text
+    * repeating spaces in the remaining string are replaced with one
+    * returned is a tuple of: first group from the regex, remaining text
+    """
+    for regex in regexes:
+        m = regex.search(text)
+        if m:
+            text = regex.sub('', text)
+            extracted_match = m.group(1)
+            return (strip_plus(extracted_match), strip_plus(text))
+    return ('', text.strip())
+
+def extract_all_matching_tokens(text, regexes):
+    """the same as the ``extract_matching_token``
+    but returns a tuple of: list of first group matches from the regexes
+    and the remains of the input text
+    """
+    matching_tokens = set()
+    for regex in regexes:
+        matches = regex.findall(text)
+        if len(matches) > 0:
+            text = regex.sub('', text)
+            matching_tokens.update([match.strip() for match in matches])
+    return ([strip_plus(token) for token in matching_tokens], strip_plus(text))
+
+
+def parse_query(query):
+    """takes hand-typed search query string as an argument
+    returns a dictionary with keys (and values in parens):
+    * stripped_query (query with the items below stripped)
+    * query_tags (list of tag names)
+    * query_users (list of user names, not validated)
+    * query_title (question title)
+    Note: the stripped_query is the actual string
+    against which the global search will be performed;
+    the original query will still be shown in full in the search
+    query input box
+    """
+    title_re1 = re.compile(r'\[title:(.+?)\]')
+    title_re2 = re.compile(r'title:"([^"]+?)"')
+    title_re3 = re.compile(r"title:'([^']+?)'")
+    title_regexes = (title_re1, title_re2, title_re3)
+    (query_title, query) = extract_matching_token(query, title_regexes)
+
+    tag_re1 = re.compile(r'\[([^:]+?)\]')
+    tag_re2 = re.compile(r'\[tag:\s*([\S]+)\s*]')
+    tag_re3 = re.compile(r'#(\S+)')
+    tag_regexes = (tag_re1, tag_re2, tag_re3)
+    (query_tags, query) = extract_all_matching_tokens(query, tag_regexes)
+
+    user_re1 = re.compile(r'\[user:([^\]]+?)\]')
+    user_re2 = re.compile(r'user:"([^"]+?)"')
+    user_re3 = re.compile(r"user:'([^']+?)'")
+    user_re4 = re.compile(r"""@([^'"\s]+)""")
+    user_re5 = re.compile(r'@"([^"]+)"')
+    user_re6 = re.compile(r"@'([^']+)'")
+    user_regexes = (user_re1, user_re2, user_re3, user_re4, user_re5, user_re6)
+    (query_users, stripped_query) = extract_all_matching_tokens(query, user_regexes)
+
+    return {
+        'stripped_query': stripped_query,
+        'query_title': query_title,
+        'query_tags': query_tags,
+        'query_users': query_users
+    }
+
 class SearchState(object):
     def __init__(self):
         self.scope = const.DEFAULT_POST_SCOPE
         self.query = None
+        self.stripped_query = None
+        self.query_tags = []
+        self.query_users = []
+        self.query_title = None
         self.search = None
         self.tags = None
         self.author = None
@@ -151,7 +226,17 @@ class SearchState(object):
         self.update_value('author', input_dict)
 
         if 'query' in input_dict:
-            self.update_value('query', input_dict)
+            query_bits = parse_query(input_dict['query'])
+            tmp_input_dict = copy.deepcopy(input_dict)
+            tmp_input_dict.update(query_bits)
+            self.update_value('query', tmp_input_dict)#the original query
+            #pull out values of [title:xxx], [user:some one]
+            #[tag: sometag], title:'xxx', title:"xxx", @user, @'some user',
+            #and #tag (the hash symbol delineates the tag)
+            self.update_value('stripped_query', tmp_input_dict)
+            self.update_value('query_tags', tmp_input_dict)
+            self.update_value('query_users', tmp_input_dict)
+            self.update_value('query_title', tmp_input_dict)
             self.sort = 'relevance-desc'
         elif 'search' in input_dict:
             #a case of use nulling search query by hand
diff --git a/askbot/tests/search_state_tests.py b/askbot/tests/search_state_tests.py
index b4b66a65..b944a2e4 100644
--- a/askbot/tests/search_state_tests.py
+++ b/askbot/tests/search_state_tests.py
@@ -1,6 +1,8 @@
+import re
+import unittest
 from django.test import TestCase
 from django.contrib.auth.models import AnonymousUser
-from askbot.search.state_manager import SearchState, ViewLog
+from askbot.search.state_manager import SearchState, ViewLog, parse_query
 from askbot import const
 
 DEFAULT_SORT = const.DEFAULT_POST_SORT_METHOD
@@ -65,3 +67,41 @@ class SearchStateTests(TestCase):
         self.assertEquals(self.state.sort, 'age-asc')
         self.update({})
         self.assertEquals(self.state.sort, DEFAULT_SORT)
+class ParseQueryTests(unittest.TestCase):
+    def test_extract_users(self):
+        text = '@anna haha @"maria fernanda" @\'diego maradona\' hehe [user:karl  marx] hoho  user:\' george bush  \''
+        parse_results = parse_query(text)
+        self.assertEquals(
+            sorted(parse_results['query_users']),
+            sorted(['anna', 'maria fernanda', 'diego maradona', 'karl marx', 'george bush'])
+        )
+        self.assertEquals(parse_results['stripped_query'], 'haha hehe hoho')
+
+    def test_extract_tags(self):
+        text = '#tag1 [tag: tag2] some text [tag3] query'
+        parse_results = parse_query(text)
+        self.assertEquals(set(parse_results['query_tags']), set(['tag1', 'tag2', 'tag3']))
+        self.assertEquals(parse_results['stripped_query'], 'some text query')
+
+    def test_extract_title1(self):
+        text = 'some text query [title: what is this?]'
+        parse_results = parse_query(text)
+        self.assertEquals(parse_results['query_title'], 'what is this?')
+        self.assertEquals(parse_results['stripped_query'], 'some text query')
+
+    def test_extract_title2(self):
+        text = 'some text query title:"what is this?"'
+        parse_results = parse_query(text)
+        self.assertEquals(parse_results['query_title'], 'what is this?')
+        self.assertEquals(parse_results['stripped_query'], 'some text query')
+
+    def test_extract_title3(self):
+        text = 'some text query title:\'what is this?\''
+        parse_results = parse_query(text)
+        self.assertEquals(parse_results['query_title'], 'what is this?')
+        self.assertEquals(parse_results['stripped_query'], 'some text query')
+
+    def test_negative_match(self):
+        text = 'some query text'
+        parse_results = parse_query(text)
+        self.assertEquals(parse_results['stripped_query'], 'some query text')
diff --git a/askbot/utils/functions.py b/askbot/utils/functions.py
index 7e5ccfc4..a56ed897 100644
--- a/askbot/utils/functions.py
+++ b/askbot/utils/functions.py
@@ -27,6 +27,12 @@ MOBILE_REGEX = re.compile(
 )
 
 
+def strip_plus(text):
+    """returns text with runs of whitespace replaced with a single space,
+    and with leading and trailing whitespace stripped"""
+    return re.sub('\s+', ' ', text).strip()
+
+
 def not_a_robot_request(request):
 
     if 'HTTP_ACCEPT_LANGUAGE' not in request.META:
author	Evgeny Fadeev <evgeny.fadeev@gmail.com>	2011-07-01 04:22:35 -0400
committer	Evgeny Fadeev <evgeny.fadeev@gmail.com>	2011-07-01 04:22:35 -0400
commit	a18560e89a80f0c7a55aa00d2976023abc39870a (patch)
tree	00a956f62e281be692c843da00a49ffdf2a66f2a
parent	65f2791b29c8c9fe5eb2681c0d8ab97e34e87301 (diff)
download	askbot-a18560e89a80f0c7a55aa00d2976023abc39870a.tar.gz askbot-a18560e89a80f0c7a55aa00d2976023abc39870a.tar.bz2 askbot-a18560e89a80f0c7a55aa00d2976023abc39870a.zip