1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
|
"""Search state manager object"""
import re
import urllib
import copy
from django.core import urlresolvers
from django.utils.http import urlencode
from django.utils.encoding import smart_str
import askbot
import askbot.conf
from askbot.conf import settings as askbot_settings
from askbot import const
from askbot.utils.functions import strip_plus
def extract_matching_token(text, regexes):
    """Pull a single token out of ``text`` using the first regex that matches.

    The regexes are tried in the order given. For the first one that
    matches:
    * every occurrence of that regex is deleted from the text
    * the first group of the match and the leftover text are each passed
      through ``strip_plus`` before being returned

    Returns a tuple ``(token, remaining_text)``. When none of the regexes
    match, the token is an empty string and the text is returned with only
    its leading and trailing whitespace removed.
    """
    for pattern in regexes:
        found = pattern.search(text)
        if found is None:
            # this pattern is absent from the text, try the next one
            continue
        leftover = pattern.sub('', text)
        token = found.group(1)
        return (strip_plus(token), strip_plus(leftover))
    return ('', text.strip())
def extract_all_matching_tokens(text, regexes):
    """Collect every token matched by any of the regexes.

    Works like ``extract_matching_token``, except that all the regexes are
    applied one after another (each to the text left over by the previous
    ones) and all of their matches are gathered.

    Returns a tuple ``(tokens, remaining_text)`` where ``tokens`` is a list
    of the distinct matches (duplicates are collapsed through a set, so the
    order of the list is not defined) and ``remaining_text`` is the input
    with all matches removed. Both parts are passed through ``strip_plus``.
    """
    found_tokens = set()
    for pattern in regexes:
        hits = pattern.findall(text)
        if not hits:
            continue
        # drop the matched fragments before the next pattern is applied
        text = pattern.sub('', text)
        found_tokens.update(hit.strip() for hit in hits)
    cleaned_tokens = [strip_plus(token) for token in found_tokens]
    return (cleaned_tokens, strip_plus(text))
def parse_query(query):
    """Split a hand-typed search query into its structured parts.

    Returns a dictionary with the keys:
    * stripped_query - the query with all the items below removed
    * query_title - question title taken from the query
    * query_tags - list of tag names
    * query_users - list of user names (not validated)

    Note: ``stripped_query`` is the string against which the global search
    is actually performed, while the original query is still shown in full
    in the search input box.
    """
    # title: [title:xxx], title:"xxx", title:'xxx'
    title_patterns = (
        re.compile(r'\[title:(.+?)\]'),
        re.compile(r'title:"([^"]+?)"'),
        re.compile(r"title:'([^']+?)'"),
    )
    # tags: [xxx], [tag: xxx], #xxx
    tag_patterns = (
        re.compile(r'\[([^:]+?)\]'),
        re.compile(r'\[tag:\s*([\S]+)\s*]'),
        re.compile(r'#(\S+)'),
    )
    # users: [user:xxx], user:"xxx", user:'xxx', @xxx, @"xxx", @'xxx'
    user_patterns = (
        re.compile(r'\[user:([^\]]+?)\]'),
        re.compile(r'user:"([^"]+?)"'),
        re.compile(r"user:'([^']+?)'"),
        re.compile(r"""@([^'"\s]+)"""),
        re.compile(r'@"([^"]+)"'),
        re.compile(r"@'([^']+)'"),
    )

    # each step works on whatever text the previous step left behind
    title, remainder = extract_matching_token(query, title_patterns)
    tags, remainder = extract_all_matching_tokens(remainder, tag_patterns)
    users, remainder = extract_all_matching_tokens(remainder, user_patterns)

    parsed = {}
    parsed['stripped_query'] = remainder
    parsed['query_title'] = title
    parsed['query_tags'] = tags
    parsed['query_users'] = users
    return parsed
class SearchState(object):
    """State of a questions search: scope, sort order, query, tags,
    author and page number.

    The state can be rendered as a url fragment (``query_string``) and the
    modifier methods (``add_tag``, ``remove_tags``, ``change_sort``, ...)
    never change the instance they are called on - they return a modified
    copy instead.
    """

    @classmethod
    def get_empty(cls):
        """Returns a state built with all constructor arguments set to ``None``"""
        return cls(
            scope=None, sort=None, query=None, tags=None,
            author=None, page=None, user_logged_in=None
        )

    def __init__(self, scope, sort, query, tags, author, page, user_logged_in):
        """Normalizes the raw values and stores them on the instance.

        * scope - one of the first items of ``const.POST_SCOPE_LIST``
          entries; anything else (or 'followed' for a user who is not
          logged in) is replaced with the default scope from the settings
        * sort - one of the first items of ``const.POST_SORT_METHODS``
          entries; falls back to ``const.DEFAULT_POST_SORT_METHOD``
        * query - hand-typed search query or a false value
        * tags - string of tag names joined with ``const.TAG_SEP``
          or a false value
        * author - value convertible with ``int()`` or a false value
        * page - value convertible with ``int()`` or a false value
        * user_logged_in - truthy when the visitor is logged in
        """
        # Only the first item of each entry is an acceptable value.
        # A list comprehension is used rather than ``zip(*entries)[0]``,
        # which breaks on an empty list and wherever ``zip`` is lazy.
        valid_scopes = [entry[0] for entry in const.POST_SCOPE_LIST]
        valid_sorts = [entry[0] for entry in const.POST_SORT_METHODS]

        if (scope not in valid_scopes) or (scope == 'followed' and not user_logged_in):
            if user_logged_in:
                self.scope = askbot_settings.DEFAULT_SCOPE_AUTHENTICATED
            else:
                self.scope = askbot_settings.DEFAULT_SCOPE_ANONYMOUS
        else:
            self.scope = scope

        self.query = query.strip() if query else None

        if self.query:
            #pull out values of [title:xxx], [user:some one]
            #[tag: sometag], title:'xxx', title:"xxx", @user, @'some user',
            #and #tag - (hash symbol to delineate the tag)
            query_bits = parse_query(self.query)
            self.stripped_query = query_bits['stripped_query']
            self.query_tags = query_bits['query_tags']
            self.query_users = query_bits['query_users']
            self.query_title = query_bits['query_title']
        else:
            self.stripped_query = None
            self.query_tags = None
            self.query_users = None
            self.query_title = None

        if (sort not in valid_sorts) or (
            sort == 'relevance-desc' and (
                not self.query
                or not askbot.conf.should_show_sort_by_relevance()
            )
        ):
            self.sort = const.DEFAULT_POST_SORT_METHOD
        else:
            self.sort = sort

        #patch for empty stripped query, relevance sorting is useless then
        if self.stripped_query in (None, '') and sort == 'relevance-desc':
            self.sort = const.DEFAULT_POST_SORT_METHOD

        self.tags = []
        if tags:
            for raw_tag in tags.split(const.TAG_SEP):
                tag = raw_tag.strip()
                # blank names come from stray separators or whitespace
                # and are skipped; duplicates are stored only once
                if tag and tag not in self.tags:
                    self.tags.append(tag)

        self.author = int(author) if author else None
        self.page = int(page) if page else 1
        if self.page < 1: # in case someone likes jokes :) - zero or negative
            self.page = 1

        self._questions_url = urlresolvers.reverse('questions')

    def __str__(self):
        return self.query_string()

    def full_url(self):
        """returns url of the questions page followed by the query string"""
        return self._questions_url + self.query_string()

    def ask_query_string(self): # TODO: test me
        """returns string to prepopulate title field on the "Ask your question" page"""
        ask_title = self.stripped_query or self.query or ''
        if not ask_title:
            return ''
        return '?' + urlencode({'title': ask_title})

    def full_ask_url(self):
        """returns url of the "ask" page followed by ``ask_query_string()``"""
        return urlresolvers.reverse('ask') + self.ask_query_string()

    def unified_tags(self):
        "Returns tags both from tag selector and extracted from query"
        return (self.query_tags or []) + (self.tags or [])

    #
    # Safe characters in urlquote() according to http://www.ietf.org/rfc/rfc1738.txt:
    #
    #    Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
    #    reserved characters used for their reserved purposes may be used
    #    unencoded within a URL.
    #
    # Tag separator (const.TAG_SEP) remains unencoded to clearly mark tag boundaries
    # _+.- stay unencoded to keep tags in URL as verbose as possible
    #      (note that urllib.quote() in Python 2.7 treats _.- as safe chars, but let's be explicit)
    # Hash (#) is not safe and has to be encoded, as it's used as the URL hash delimiter
    #
    SAFE_CHARS = const.TAG_SEP + '_+.-'

    def query_string(self):
        """returns part of the url to the main page,
        responsible to display the full text search results,
        taking into account sort method, selected scope
        and search tags"""

        lst = [
            'scope:' + self.scope,
            'sort:' + self.sort
        ]
        if self.query:
            lst.append('query:' + urllib.quote(smart_str(self.query), safe=self.SAFE_CHARS))
        if self.tags:
            lst.append('tags:' + urllib.quote(smart_str(const.TAG_SEP.join(self.tags)), safe=self.SAFE_CHARS))
        if self.author:
            lst.append('author:' + str(self.author))
        if self.page:
            lst.append('page:' + str(self.page))
        return '/'.join(lst) + '/'

    def deepcopy(self): # TODO: test me
        "Used to construct a new SearchState for manipulation, e.g. for adding/removing tags"
        ss = copy.copy(self)

        # copy.copy() is shallow, so the list attributes must be cloned
        # by hand. It's important to test against None: empty lists are
        # cloned as well, otherwise they would stay shared with the
        # original state.
        if ss.tags is not None:
            ss.tags = ss.tags[:]
        if ss.query_tags is not None:
            ss.query_tags = ss.query_tags[:]
        if ss.query_users is not None:
            ss.query_users = ss.query_users[:]

        return ss

    def add_tag(self, tag):
        """returns a copy of the state with ``tag`` added, unless already there"""
        ss = self.deepcopy()
        if tag not in ss.tags:
            ss.tags.append(tag)
            ss.page = 1 # state change causes page reset
        return ss

    def remove_author(self):
        """returns a copy of the state without the author, on page 1"""
        ss = self.deepcopy()
        ss.author = None
        ss.page = 1
        return ss

    def remove_tags(self, tags = None):
        """returns a copy of the state on page 1 with the given tags removed;
        when ``tags`` is empty or ``None`` all tags are removed"""
        ss = self.deepcopy()
        if tags:
            # filter instead of subtracting sets, so that the remaining
            # tags keep their order and the resulting url stays stable
            unwanted = set(tags)
            ss.tags = [tag for tag in ss.tags if tag not in unwanted]
        else:
            ss.tags = []
        ss.page = 1
        return ss

    def change_scope(self, new_scope):
        """returns a copy of the state with the new scope, on page 1"""
        ss = self.deepcopy()
        ss.scope = new_scope
        ss.page = 1
        return ss

    def change_sort(self, new_sort):
        """returns a copy of the state with the new sort method, on page 1"""
        ss = self.deepcopy()
        ss.sort = new_sort
        ss.page = 1
        return ss

    def change_page(self, new_page):
        """returns a copy of the state with the new page number"""
        ss = self.deepcopy()
        ss.page = new_page
        return ss
class DummySearchState(object):
    """Stand-in for a search state, used for caching question/thread summaries.

    ``full_url`` does not produce a real url: it gives back a ``<<<tag>>>``
    marker built from the tag most recently passed to ``add_tag``.
    """

    def add_tag(self, tag):
        """Remembers ``tag`` on the instance and returns the same instance"""
        self.tag = tag
        return self

    def change_scope(self, new_scope):
        """Ignores ``new_scope`` and returns the same instance"""
        return self

    def full_url(self):
        """Returns the remembered tag wrapped as ``<<<tag>>>``"""
        marker_template = '<<<%s>>>'
        return marker_template % self.tag
|