diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-04-25 17:15:26 -0400 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2010-04-25 17:15:26 -0400 |
commit | cc8337da9046bff5243672e20f1dea9c18b00da6 (patch) | |
tree | 77ade69869105f1838df5e8616bb994844507cec /forum/search | |
parent | 3122fb8a2599944e623c8e21f285a9e4dd9e132a (diff) | |
parent | 02510a462392dd2e9e46e945d51efb374e0dc06f (diff) | |
download | askbot-cc8337da9046bff5243672e20f1dea9c18b00da6.tar.gz askbot-cc8337da9046bff5243672e20f1dea9c18b00da6.tar.bz2 askbot-cc8337da9046bff5243672e20f1dea9c18b00da6.zip |
merged newer ui branch to master
Diffstat (limited to 'forum/search')
-rw-r--r-- | forum/search/README | 5 | ||||
-rw-r--r-- | forum/search/__init__.py | 0 | ||||
-rw-r--r-- | forum/search/indexer.py | 9 | ||||
-rw-r--r-- | forum/search/sphinx/README | 4 | ||||
-rw-r--r-- | forum/search/sphinx/sphinx.conf | 127 | ||||
-rw-r--r-- | forum/search/state_manager.py | 152 |
6 files changed, 297 insertions, 0 deletions
diff --git a/forum/search/README b/forum/search/README new file mode 100644 index 00000000..c15dc221 --- /dev/null +++ b/forum/search/README @@ -0,0 +1,5 @@ +module dealing with search functions +at this time only question and answer search + +that among other things contains +available full text search implementations diff --git a/forum/search/__init__.py b/forum/search/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/forum/search/__init__.py diff --git a/forum/search/indexer.py b/forum/search/indexer.py new file mode 100644 index 00000000..c7c45c59 --- /dev/null +++ b/forum/search/indexer.py @@ -0,0 +1,9 @@ +from django.conf import settings +from django.db import connection + +def create_fulltext_indexes(): + if settings.DATABASE_ENGINE == 'mysql': + cursor = connection.cursor() + cursor.execute('ALTER TABLE question ADD FULLTEXT (title, text, tagnames)') + cursor.execute('ALTER TABLE answer ADD FULLTEXT (title, text, tagnames)') + diff --git a/forum/search/sphinx/README b/forum/search/sphinx/README new file mode 100644 index 00000000..8c008a23 --- /dev/null +++ b/forum/search/sphinx/README @@ -0,0 +1,4 @@ +This directory contains sample configuration for sphinx search + +Sphinx is a full text search engine for MySQL (only) with full +word stemming in English and Russian (other languages are not supported) diff --git a/forum/search/sphinx/sphinx.conf b/forum/search/sphinx/sphinx.conf new file mode 100644 index 00000000..bf4bdc8b --- /dev/null +++ b/forum/search/sphinx/sphinx.conf @@ -0,0 +1,127 @@ +#if you have many posts, it's best to configure another index for new posts and +#periodically merge the diff index to the main +#this is not important until you get to hundreds of thousands of posts + +source src_cnprog +{ + # data source + type = mysql + sql_host = localhost + sql_user = cnprog #replace with your db username + sql_pass = secret #replace with your db password + sql_db = cnprog #replace with your db name + # these two are 
optional + #sql_port = 3306 + #sql_sock = /var/lib/mysql/mysql.sock + + # pre-query, executed before the main fetch query + sql_query_pre = SET NAMES utf8 + + # main document fetch query - change the table names if you are using a prefix + # this query creates a flat document from each question that includes only latest + # revisions of the question and all of its answers + sql_query = SELECT q.id as id, q.title AS title, q.tagnames as tags, qr.text AS text, answers_combined.text AS answers \ + FROM question AS q \ + INNER JOIN \ + ( \ + SELECT MAX(id) as id, question_id \ + FROM question_revision \ + GROUP BY question_id \ + ) \ + AS mqr \ + ON q.id=mqr.question_id \ + INNER JOIN question_revision AS qr ON qr.id=mqr.id \ + LEFT JOIN \ + ( \ + SELECT GROUP_CONCAT(answer_current.text SEPARATOR '. ') AS text, \ + question_id \ + FROM \ + ( \ + SELECT a.question_id as question_id, ar.text as text \ + FROM answer AS a \ + INNER JOIN \ + ( \ + SELECT MAX(id) as id, answer_id \ + FROM answer_revision \ + GROUP BY answer_id \ + ) \ + AS mar \ + ON mar.answer_id = a.id \ + INNER JOIN answer_revision AS ar ON ar.id=mar.id \ + WHERE a.deleted=0 \ + ) \ + AS answer_current \ + GROUP BY question_id \ + ) \ + AS answers_combined ON q.id=answers_combined.question_id \ + WHERE q.deleted=0; + + # optional - used by command-line search utility to display document information + sql_query_info = SELECT title, id FROM question WHERE id=$id +} + +index cnprog { + # which document source to index + source = src_cnprog + + # this is path and index file name without extension + # you may need to change this path or create this folder + path = /var/data/sphinx/cnprog_main + + # docinfo (ie. 
per-document attribute values) storage strategy + docinfo = extern + + # morphology + morphology = stem_en + + # stopwords file + #stopwords = /var/data/sphinx/stopwords.txt + + # minimum word length + min_word_len = 1 + + # uncomment next 2 lines to allow wildcard (*) searches + #min_infix_len = 1 + #enable_star = 1 + + # charset encoding type + charset_type = utf-8 +} + +# indexer settings +indexer +{ + # memory limit (default is 32M) + mem_limit = 64M +} + +# searchd settings +searchd +{ + # IP address on which search daemon will bind and accept + # optional, default is to listen on all addresses, + # ie. address = 0.0.0.0 + address = 127.0.0.1 + + # port on which search daemon will listen + port = 3312 + + # searchd run info is logged here - create or change the folder + log = /var/log/sphinx/searchd.log + + # all the search queries are logged here + query_log = /var/log/sphinx/query.log + + # client read timeout, seconds + read_timeout = 5 + + # maximum amount of children to fork + max_children = 30 + + # a file which will contain searchd process ID + pid_file = /var/log/sphinx/searchd.pid + + # maximum amount of matches this daemon would ever retrieve + # from each index and serve to client + max_matches = 1000 +} diff --git a/forum/search/state_manager.py b/forum/search/state_manager.py new file mode 100644 index 00000000..cb1908c6 --- /dev/null +++ b/forum/search/state_manager.py @@ -0,0 +1,152 @@ +#search state manager object +#that lives in the session and takes care of the state +#persistence during the search session +from forum import const +import logging + +ACTIVE_COMMANDS = ( + 'sort', 'search', 'query', + 'reset_query', 'reset_author', 'reset_tags', + 'tags', 'scope', 'page_size', 'start_over', + 'page' +) + +def some_in(what, where): + for element in what: + if element in where: + return True + return False + +class SearchState(object): + def __init__(self): + self.scope= const.DEFAULT_POST_SCOPE + self.query = None + self.tags = None + self.author 
= None + self.sort = const.DEFAULT_POST_SORT_METHOD + self.page_size = const.DEFAULT_QUESTIONS_PAGE_SIZE + self.page = 1 + self.logged_in = False + logging.debug('new search state initialized') + + def __str__(self): + out = 'scope=%s\n' % self.scope + out += 'query=%s\n' % self.query + if self.tags: + out += 'tags=%s\n' % ','.join(self.tags) + out += 'author=%s\n' % self.author + out += 'sort=%s\n' % self.sort + out += 'page_size=%d\n' % self.page_size + out += 'page=%d\n' % self.page + out += 'logged_in=%s\n' % str(self.logged_in) + return out + + def set_logged_out(self): + if self.scope == 'favorite': + self.scope = None + self.logged_in = False + + def set_logged_in(self): + self.logged_in = True + + def reset(self): + #re-initialize, but keep login state + is_logged_in = self.logged_in + self.__init__() + self.logged_in = is_logged_in + + def update_value(self, key, store): + if key in store: + old_value = getattr(self, key) + new_value = store[key] + if new_value != old_value: + setattr(self, key, new_value) + self.reset_page() + + def relax_stickiness(self, input, view_log): + if view_log.get_previous(1) == 'questions': + if not some_in(ACTIVE_COMMANDS, input): + self.reset() + #todo also relax if 'all' scope was clicked twice + + def update_from_user_input(self,input,raw_input = {}): + #todo: this function will probably not + #fit the case of multiple parameters entered at the same time + if 'start_over' in input: + self.reset() + + if 'page' in input: + self.page = input['page'] + #special case - on page flip no other input is accepted + return + + if 'page_size' in input: + self.update_value('page_size',input) + self.reset_page()#todo may be smarter here - start with ~same q + #same as with page - return right away + return + + if 'scope' in input: + if input['scope'] == 'favorite' and self.logged_in == False: + self.reset_scope() + else: + self.update_value('scope',input) + + if 'tags' in input: + if self.tags: + old_tags = self.tags.copy() + self.tags 
= self.tags.union(input['tags']) + if self.tags != old_tags: + self.reset_page() + else: + self.tags = input['tags'] + + #all resets just return + if 'reset_tags' in input: + if self.tags: + self.tags = None + self.reset_page() + return + + #todo: handle case of deleting tags one-by-one + if 'reset_author' in input: + if self.author: + self.author = None + self.reset_page() + return + + if 'reset_query' in input: + self.reset_query() + return + + self.update_value('author',input) + + if 'query' in input: + self.update_value('query',input) + self.sort = 'relevant' + elif 'search' in raw_input:#a case of use nulling search query by hand + self.reset_query() + return + + + if 'sort' in input: + if input['sort'] == 'relevant' and self.query is None: + self.reset_sort() + else: + self.update_value('sort',input) + + def reset_page(self): + self.page = 1 + + def reset_query(self): + if self.query: + self.query = None + self.reset_page() + if self.sort == 'relevant': + self.reset_sort() + + def reset_sort(self): + self.sort = const.DEFAULT_POST_SORT_METHOD + + def reset_scope(self): + self.scope = const.DEFAULT_POST_SCOPE |