summaryrefslogtreecommitdiffstats
path: root/forum/search
diff options
context:
space:
mode:
authorEvgeny Fadeev <evgeny.fadeev@gmail.com>2010-04-25 17:15:26 -0400
committerEvgeny Fadeev <evgeny.fadeev@gmail.com>2010-04-25 17:15:26 -0400
commitcc8337da9046bff5243672e20f1dea9c18b00da6 (patch)
tree77ade69869105f1838df5e8616bb994844507cec /forum/search
parent3122fb8a2599944e623c8e21f285a9e4dd9e132a (diff)
parent02510a462392dd2e9e46e945d51efb374e0dc06f (diff)
downloadaskbot-cc8337da9046bff5243672e20f1dea9c18b00da6.tar.gz
askbot-cc8337da9046bff5243672e20f1dea9c18b00da6.tar.bz2
askbot-cc8337da9046bff5243672e20f1dea9c18b00da6.zip
merged newer ui branch to master
Diffstat (limited to 'forum/search')
-rw-r--r--forum/search/README5
-rw-r--r--forum/search/__init__.py0
-rw-r--r--forum/search/indexer.py9
-rw-r--r--forum/search/sphinx/README4
-rw-r--r--forum/search/sphinx/sphinx.conf127
-rw-r--r--forum/search/state_manager.py152
6 files changed, 297 insertions, 0 deletions
diff --git a/forum/search/README b/forum/search/README
new file mode 100644
index 00000000..c15dc221
--- /dev/null
+++ b/forum/search/README
@@ -0,0 +1,5 @@
+module dealing with search functions
+at this time only question and answer search
+
+that among other things contains
+available full text search implementations
diff --git a/forum/search/__init__.py b/forum/search/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/forum/search/__init__.py
diff --git a/forum/search/indexer.py b/forum/search/indexer.py
new file mode 100644
index 00000000..c7c45c59
--- /dev/null
+++ b/forum/search/indexer.py
@@ -0,0 +1,9 @@
+from django.conf import settings
+from django.db import connection
+
+def create_fulltext_indexes():
+    """Add FULLTEXT indexes to the question and answer tables.
+
+    Does nothing unless settings.DATABASE_ENGINE is 'mysql'.
+    """
+    if settings.DATABASE_ENGINE != 'mysql':
+        return
+    cursor = connection.cursor()
+    # NOTE(review): both tables get the same column list - confirm that
+    # the answer table really has title and tagnames columns
+    for statement in (
+        'ALTER TABLE question ADD FULLTEXT (title, text, tagnames)',
+        'ALTER TABLE answer ADD FULLTEXT (title, text, tagnames)',
+    ):
+        cursor.execute(statement)
+
diff --git a/forum/search/sphinx/README b/forum/search/sphinx/README
new file mode 100644
index 00000000..8c008a23
--- /dev/null
+++ b/forum/search/sphinx/README
@@ -0,0 +1,4 @@
+This directory contains sample configuration for sphinx search
+
+Sphinx is a full text search engine for MySQL (only) with full
+word stemming in English and Russian (other languages are not supported)
diff --git a/forum/search/sphinx/sphinx.conf b/forum/search/sphinx/sphinx.conf
new file mode 100644
index 00000000..bf4bdc8b
--- /dev/null
+++ b/forum/search/sphinx/sphinx.conf
@@ -0,0 +1,127 @@
+#if you have many posts, it's best to configure another index for new posts and
+#periodically merge the diff index to the main
+#this is not important until you get to hundreds of thousands of posts
+
+source src_cnprog
+{
+ # data source
+ type = mysql
+ sql_host = localhost
+ sql_user = cnprog #replace with your db username
+ sql_pass = secret #replace with your db password
+ sql_db = cnprog #replace with your db name
+ # these two are optional
+ #sql_port = 3306
+ #sql_sock = /var/lib/mysql/mysql.sock
+
+ # pre-query, executed before the main fetch query
+ sql_query_pre = SET NAMES utf8
+
+ # main document fetch query - change the table names if you are using a prefix
+ # this query creates a flat document from each question that includes only latest
+ # revisions of the question and all of its answers
+ sql_query = SELECT q.id as id, q.title AS title, q.tagnames as tags, qr.text AS text, answers_combined.text AS answers \
+ FROM question AS q \
+ INNER JOIN \
+ ( \
+ SELECT MAX(id) as id, question_id \
+ FROM question_revision \
+ GROUP BY question_id \
+ ) \
+ AS mqr \
+ ON q.id=mqr.question_id \
+ INNER JOIN question_revision AS qr ON qr.id=mqr.id \
+ LEFT JOIN \
+ ( \
+ SELECT GROUP_CONCAT(answer_current.text SEPARATOR '. ') AS text, \
+ question_id \
+ FROM \
+ ( \
+ SELECT a.question_id as question_id, ar.text as text \
+ FROM answer AS a \
+ INNER JOIN \
+ ( \
+ SELECT MAX(id) as id, answer_id \
+ FROM answer_revision \
+ GROUP BY answer_id \
+ ) \
+ AS mar \
+ ON mar.answer_id = a.id \
+ INNER JOIN answer_revision AS ar ON ar.id=mar.id \
+ WHERE a.deleted=0 \
+ ) \
+ AS answer_current \
+ GROUP BY question_id \
+ ) \
+ AS answers_combined ON q.id=answers_combined.question_id \
+ WHERE q.deleted=0;
+
+ # optional - used by command-line search utility to display document information
+ sql_query_info = SELECT title, id FROM question WHERE id=$id
+}
+
+index cnprog {
+ # which document source to index
+ source = src_cnprog
+
+ # this is path and index file name without extension
+ # you may need to change this path or create this folder
+ path = /var/data/sphinx/cnprog_main
+
+ # docinfo (ie. per-document attribute values) storage strategy
+ docinfo = extern
+
+ # morphology
+ morphology = stem_en
+
+ # stopwords file
+ #stopwords = /var/data/sphinx/stopwords.txt
+
+ # minimum word length
+ min_word_len = 1
+
+ # uncomment next 2 lines to allow wildcard (*) searches
+ #min_infix_len = 1
+ #enable_star = 1
+
+ # charset encoding type
+ charset_type = utf-8
+}
+
+# indexer settings
+indexer
+{
+ # memory limit (default is 32M)
+ mem_limit = 64M
+}
+
+# searchd settings
+searchd
+{
+ # IP address on which search daemon will bind and accept
+ # optional, default is to listen on all addresses,
+ # ie. address = 0.0.0.0
+ address = 127.0.0.1
+
+ # port on which search daemon will listen
+ port = 3312
+
+ # searchd run info is logged here - create or change the folder
+ log = /var/log/sphinx/searchd.log
+
+ # all the search queries are logged here
+ query_log = /var/log/sphinx/query.log
+
+ # client read timeout, seconds
+ read_timeout = 5
+
+ # maximum amount of children to fork
+ max_children = 30
+
+ # a file which will contain searchd process ID
+ pid_file = /var/log/sphinx/searchd.pid
+
+ # maximum amount of matches this daemon would ever retrieve
+ # from each index and serve to client
+ max_matches = 1000
+}
diff --git a/forum/search/state_manager.py b/forum/search/state_manager.py
new file mode 100644
index 00000000..cb1908c6
--- /dev/null
+++ b/forum/search/state_manager.py
@@ -0,0 +1,152 @@
+#search state manager object
+#that lives in the session and takes care of the state
+#persistence during the search session
+from forum import const
+import logging
+
+ACTIVE_COMMANDS = (
+ 'sort', 'search', 'query',
+ 'reset_query', 'reset_author', 'reset_tags',
+ 'tags', 'scope', 'page_size', 'start_over',
+ 'page'
+)
+
+def some_in(what, where):
+    """Return True if at least one element of `what` is contained in `where`.
+
+    An empty `what` gives False.
+    """
+    return any(candidate in where for candidate in what)
+
+class SearchState(object):
+    """Search parameters that are kept for the duration of a search session.
+
+    Holds scope, query, tags, author, sort, page_size, page and logged_in,
+    and knows how to update them from (already parsed) user input.
+    """
+
+    def __init__(self):
+        """Set every search parameter to its default, logged-out value."""
+        self.scope = const.DEFAULT_POST_SCOPE
+        self.query = None
+        self.tags = None
+        self.author = None
+        self.sort = const.DEFAULT_POST_SORT_METHOD
+        self.page_size = const.DEFAULT_QUESTIONS_PAGE_SIZE
+        self.page = 1
+        self.logged_in = False
+        logging.debug('new search state initialized')
+
+    def __str__(self):
+        """Return the state as newline-terminated key=value lines.
+
+        The tags line is left out when no tags are selected.
+        """
+        lines = [
+            'scope=%s\n' % self.scope,
+            'query=%s\n' % self.query,
+        ]
+        if self.tags:
+            lines.append('tags=%s\n' % ','.join(self.tags))
+        lines.extend([
+            'author=%s\n' % self.author,
+            'sort=%s\n' % self.sort,
+            'page_size=%d\n' % self.page_size,
+            'page=%d\n' % self.page,
+            'logged_in=%s\n' % str(self.logged_in),
+        ])
+        return ''.join(lines)
+
+    def set_logged_out(self):
+        """Mark the user as logged out and drop the 'favorite' scope."""
+        if self.scope == 'favorite':
+            # NOTE(review): scope becomes None here, while reset_scope()
+            # uses const.DEFAULT_POST_SCOPE - confirm that None is intended
+            self.scope = None
+        self.logged_in = False
+
+    def set_logged_in(self):
+        """Mark the user as logged in."""
+        self.logged_in = True
+
+    def reset(self):
+        """Re-initialize all search parameters, but keep the login state."""
+        is_logged_in = self.logged_in
+        self.__init__()
+        self.logged_in = is_logged_in
+
+    def update_value(self, key, store):
+        """Copy store[key] to the attribute `key` if present and different.
+
+        A changed value also sends the user back to the first page.
+        """
+        if key in store:
+            old_value = getattr(self, key)
+            new_value = store[key]
+            if new_value != old_value:
+                setattr(self, key, new_value)
+                self.reset_page()
+
+    def relax_stickiness(self, input, view_log):
+        """Reset the state when the 'questions' view is revisited bare.
+
+        The reset happens when view_log.get_previous(1) is 'questions'
+        and `input` has none of the ACTIVE_COMMANDS keys.
+        """
+        if view_log.get_previous(1) == 'questions':
+            if not some_in(ACTIVE_COMMANDS, input):
+                self.reset()
+        #todo also relax if 'all' scope was clicked twice
+
+    def update_from_user_input(self, input, raw_input=None):
+        """Apply one round of user input to the search state.
+
+        `input` maps command names (see ACTIVE_COMMANDS, plus 'author')
+        to their values; `raw_input` is only checked for the presence
+        of the 'search' key and may be omitted.
+
+        'page', 'page_size' and the reset_* commands return early, so
+        any other keys passed in the same call are ignored.
+        """
+        #todo: this function will probably not
+        #fit the case of multiple parameters entered at the same time
+        if raw_input is None:
+            # None sentinel instead of a shared mutable {} default
+            raw_input = {}
+
+        if 'start_over' in input:
+            self.reset()
+
+        if 'page' in input:
+            self.page = input['page']
+            #special case - on page flip no other input is accepted
+            return
+
+        if 'page_size' in input:
+            self.update_value('page_size', input)
+            self.reset_page()#todo may be smarter here - start with ~same q
+            #same as with page - return right away
+            return
+
+        if 'scope' in input:
+            if input['scope'] == 'favorite' and not self.logged_in:
+                self.reset_scope()
+            else:
+                self.update_value('scope', input)
+
+        if 'tags' in input:
+            if self.tags:
+                # union() builds a new set, so the previous one is intact
+                previous_tags = self.tags
+                self.tags = previous_tags.union(input['tags'])
+                if self.tags != previous_tags:
+                    self.reset_page()
+            else:
+                self.tags = input['tags']
+                if self.tags:
+                    # first tag filter changes the result set as well,
+                    # so the old page number is no longer meaningful
+                    self.reset_page()
+
+        #all resets just return
+        if 'reset_tags' in input:
+            if self.tags:
+                self.tags = None
+                self.reset_page()
+            return
+
+        #todo: handle case of deleting tags one-by-one
+        if 'reset_author' in input:
+            if self.author:
+                self.author = None
+                self.reset_page()
+            return
+
+        if 'reset_query' in input:
+            self.reset_query()
+            return
+
+        self.update_value('author', input)
+
+        if 'query' in input:
+            self.update_value('query', input)
+            self.sort = 'relevant'
+        elif 'search' in raw_input:#a case of user nulling search query by hand
+            self.reset_query()
+            return
+
+        if 'sort' in input:
+            # sorting by relevance makes no sense without a query
+            if input['sort'] == 'relevant' and self.query is None:
+                self.reset_sort()
+            else:
+                self.update_value('sort', input)
+
+    def reset_page(self):
+        """Go back to the first page."""
+        self.page = 1
+
+    def reset_query(self):
+        """Clear the query; leave 'relevant' sorting, which depends on it."""
+        if self.query:
+            self.query = None
+            self.reset_page()
+        if self.sort == 'relevant':
+            self.reset_sort()
+
+    def reset_sort(self):
+        """Restore the default sort method."""
+        self.sort = const.DEFAULT_POST_SORT_METHOD
+
+    def reset_scope(self):
+        """Restore the default scope."""
+        self.scope = const.DEFAULT_POST_SCOPE