From cd5d1978ba9f2a0dfdd9a59bfef51669863de6f1 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Tue, 17 Nov 2009 22:13:51 -0500 Subject: full text search with Sphinx, added session-based greeting message for the first time visitor --- INSTALL | 40 ++++++- context.py | 1 + forum/models.py | 11 ++ forum/templatetags/extra_tags.py | 4 +- forum/views.py | 12 +- middleware/anon_user.py | 8 ++ settings.py | 1 + settings_local.py.dist | 27 +++-- sphinx/sphinx.conf | 127 +++++++++++++++++++++ .../authopenid/external_legacy_login_info.html | 3 +- templates/content/style/style.css | 5 +- templates/questions.html | 45 +++++++- templates/user_info.html | 2 +- 13 files changed, 261 insertions(+), 25 deletions(-) create mode 100644 sphinx/sphinx.conf diff --git a/INSTALL b/INSTALL index b028a788..38d3a365 100644 --- a/INSTALL +++ b/INSTALL @@ -6,7 +6,8 @@ B. INSTALLATION 2. Database 3. Running CNPROG in the development server 4. Installation under Apache/WSGI - 5. Miscellaneous + 5. Full text search + 6. Miscellaneous C. CONFIGURATION PARAMETERS (settings_local.py) @@ -37,6 +38,12 @@ sudo easy_install markdown2 6. Django Debug Toolbar http://github.com/robhudson/django-debug-toolbar/tree/master +7. djangosphinx (optional - for full text questions+answer+tag) +http://github.com/dcramer/django-sphinx/tree/master/djangosphinx + +8. sphinx search engine (optional, works together with djangosphinx) +http://sphinxsearch.com/downloads.html + NOTES: django_authopenid is included into CNPROG code and is significantly modified. http://code.google.com/p/django-authopenid/ no need to install this library @@ -165,7 +172,36 @@ WSGIPythonEggs /var/python/eggs #must be readable and writable by apache ------------- -5. Miscellaneous +5. 
Full text search (using sphinx search) + Currently full text search works only with sphinx search engine + Sphinx at this time supports only MySQL and PostgreSQL databases + to enable this, install sphinx search engine and djangosphinx + + configure sphinx, sample configuration can be found in + sphinx/sphinx.conf; the file usually goes somewhere in the /etc tree + + build cnprog index first time manually + + % indexer --config /path/to/sphinx.conf --index cnprog + + setup cron job to rebuild index periodically with command + your crontab entry may be something like + + 0 9,15,21 * * * /usr/local/bin/indexer --config /etc/sphinx/sphinx.conf --all --rotate >/dev/null 2>&1 + adjust it as necessary - this one will reindex three times a day at 9am, 3pm and 9pm + + if your forum grows very big (good luck with that :) you'll + need two search indices: one diff index and one main + please refer to online sphinx search documentation for the information + on the subject http://sphinxsearch.com/docs/ + + in settings_local.py set + USE_SPHINX_SEARCH=True + adjust other settings that have SPHINX_* prefix accordingly + remember that there must be a trailing comma in parentheses for + SPHINX_SEARCH_INDICES tuple - particularly with just one item! + +6. Miscellaneous There are some demo scripts under sql_scripts folder, including badges and test accounts for CNProg.com. 
You diff --git a/context.py b/context.py index 680d1c3c..26d326a7 100644 --- a/context.py +++ b/context.py @@ -14,6 +14,7 @@ def application_settings(context): 'WIKI_ON':settings.WIKI_ON, 'USE_EXTERNAL_LEGACY_LOGIN':settings.USE_EXTERNAL_LEGACY_LOGIN, 'RESOURCE_REVISION':settings.RESOURCE_REVISION, + 'USE_SPHINX_SEARCH':settings.USE_SPHINX_SEARCH, } return {'settings':my_settings} diff --git a/forum/models.py b/forum/models.py index 39058bea..60fcee0a 100644 --- a/forum/models.py +++ b/forum/models.py @@ -14,6 +14,10 @@ from django.utils.translation import ugettext as _ from django.utils.safestring import mark_safe import django.dispatch import settings +import logging + +if settings.USE_SPHINX_SEARCH == True: + from djangosphinx.models import SphinxSearch from forum.managers import * from const import * @@ -184,6 +188,13 @@ class Question(models.Model): votes = generic.GenericRelation(Vote) flagged_items = generic.GenericRelation(FlaggedItem) + if settings.USE_SPHINX_SEARCH == True: + search = SphinxSearch( + index=' '.join(settings.SPHINX_SEARCH_INDICES), + mode='SPH_MATCH_ALL', + ) + logging.debug('have sphinx search') + objects = QuestionManager() def save(self, **kwargs): diff --git a/forum/templatetags/extra_tags.py b/forum/templatetags/extra_tags.py index 8bd0e128..96dc9024 100644 --- a/forum/templatetags/extra_tags.py +++ b/forum/templatetags/extra_tags.py @@ -332,8 +332,8 @@ class BlockResourceNode(template.Node): for item in self.items: bit = item.render(context) out += bit - out = out.replace(' ','') - return os.path.normpath(out) + '?v=%d' % settings.RESOURCE_REVISION + out = os.path.normpath(out) + '?v=%d' % settings.RESOURCE_REVISION + return out.replace(' ','') @register.tag(name='blockresource') def blockresource(parser,token): diff --git a/forum/views.py b/forum/views.py index e4ccaa16..8f7d07fd 100644 --- a/forum/views.py +++ b/forum/views.py @@ -2193,10 +2193,16 @@ def search(request): view_id = "latest" orderby = "-added_at" - objects = 
Question.objects.filter(deleted=False).extra(where=['title like %s'], params=['%' + keywords + '%']).order_by(orderby) + if settings.USE_SPHINX_SEARCH == True: + #search index is now free of delete questions and answers + #so there is not "antideleted" filtering here + objects = Question.search.query(keywords) + #no related selection either because we're relying on full text search here + else: + objects = Question.objects.filter(deleted=False).extra(where=['title like %s'], params=['%' + keywords + '%']).order_by(orderby) + # RISK - inner join queries + objects = objects.select_related(); - # RISK - inner join queries - objects = objects.select_related(); objects_list = Paginator(objects, pagesize) questions = objects_list.page(page) diff --git a/middleware/anon_user.py b/middleware/anon_user.py index c7ff05bc..8422d89b 100644 --- a/middleware/anon_user.py +++ b/middleware/anon_user.py @@ -1,6 +1,8 @@ from django.http import HttpResponseRedirect from django_authopenid.util import get_next_url +from django.utils.translation import ugettext as _ from user_messages import create_message, get_and_delete_messages +import settings import logging class AnonymousMessageManager(object): @@ -24,3 +26,9 @@ class ConnectToSessionMessagesMiddleware(object): request.user.__deepcopy__ = dummy_deepcopy #plug on deepcopy which may be called by django db "driver" request.user.message_set = AnonymousMessageManager(request) #here request is linked to anon user request.user.get_and_delete_messages = request.user.message_set.get_and_delete + + #also set the first greeting one time per session only + if 'greeting_set' not in request.session: + request.session['greeting_set'] = True + msg = _('first time greeting with %(url)s') % {'url':settings.GREETING_URL} + request.user.message_set.create(message=msg) diff --git a/settings.py b/settings.py index daada933..3fc37dce 100644 --- a/settings.py +++ b/settings.py @@ -73,6 +73,7 @@ INSTALLED_APPS = ( 'django.contrib.humanize', 'forum', 
'django_authopenid', + 'djangosphinx', #'debug_toolbar' , 'user_messages', ) diff --git a/settings_local.py.dist b/settings_local.py.dist index 685c9fb8..33402d35 100644 --- a/settings_local.py.dist +++ b/settings_local.py.dist @@ -29,13 +29,6 @@ EMAIL_USE_TLS=False #LOCALIZATIONS TIME_ZONE = 'America/Tijuana' -#OTHER SETTINGS -APP_TITLE = u'CNPROG Q&A Forum' -APP_KEYWORDS = u'CNPROG,forum,community' -APP_DESCRIPTION = u'Ask and answer questions.' -APP_INTRO = u'

Ask and answer questions, make the world better!

' -APP_COPYRIGHT = 'Copyright CNPROG, 2009. Some rights reserved under creative commons license.' - ########################### # # this will allow running your forum with url like http://site.com/forum @@ -47,6 +40,16 @@ APP_COPYRIGHT = 'Copyright CNPROG, 2009. Some rights reserved under creative com # FORUM_SCRIPT_ALIAS = '' #no leading slash, default = '' empty string + +#OTHER SETTINGS +APP_TITLE = u'CNPROG Q&A Forum' +APP_KEYWORDS = u'CNPROG,forum,community' +APP_DESCRIPTION = u'Ask and answer questions.' +APP_INTRO = u'

Ask and answer questions, make the world better!

' +APP_COPYRIGHT = 'Copyright CNPROG, 2009. Some rights reserved under creative commons license.' +LOGIN_URL = '/%s%s%s' % (FORUM_SCRIPT_ALIAS,'account/','signin/') +GREETING_URL = LOGIN_URL #may be url of "faq" page or "about", etc + USE_I18N = True LANGUAGE_CODE = 'en' EMAIL_VALIDATION = 'off' #string - on|off @@ -62,7 +65,15 @@ EXTERNAL_LEGACY_LOGIN_HOST = 'login.cnprog.com' EXTERNAL_LEGACY_LOGIN_PORT = 80 EXTERNAL_LEGACY_LOGIN_PROVIDER_NAME = 'CNPROG' FEEDBACK_SITE_URL = None #None or url -LOGIN_URL = '/%s%s%s' % (FORUM_SCRIPT_ALIAS,'account/','signin/') DJANGO_VERSION = 1.1 RESOURCE_REVISION=4 + +USE_SPHINX_SEARCH = True #if True all SPHINX_* settings are required +#also sphinx search engine and djangosphinxs app must be installed +#sample sphinx configuration file is /sphinx/sphinx.conf +SPHINX_API_VERSION = 0x113 #refer to djangosphinx documentation +SPHINX_SEARCH_INDICES=('cnprog',) #a tuple of index names remember about a comma after the +#last item, especially if you have just one :) +SPHINX_SERVER='localhost' +SPHINX_PORT=3312 diff --git a/sphinx/sphinx.conf b/sphinx/sphinx.conf new file mode 100644 index 00000000..bf4bdc8b --- /dev/null +++ b/sphinx/sphinx.conf @@ -0,0 +1,127 @@ +#if you have many posts, it's best to configure another index for new posts and +#periodically merge the diff index to the main +#this is not important until you get to hundreds of thousands posts + +source src_cnprog +{ + # data source + type = mysql + sql_host = localhost + sql_user = cnprog #replace with your db username + sql_pass = secret #replace with your db password + sql_db = cnprog #replace with your db name + # these two are optional + #sql_port = 3306 + #sql_sock = /var/lib/mysql/mysql.sock + + # pre-query, executed before the main fetch query + sql_query_pre = SET NAMES utf8 + + # main document fetch query - change the table names if you are using a prefix + # this query creates a flat document from each question that includes only latest + # revisions of the 
question and all of its answers + sql_query = SELECT q.id as id, q.title AS title, q.tagnames as tags, qr.text AS text, answers_combined.text AS answers \ + FROM question AS q \ + INNER JOIN \ + ( \ + SELECT MAX(id) as id, question_id \ + FROM question_revision \ + GROUP BY question_id \ + ) \ + AS mqr \ + ON q.id=mqr.question_id \ + INNER JOIN question_revision AS qr ON qr.id=mqr.id \ + LEFT JOIN \ + ( \ + SELECT GROUP_CONCAT(answer_current.text SEPARATOR '. ') AS text, \ + question_id \ + FROM \ + ( \ + SELECT a.question_id as question_id, ar.text as text \ + FROM answer AS a \ + INNER JOIN \ + ( \ + SELECT MAX(id) as id, answer_id \ + FROM answer_revision \ + GROUP BY answer_id \ + ) \ + AS mar \ + ON mar.answer_id = a.id \ + INNER JOIN answer_revision AS ar ON ar.id=mar.id \ + WHERE a.deleted=0 \ + ) \ + AS answer_current \ + GROUP BY question_id \ + ) \ + AS answers_combined ON q.id=answers_combined.question_id \ + WHERE q.deleted=0; + + # optional - used by command-line search utility to display document information + sql_query_info = SELECT title, id FROM question WHERE id=$id +} + +index cnprog { + # which document source to index + source = src_cnprog + + # this is path and index file name without extension + # you may need to change this path or create this folder + path = /var/data/sphinx/cnprog_main + + # docinfo (ie. per-document attribute values) storage strategy + docinfo = extern + + # morphology + morphology = stem_en + + # stopwords file + #stopwords = /var/data/sphinx/stopwords.txt + + # minimum word length + min_word_len = 1 + + # uncomment next 2 lines to allow wildcard (*) searches + #min_infix_len = 1 + #enable_star = 1 + + # charset encoding type + charset_type = utf-8 +} + +# indexer settings +indexer +{ + # memory limit (default is 32M) + mem_limit = 64M +} + +# searchd settings +searchd +{ + # IP address on which search daemon will bind and accept connections + # optional, default is to listen on all addresses, + # ie. 
address = 0.0.0.0 + address = 127.0.0.1 + + # port on which search daemon will listen + port = 3312 + + # searchd run info is logged here - create or change the folder + log = /var/log/sphinx/searchd.log + + # all the search queries are logged here + query_log = /var/log/sphinx/query.log + + # client read timeout, seconds + read_timeout = 5 + + # maximum amount of children to fork + max_children = 30 + + # a file which will contain searchd process ID + pid_file = /var/log/sphinx/searchd.pid + + # maximum amount of matches this daemon would ever retrieve + # from each index and serve to client + max_matches = 1000 +} diff --git a/templates/authopenid/external_legacy_login_info.html b/templates/authopenid/external_legacy_login_info.html index e2f4713e..c200b29d 100644 --- a/templates/authopenid/external_legacy_login_info.html +++ b/templates/authopenid/external_legacy_login_info.html @@ -8,9 +8,8 @@ {% spaceless %}
-fill in template templates/authopenid/external_legacy_login_info.html -and explain how to change password, recover password, etc. +{% trans "how to login with password through external login website" %}
{% endspaceless %} {% endblock %} diff --git a/templates/content/style/style.css b/templates/content/style/style.css index 6c1d6a3f..65a323db 100644 --- a/templates/content/style/style.css +++ b/templates/content/style/style.css @@ -163,7 +163,7 @@ blockquote border-right:1px solid #b4b48e; border-bottom:1px solid #b4b48e;*/ background: white;/* #f9f7ed;*/ - margin:10px 0 10px 0; + margin:10px 0 0 0; /*background:url(../images/quest-bg.gif) repeat-x top;*/ } #listA .qstA thumb {float:left; } @@ -1144,6 +1144,9 @@ ul.bulleta li {background:url(../images/bullet_green.gif) no-repeat 0px 2px; pad .message p { margin-bottom:0px; } +.message p.space-above { + margin-top:10px; +} .warning{color:red;} .darkred{color:darkred;} diff --git a/templates/questions.html b/templates/questions.html index f298381e..47bda129 100644 --- a/templates/questions.html +++ b/templates/questions.html @@ -21,7 +21,21 @@ {% endblock %} {% block content %}
-
{% if searchtag %}{% trans "Found by tags" %}{% else %}{% if searchtitle %}{% trans "Found by title" %}{% else %}{% trans "All questions" %}{% endif %}{% endif %}
+
+ {% if searchtag %} + {% trans "Found by tags" %} + {% else %} + {% if searchtitle %} + {% if settings.USE_SPHINX_SEARCH %} + {% trans "Search results" %} + {% else %} + {% trans "Found by title" %} + {% endif %} + {% else %} + {% trans "All questions" %} + {% endif %} + {% endif %} +
{% endfor %} + {% if searchtitle %} + {% if questions_count == 0 %} +

+ {% trans "Did not find anything?" %} + {% else %} +

+ {% trans "Did not find what you were looking for?" %} + {% endif %} + {% trans "Please, post your question!" %} +

+ {% endif %} {% endblock %} @@ -130,11 +155,19 @@ {% endblocktrans %} {% else %} {% if searchtitle %} - {% blocktrans count questions_count as cnt with questions_count|intcomma as q_num %} - have total {{q_num}} questions containing {{searchtitle}} - {% plural %} - have total {{q_num}} questions containing {{searchtitle}} - {% endblocktrans %} + {% if settings.USE_SPHINX_SEARCH %} + {% blocktrans count questions_count as cnt with questions_count|intcomma as q_num %} + have total {{q_num}} questions containing {{searchtitle}} in full text + {% plural %} + have total {{q_num}} questions containing {{searchtitle}} in full text + {% endblocktrans %} + {% else %} + {% blocktrans count questions_count as cnt with questions_count|intcomma as q_num %} + have total {{q_num}} questions containing {{searchtitle}} + {% plural %} + have total {{q_num}} questions containing {{searchtitle}} + {% endblocktrans %} + {% endif %} {% else %} {% blocktrans count questions as cnt with questions_count|intcomma as q_num %} have total {{q_num}} questions diff --git a/templates/user_info.html b/templates/user_info.html index 4ebcddd6..c550e13f 100644 --- a/templates/user_info.html +++ b/templates/user_info.html @@ -19,7 +19,7 @@
{{view_user.reputation|intcomma}}
-

{% trans "karma" %}

+

{% trans "reputation" %}

-- cgit v1.2.3-1-g7c22