summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvgeny Fadeev <evgeny.fadeev@gmail.com>2011-12-04 16:43:49 -0300
committerEvgeny Fadeev <evgeny.fadeev@gmail.com>2011-12-04 16:43:49 -0300
commit7736c91d0c75eadd75b7c99d1b43068e5e7f1c65 (patch)
tree324db86230dd5236d2a0a193bb877c51e2cf5b60
parent606ae357adc0c3d8e92497d4e78c65af3932973a (diff)
downloadaskbot-7736c91d0c75eadd75b7c99d1b43068e5e7f1c65.tar.gz
askbot-7736c91d0c75eadd75b7c99d1b43068e5e7f1c65.tar.bz2
askbot-7736c91d0c75eadd75b7c99d1b43068e5e7f1c65.zip
first pass change of search index setups for sphinx and postgres, not tested
-rw-r--r--askbot/management/commands/setup_postgresql_full_text_search.plsql66
-rw-r--r--askbot/search/sphinx/sphinx.conf4
2 files changed, 59 insertions, 11 deletions
diff --git a/askbot/management/commands/setup_postgresql_full_text_search.plsql b/askbot/management/commands/setup_postgresql_full_text_search.plsql
index 35180003..5a723e35 100644
--- a/askbot/management/commands/setup_postgresql_full_text_search.plsql
+++ b/askbot/management/commands/setup_postgresql_full_text_search.plsql
@@ -66,15 +66,24 @@ $$ LANGUAGE plpgsql;
SELECT setup_aggregates();
-/* calculates text search vector for question
-DOES not include answers or comments */
-CREATE OR REPLACE FUNCTION get_question_tsv(title text, text text, tagnames text)
+/* calculates text search vector for the individual thread row
+DOES not include question body post, answers or comments */
+CREATE OR REPLACE FUNCTION get_thread_tsv(tagnames text)
RETURNS tsvector AS
$$
BEGIN
- RETURN setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
+ /* todo: add title as it belongs to the whole thread */
+ RETURN setweight(to_tsvector('english', coalesce(tagnames, '')), 'A');
+END;
+$$ LANGUAGE plpgsql;
+
+/* calculates text seanch vector for the individual question row */
+CREATE OR REPLACE FUNCTION get_question_tsv(title text, text text)
+RETURNS tsvector AS
+$$
+ /* todo: move title to get_thread_tsv */
+ RETURN setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(text, '')), 'B') ||
- setweight(to_tsvector('english', coalesce(tagnames, '')), 'A');
END;
$$ LANGUAGE plpgsql;
@@ -94,6 +103,27 @@ BEGIN
END;
$$ LANGUAGE plpgsql;
+/* calculates text search vector for the question body part by thread id
+here we extract question title and the text by thread_id and then
+calculate the text search vector. In the future question
+title will be moved to the askbot_thread table and this function
+will be simpler.
+*/
+CREATE OR REPLACE FUNCTION get_thread_question_tsv(thread_id)
+RETURNS tsvector AS
+$$
+DECLARE
+ query text;
+ onerow record;
+BEGIN
+ query = 'SELECT title, text FROM question WHERE thread_id=' || thread_id;
+ FOR onerow in EXECUTE query LOOP
+ RETURN get_question_tsv(onerow.title, onerow.text);
+ END LOOP;
+ RETURN to_tsvector('');
+END;
+$$ LANGUAGE plpgsql;
+
CREATE OR REPLACE FUNCTION get_dependent_comments_tsv(object_id integer, tablename text)
RETURNS tsvector AS
$$
@@ -102,7 +132,7 @@ DECLARE
onerow record;
BEGIN
query = 'SELECT concat_tsvectors(text_search_vector) FROM comment' ||
- ' WHERE object_id=' ||object_id|| ' AND content_type_id=(' ||
+ ' WHERE object_id=' || object_id || ' AND content_type_id=(' ||
' SELECT id FROM django_content_type' ||
' WHERE app_label=''askbot'' AND name=''' || tablename || ''')';
FOR onerow IN EXECUTE query LOOP
@@ -130,6 +160,7 @@ END;
$$ LANGUAGE plpgsql;
/* create tsvector columns in the content tables */
+SELECT add_tsvector_column('text_search_vector', 'askbot_thread');
SELECT add_tsvector_column('text_search_vector', 'question');
SELECT add_tsvector_column('text_search_vector', 'answer');
SELECT add_tsvector_column('text_search_vector', 'comment');
@@ -144,7 +175,8 @@ UPDATE answer as a SET text_search_vector = text_search_vector ||
get_dependent_comments_tsv(a.id, 'answer');
--question tsvectors
-UPDATE question SET text_search_vector = get_question_tsv(title, text, tagnames);
+UPDATE askbot_thread SET text_search_vector = get_thread_tsv(tagnames);
+UPDATE question SET text_search_vector = get_post_question_tsv(title, text);
UPDATE question as q SET text_search_vector = text_search_vector ||
get_dependent_comments_tsv(q.id, 'question');
@@ -153,10 +185,12 @@ UPDATE question as q SET text_search_vector = text_search_vector ||
get_dependent_answers_tsv(q.id);
/* set up update triggers */
-CREATE OR REPLACE FUNCTION question_trigger() RETURNS trigger AS
+CREATE OR REPLACE FUNCTION thread_trigger() RETURNS trigger AS
$$
BEGIN
- new.text_search_vector = get_question_tsv(new.title, new.text, new.tagnames);
+ new.text_search_vector = get_thread_tsv(new.tagnames);
+ new.text_search_vector = new.text_search_vector ||
+ get_thread_question_tsv(new.id)
new.text_search_vector = new.text_search_vector ||
get_dependent_comments_tsv(new.id, 'question');
new.text_search_vector = new.text_search_vector ||
@@ -164,8 +198,20 @@ BEGIN
RETURN new;
END;
$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS thread_search_vector_update_trigger on question;
+CREATE TRIGGER thread_search_vector_update_trigger
+BEFORE INSERT OR UPDATE ON question FOR EACH ROW EXECUTE PROCEDURE thread_trigger();
+
+/* question trigger */
+CREATE OR REPLACE FUNCTION question_trigger RETURNS trigger AS
+$$
+BEGIN
+ new.text_search_vector = get_question_tsv(new.title, new.text);
+ return new;
+END;
+$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS question_search_vector_update_trigger on question;
-CREATE TRIGGER question_search_vector_update_trigger
+CREATE TRIGGER question_search_vector_update_trigger
BEFORE INSERT OR UPDATE ON question FOR EACH ROW EXECUTE PROCEDURE question_trigger();
/* comment trigger */
diff --git a/askbot/search/sphinx/sphinx.conf b/askbot/search/sphinx/sphinx.conf
index cdf0f004..2c252f3d 100644
--- a/askbot/search/sphinx/sphinx.conf
+++ b/askbot/search/sphinx/sphinx.conf
@@ -20,8 +20,10 @@ source src_askbot
# main document fetch query - change the table names if you are using a prefix
# this query creates a flat document from each question that includes only latest
# revisions of the question and all of it's answers
- sql_query = SELECT q.id as id, q.title AS title, q.tagnames as tags, qr.text AS text, answers_combined.text AS answers \
+ sql_query = SELECT q.id as id, q.title AS title, t.tagnames as tags, qr.text AS text, answers_combined.text AS answers \
FROM question AS q \
+ INNER JOIN askbot_thread as t \
+ ON t.id=q.thread_id \
INNER JOIN \
( \
SELECT MAX(id) as id, question_id \