summaryrefslogtreecommitdiffstats
path: root/askbot/search
diff options
context:
space:
mode:
authorEvgeny Fadeev <evgeny.fadeev@gmail.com>2012-01-16 19:09:08 -0300
committerEvgeny Fadeev <evgeny.fadeev@gmail.com>2012-01-16 19:09:08 -0300
commit76b140c4513282e3d326e813b3b9881240a67f73 (patch)
tree98574298f6ad2f1381c4e6cb4c5120a9ceb7723b /askbot/search
parentca08155700b547e4afa9c4b8bd2da8dfb7eec705 (diff)
downloadaskbot-76b140c4513282e3d326e813b3b9881240a67f73.tar.gz
askbot-76b140c4513282e3d326e813b3b9881240a67f73.tar.bz2
askbot-76b140c4513282e3d326e813b3b9881240a67f73.zip
made dedicated scripts for full text search setup in postgres - per version of the database schema, fts is still broken
Diffstat (limited to 'askbot/search')
-rw-r--r--askbot/search/postgresql/__init__.py21
-rw-r--r--askbot/search/postgresql/question_answer_comment_models.plsql197
-rw-r--r--askbot/search/postgresql/thread_and_post_models_01162012.plsql222
3 files changed, 440 insertions, 0 deletions
diff --git a/askbot/search/postgresql/__init__.py b/askbot/search/postgresql/__init__.py
new file mode 100644
index 00000000..a802a5eb
--- /dev/null
+++ b/askbot/search/postgresql/__init__.py
@@ -0,0 +1,21 @@
+"""Procedures to initialize the full text search in PostgresQL"""
+from django.db import connection
+
+def setup_full_text_search(script_path):
+ """using postgresql database connection,
+ installs the plsql language, if necessary
+ and runs the stript, whose path is given as an argument
+ """
+ fts_init_query = open(script_path).read()
+
+ cursor = connection.cursor()
+ try:
+ #test if language exists
+ cursor.execute("SELECT * FROM pg_language WHERE lanname='plpgsql'")
+ lang_exists = cursor.fetchone()
+ if not lang_exists:
+ cursor.execute("CREATE LANGUAGE plpgsql")
+ #run the main query
+ cursor.execute(fts_init_query)
+ finally:
+ cursor.close()
diff --git a/askbot/search/postgresql/question_answer_comment_models.plsql b/askbot/search/postgresql/question_answer_comment_models.plsql
new file mode 100644
index 00000000..35180003
--- /dev/null
+++ b/askbot/search/postgresql/question_answer_comment_models.plsql
@@ -0,0 +1,197 @@
+/* function testing for existence of a column in a table
+ if table does not exists, function will return "false" */
+CREATE OR REPLACE FUNCTION column_exists(colname text, tablename text)
+RETURNS boolean AS
+$$
+DECLARE
+ q text;
+ onerow record;
+BEGIN
+
+ q = 'SELECT attname FROM pg_attribute WHERE attrelid = ( SELECT oid FROM pg_class WHERE relname = '''||tablename||''') AND attname = '''||colname||'''';
+
+ FOR onerow IN EXECUTE q LOOP
+ RETURN true;
+ END LOOP;
+
+ RETURN false;
+END;
+$$ LANGUAGE plpgsql;
+
+/* function adding tsvector column to table if it does not exists */
+CREATE OR REPLACE FUNCTION add_tsvector_column(colname text, tablename text)
+RETURNS boolean AS
+$$
+DECLARE
+ q text;
+BEGIN
+ IF NOT column_exists(colname, tablename) THEN
+ q = 'ALTER TABLE ' || tablename || ' ADD COLUMN ' || colname || ' tsvector';
+ EXECUTE q;
+ RETURN true;
+ ELSE
+ q = 'UPDATE ' || tablename || ' SET ' || colname || '=NULL';
+ EXECUTE q;
+ RETURN false;
+ END IF;
+END;
+$$ LANGUAGE plpgsql;
+
+/* aggregate function that concatenates tsvectors */
+CREATE OR REPLACE FUNCTION tsv_add(tsv1 tsvector, tsv2 tsvector)
+RETURNS tsvector AS
+$$
+BEGIN
+ RETURN tsv1 || tsv2;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION setup_aggregates() RETURNS boolean AS
+$$
+DECLARE
+ onerow record;
+BEGIN
+ FOR onerow IN SELECT * FROM pg_proc WHERE proname = 'concat_tsvectors' AND proisagg LOOP
+ DROP AGGREGATE concat_tsvectors(tsvector);
+ END LOOP;
+ CREATE AGGREGATE concat_tsvectors (
+ BASETYPE = tsvector,
+ SFUNC = tsv_add,
+ STYPE = tsvector,
+ INITCOND = ''
+ );
+ RETURN true;
+END;
+$$ LANGUAGE plpgsql;
+
+SELECT setup_aggregates();
+
+/* calculates text search vector for question
+DOES not include answers or comments */
+CREATE OR REPLACE FUNCTION get_question_tsv(title text, text text, tagnames text)
+RETURNS tsvector AS
+$$
+BEGIN
+ RETURN setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
+ setweight(to_tsvector('english', coalesce(text, '')), 'B') ||
+ setweight(to_tsvector('english', coalesce(tagnames, '')), 'A');
+END;
+$$ LANGUAGE plpgsql;
+
+/* calculates text search vector for answer text */
+CREATE OR REPLACE FUNCTION get_answer_tsv(text text) RETURNS tsvector AS
+$$
+BEGIN
+ RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B');
+END;
+$$ LANGUAGE plpgsql;
+
+/* calculate text search vector for comment text */
+CREATE OR REPLACE FUNCTION get_comment_tsv(comment text) RETURNS tsvector AS
+$$
+BEGIN
+ RETURN setweight(to_tsvector('english', coalesce(comment, '')), 'C');
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION get_dependent_comments_tsv(object_id integer, tablename text)
+RETURNS tsvector AS
+$$
+DECLARE
+ query text;
+ onerow record;
+BEGIN
+ query = 'SELECT concat_tsvectors(text_search_vector) FROM comment' ||
+ ' WHERE object_id=' ||object_id|| ' AND content_type_id=(' ||
+ ' SELECT id FROM django_content_type' ||
+ ' WHERE app_label=''askbot'' AND name=''' || tablename || ''')';
+ FOR onerow IN EXECUTE query LOOP
+ RETURN onerow.concat_tsvectors;
+ END LOOP;
+ RETURN to_tsvector('');
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION get_dependent_answers_tsv(question_id integer)
+RETURNS tsvector AS
+$$
+DECLARE
+ query text;
+ onerow record;
+BEGIN
+ query = 'SELECT concat_tsvectors(text_search_vector) ' ||
+ 'FROM answer WHERE question_id = ' || question_id ||
+ ' AND deleted=false';
+ FOR onerow IN EXECUTE query LOOP
+ RETURN onerow.concat_tsvectors;
+ END LOOP;
+ RETURN to_tsvector('');
+END;
+$$ LANGUAGE plpgsql;
+
+/* create tsvector columns in the content tables */
+SELECT add_tsvector_column('text_search_vector', 'question');
+SELECT add_tsvector_column('text_search_vector', 'answer');
+SELECT add_tsvector_column('text_search_vector', 'comment');
+
+/* populate tsvectors with data */
+-- comment tsvectors
+UPDATE comment SET text_search_vector = get_comment_tsv(comment);
+
+-- answer tsvectors
+UPDATE answer SET text_search_vector = get_answer_tsv(text);
+UPDATE answer as a SET text_search_vector = text_search_vector ||
+ get_dependent_comments_tsv(a.id, 'answer');
+
+--question tsvectors
+UPDATE question SET text_search_vector = get_question_tsv(title, text, tagnames);
+
+UPDATE question as q SET text_search_vector = text_search_vector ||
+ get_dependent_comments_tsv(q.id, 'question');
+
+UPDATE question as q SET text_search_vector = text_search_vector ||
+ get_dependent_answers_tsv(q.id);
+
+/* set up update triggers */
+CREATE OR REPLACE FUNCTION question_trigger() RETURNS trigger AS
+$$
+BEGIN
+ new.text_search_vector = get_question_tsv(new.title, new.text, new.tagnames);
+ new.text_search_vector = new.text_search_vector ||
+ get_dependent_comments_tsv(new.id, 'question');
+ new.text_search_vector = new.text_search_vector ||
+ get_dependent_answers_tsv(new.id);
+ RETURN new;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS question_search_vector_update_trigger on question;
+CREATE TRIGGER question_search_vector_update_trigger
+BEFORE INSERT OR UPDATE ON question FOR EACH ROW EXECUTE PROCEDURE question_trigger();
+
+/* comment trigger */
+CREATE OR REPLACE FUNCTION comment_trigger() RETURNS trigger AS
+$$
+BEGIN
+ new.text_search_vector = get_comment_tsv(new.comment);
+ RETURN new;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS comment_search_vector_update_trigger on comment;
+CREATE TRIGGER comment_search_vector_update_trigger
+BEFORE INSERT OR UPDATE ON comment FOR EACH ROW EXECUTE PROCEDURE comment_trigger();
+
+/* answer trigger */
+CREATE OR REPLACE FUNCTION answer_trigger() RETURNS trigger AS
+$$
+BEGIN
+ new.text_search_vector = get_answer_tsv(new.text);
+ new.text_search_vector = new.text_search_vector ||
+ get_dependent_comments_tsv(new.id, 'answer');
+ RETURN new;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS answer_search_vector_update_trigger on answer;
+CREATE TRIGGER answer_search_vector_update_trigger
+BEFORE INSERT OR UPDATE ON answer FOR EACH ROW EXECUTE PROCEDURE answer_trigger();
+
+CREATE INDEX askbot_search_idx ON question USING gin(text_search_vector);
diff --git a/askbot/search/postgresql/thread_and_post_models_01162012.plsql b/askbot/search/postgresql/thread_and_post_models_01162012.plsql
new file mode 100644
index 00000000..7156833b
--- /dev/null
+++ b/askbot/search/postgresql/thread_and_post_models_01162012.plsql
@@ -0,0 +1,222 @@
+/* function testing for existence of a column in a table
+ if table does not exists, function will return "false" */
+CREATE OR REPLACE FUNCTION column_exists(colname text, tablename text)
+RETURNS boolean AS
+$$
+DECLARE
+ q text;
+ onerow record;
+BEGIN
+
+ q = 'SELECT attname FROM pg_attribute WHERE attrelid = ( SELECT oid FROM pg_class WHERE relname = '''||tablename||''') AND attname = '''||colname||'''';
+
+ FOR onerow IN EXECUTE q LOOP
+ RETURN true;
+ END LOOP;
+
+ RETURN false;
+END;
+$$ LANGUAGE plpgsql;
+
+/* function adding tsvector column to table if it does not exists */
+CREATE OR REPLACE FUNCTION add_tsvector_column(colname text, tablename text)
+RETURNS boolean AS
+$$
+DECLARE
+ q text;
+BEGIN
+ IF NOT column_exists(colname, tablename) THEN
+ q = 'ALTER TABLE ' || tablename || ' ADD COLUMN ' || colname || ' tsvector';
+ EXECUTE q;
+ RETURN true;
+ ELSE
+ q = 'UPDATE ' || tablename || ' SET ' || colname || '=NULL';
+ EXECUTE q;
+ RETURN false;
+ END IF;
+END;
+$$ LANGUAGE plpgsql;
+
+/* aggregate function that concatenates tsvectors */
+CREATE OR REPLACE FUNCTION tsv_add(tsv1 tsvector, tsv2 tsvector)
+RETURNS tsvector AS
+$$
+BEGIN
+ RETURN tsv1 || tsv2;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION setup_aggregates() RETURNS boolean AS
+$$
+DECLARE
+ onerow record;
+BEGIN
+ FOR onerow IN SELECT * FROM pg_proc WHERE proname = 'concat_tsvectors' AND proisagg LOOP
+ DROP AGGREGATE concat_tsvectors(tsvector);
+ END LOOP;
+ CREATE AGGREGATE concat_tsvectors (
+ BASETYPE = tsvector,
+ SFUNC = tsv_add,
+ STYPE = tsvector,
+ INITCOND = ''
+ );
+ RETURN true;
+END;
+$$ LANGUAGE plpgsql;
+
+SELECT setup_aggregates();
+
+/* calculates text search vector for the individual thread row
+DOES not include question body post, answers or comments */
+CREATE OR REPLACE FUNCTION get_thread_tsv(title text, tagnames text)
+RETURNS tsvector AS
+$$
+BEGIN
+ /* todo add weight depending on votes */
+ RETURN setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
+ setweight(to_tsvector('english', coalesce(tagnames, '')), 'A');
+END;
+$$ LANGUAGE plpgsql;
+
+/* calculates text seanch vector for the individual question row */
+CREATE OR REPLACE FUNCTION get_post_tsv(text text, post_type text)
+RETURNS tsvector AS
+$$
+BEGIN
+ /* todo adjust weights to reflect votes */
+ IF post_type='question' THEN
+ RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B');
+ ELSIF post_type='answer' THEN
+ /* todo reflect whether the answer acepted or not */
+ RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B');
+ ELSIF post_type='comment' THEN
+ RETURN setweight(to_tsvector('english', coalesce(text, '')), 'C');
+ ELSE
+ RETURN to_tsvector('');
+ END IF;
+END;
+$$ LANGUAGE plpgsql;
+
+/* calculates text search vector for the question body part by thread id
+here we extract question title and the text by thread_id and then
+calculate the text search vector. In the future question
+title will be moved to the askbot_thread table and this function
+will be simpler.
+*/
+CREATE OR REPLACE FUNCTION get_thread_question_tsv(thread_id integer)
+RETURNS tsvector AS
+$$
+DECLARE
+ query text;
+ onerow record;
+BEGIN
+ query = 'SELECT text FROM askbot_post WHERE thread_id=' || thread_id ||
+ ' AND post_type=''question'' AND deleted=false';
+ FOR onerow in EXECUTE query LOOP
+ RETURN get_post_tsv(onerow.text, 'question');
+ END LOOP;
+ RETURN to_tsvector('');
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION get_dependent_comments_tsv(parent_id integer)
+RETURNS tsvector AS
+$$
+DECLARE
+ query text;
+ onerow record;
+BEGIN
+ query = 'SELECT concat_tsvectors(text_search_vector) FROM askbot_post' ||
+ ' WHERE parent_id=' || parent_id ||
+ ' AND post_type=''comment'' AND deleted=false';
+ FOR onerow IN EXECUTE query LOOP
+ RETURN onerow.concat_tsvectors;
+ END LOOP;
+ RETURN to_tsvector('');
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION get_dependent_answers_tsv(thread_id integer)
+RETURNS tsvector AS
+$$
+DECLARE
+ query text;
+ onerow record;
+BEGIN
+ query = 'SELECT concat_tsvectors(text_search_vector) ' ||
+ 'FROM askbot_post WHERE thread_id = ' || thread_id ||
+ ' AND deleted=false';
+ FOR onerow IN EXECUTE query LOOP
+ RETURN onerow.concat_tsvectors;
+ END LOOP;
+ RETURN to_tsvector('');
+END;
+$$ LANGUAGE plpgsql;
+
+/* create tsvector columns in the content tables */
+SELECT add_tsvector_column('text_search_vector', 'askbot_thread');
+SELECT add_tsvector_column('text_search_vector', 'askbot_post');
+
+/* populate tsvectors with data */
+-- post tsvectors
+UPDATE askbot_post set text_search_vector = get_post_tsv(text, 'comment') WHERE post_type='comment';
+UPDATE askbot_post SET text_search_vector = get_post_tsv(text, 'answer') WHERE post_type='answer';
+UPDATE askbot_post SET text_search_vector = get_post_tsv(text, 'question') WHERE post_type='question';
+UPDATE askbot_post as q SET text_search_vector = text_search_vector ||
+ get_dependent_comments_tsv(q.id) WHERE post_type IN ('question', 'answer');
+
+--thread tsvector
+UPDATE askbot_thread SET text_search_vector = get_thread_tsv(title, tagnames);
+UPDATE askbot_thread as t SET text_search_vector = text_search_vector ||
+ get_dependent_answers_tsv(t.id) ||
+ get_thread_question_tsv(t.id);
+
+/* one trigger per table for tsv updates */
+
+/* set up update triggers */
+CREATE OR REPLACE FUNCTION thread_update_trigger() RETURNS trigger AS
+$$
+BEGIN
+ new.text_search_vector = get_thread_tsv(new.title, new.tagnames) ||
+ get_thread_question_tsv(new.id) ||
+ get_dependent_answers_tsv(new.id);
+ RETURN new;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS thread_search_vector_update_trigger on askbot_thread;
+CREATE TRIGGER thread_search_vector_update_trigger
+BEFORE UPDATE ON askbot_thread FOR EACH ROW EXECUTE PROCEDURE thread_update_trigger();
+
+CREATE OR REPLACE FUNCTION thread_insert_trigger() RETURNS trigger AS
+$$
+BEGIN
+ new.text_search_vector = get_thread_tsv(new.title, new.tagnames);
+ RETURN new;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS thread_search_vector_insert_trigger on askbot_thread;
+CREATE TRIGGER thread_search_vector_insert_trigger
+BEFORE INSERT ON askbot_thread FOR EACH ROW EXECUTE PROCEDURE thread_insert_trigger();
+
+/* post trigger */
+CREATE OR REPLACE FUNCTION post_trigger() RETURNS trigger AS
+$$
+BEGIN
+ IF new.post_type = 'question' THEN
+ new.text_search_vector = get_post_tsv(new.text, 'question') ||
+ get_dependent_comments_tsv(new.id);
+ ELSIF new.post_type = 'answer' THEN
+ new.text_search_vector = get_post_tsv(new.text, 'answer') ||
+ get_dependent_comments_tsv(new.id);
+ ELSIF new.post_type = 'comment' THEN
+ new.text_search_vector = get_post_tsv(new.text, 'comment');
+ END IF;
+ UPDATE askbot_thread SET id=new.thread_id WHERE id=new.thread_id;
+ return new;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS post_search_vector_update_trigger on askbot_post;
+CREATE TRIGGER post_search_vector_update_trigger
+BEFORE INSERT OR UPDATE ON askbot_post FOR EACH ROW EXECUTE PROCEDURE post_trigger();
+
+CREATE INDEX askbot_search_idx ON askbot_thread USING gin(text_search_vector);