diff options
author | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2012-01-16 19:09:08 -0300 |
---|---|---|
committer | Evgeny Fadeev <evgeny.fadeev@gmail.com> | 2012-01-16 19:09:08 -0300 |
commit | 76b140c4513282e3d326e813b3b9881240a67f73 (patch) | |
tree | 98574298f6ad2f1381c4e6cb4c5120a9ceb7723b /askbot/search | |
parent | ca08155700b547e4afa9c4b8bd2da8dfb7eec705 (diff) | |
download | askbot-76b140c4513282e3d326e813b3b9881240a67f73.tar.gz askbot-76b140c4513282e3d326e813b3b9881240a67f73.tar.bz2 askbot-76b140c4513282e3d326e813b3b9881240a67f73.zip |
made dedicated scripts for full text search setup in postgres - per version of the database schema, fts is still broken
Diffstat (limited to 'askbot/search')
-rw-r--r-- | askbot/search/postgresql/__init__.py | 21 | ||||
-rw-r--r-- | askbot/search/postgresql/question_answer_comment_models.plsql | 197 | ||||
-rw-r--r-- | askbot/search/postgresql/thread_and_post_models_01162012.plsql | 222 |
3 files changed, 440 insertions, 0 deletions
diff --git a/askbot/search/postgresql/__init__.py b/askbot/search/postgresql/__init__.py new file mode 100644 index 00000000..a802a5eb --- /dev/null +++ b/askbot/search/postgresql/__init__.py @@ -0,0 +1,21 @@ +"""Procedures to initialize the full text search in PostgresQL""" +from django.db import connection + +def setup_full_text_search(script_path): + """using postgresql database connection, + installs the plsql language, if necessary + and runs the stript, whose path is given as an argument + """ + fts_init_query = open(script_path).read() + + cursor = connection.cursor() + try: + #test if language exists + cursor.execute("SELECT * FROM pg_language WHERE lanname='plpgsql'") + lang_exists = cursor.fetchone() + if not lang_exists: + cursor.execute("CREATE LANGUAGE plpgsql") + #run the main query + cursor.execute(fts_init_query) + finally: + cursor.close() diff --git a/askbot/search/postgresql/question_answer_comment_models.plsql b/askbot/search/postgresql/question_answer_comment_models.plsql new file mode 100644 index 00000000..35180003 --- /dev/null +++ b/askbot/search/postgresql/question_answer_comment_models.plsql @@ -0,0 +1,197 @@ +/* function testing for existence of a column in a table + if table does not exists, function will return "false" */ +CREATE OR REPLACE FUNCTION column_exists(colname text, tablename text) +RETURNS boolean AS +$$ +DECLARE + q text; + onerow record; +BEGIN + + q = 'SELECT attname FROM pg_attribute WHERE attrelid = ( SELECT oid FROM pg_class WHERE relname = '''||tablename||''') AND attname = '''||colname||''''; + + FOR onerow IN EXECUTE q LOOP + RETURN true; + END LOOP; + + RETURN false; +END; +$$ LANGUAGE plpgsql; + +/* function adding tsvector column to table if it does not exists */ +CREATE OR REPLACE FUNCTION add_tsvector_column(colname text, tablename text) +RETURNS boolean AS +$$ +DECLARE + q text; +BEGIN + IF NOT column_exists(colname, tablename) THEN + q = 'ALTER TABLE ' || tablename || ' ADD COLUMN ' || colname || ' tsvector'; + EXECUTE q; + RETURN true; + ELSE + q = 'UPDATE ' || tablename || ' SET ' || colname || '=NULL'; + EXECUTE q; + RETURN false; + END IF; +END; +$$ LANGUAGE plpgsql; + +/* aggregate function that concatenates tsvectors */ +CREATE OR REPLACE FUNCTION tsv_add(tsv1 tsvector, tsv2 tsvector) +RETURNS tsvector AS +$$ +BEGIN + RETURN tsv1 || tsv2; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION setup_aggregates() RETURNS boolean AS +$$ +DECLARE + onerow record; +BEGIN + FOR onerow IN SELECT * FROM pg_proc WHERE proname = 'concat_tsvectors' AND proisagg LOOP + DROP AGGREGATE concat_tsvectors(tsvector); + END LOOP; + CREATE AGGREGATE concat_tsvectors ( + BASETYPE = tsvector, + SFUNC = tsv_add, + STYPE = tsvector, + INITCOND = '' + ); + RETURN true; +END; +$$ LANGUAGE plpgsql; + +SELECT setup_aggregates(); + +/* calculates text search vector for question +DOES not include answers or comments */ +CREATE OR REPLACE FUNCTION get_question_tsv(title text, text text, tagnames text) +RETURNS tsvector AS +$$ +BEGIN + RETURN setweight(to_tsvector('english', coalesce(title, '')), 'A') || + setweight(to_tsvector('english', coalesce(text, '')), 'B') || + setweight(to_tsvector('english', coalesce(tagnames, '')), 'A'); +END; +$$ LANGUAGE plpgsql; + +/* calculates text search vector for answer text */ +CREATE OR REPLACE FUNCTION get_answer_tsv(text text) RETURNS tsvector AS +$$ +BEGIN + RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B'); +END; +$$ LANGUAGE plpgsql; + +/* calculate text search vector for comment text */ +CREATE OR REPLACE FUNCTION get_comment_tsv(comment text) RETURNS tsvector AS +$$ +BEGIN + RETURN setweight(to_tsvector('english', coalesce(comment, '')), 'C'); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_dependent_comments_tsv(object_id integer, tablename text) +RETURNS tsvector AS +$$ +DECLARE + query text; + onerow record; +BEGIN + query = 'SELECT concat_tsvectors(text_search_vector) FROM comment' || + ' WHERE object_id=' ||object_id|| ' AND content_type_id=(' || + ' SELECT id FROM django_content_type' || + ' WHERE app_label=''askbot'' AND name=''' || tablename || ''')'; + FOR onerow IN EXECUTE query LOOP + RETURN onerow.concat_tsvectors; + END LOOP; + RETURN to_tsvector(''); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_dependent_answers_tsv(question_id integer) +RETURNS tsvector AS +$$ +DECLARE + query text; + onerow record; +BEGIN + query = 'SELECT concat_tsvectors(text_search_vector) ' || + 'FROM answer WHERE question_id = ' || question_id || + ' AND deleted=false'; + FOR onerow IN EXECUTE query LOOP + RETURN onerow.concat_tsvectors; + END LOOP; + RETURN to_tsvector(''); +END; +$$ LANGUAGE plpgsql; + +/* create tsvector columns in the content tables */ +SELECT add_tsvector_column('text_search_vector', 'question'); +SELECT add_tsvector_column('text_search_vector', 'answer'); +SELECT add_tsvector_column('text_search_vector', 'comment'); + +/* populate tsvectors with data */ +-- comment tsvectors +UPDATE comment SET text_search_vector = get_comment_tsv(comment); + +-- answer tsvectors +UPDATE answer SET text_search_vector = get_answer_tsv(text); +UPDATE answer as a SET text_search_vector = text_search_vector || + get_dependent_comments_tsv(a.id, 'answer'); + +--question tsvectors +UPDATE question SET text_search_vector = get_question_tsv(title, text, tagnames); + +UPDATE question as q SET text_search_vector = text_search_vector || + get_dependent_comments_tsv(q.id, 'question'); + +UPDATE question as q SET text_search_vector = text_search_vector || + get_dependent_answers_tsv(q.id); + +/* set up update triggers */ +CREATE OR REPLACE FUNCTION question_trigger() RETURNS trigger AS +$$ +BEGIN + new.text_search_vector = get_question_tsv(new.title, new.text, new.tagnames); + new.text_search_vector = new.text_search_vector || + get_dependent_comments_tsv(new.id, 'question'); + new.text_search_vector = new.text_search_vector || + get_dependent_answers_tsv(new.id); + RETURN new; +END; +$$ LANGUAGE plpgsql; +DROP TRIGGER IF EXISTS question_search_vector_update_trigger on question; +CREATE TRIGGER question_search_vector_update_trigger +BEFORE INSERT OR UPDATE ON question FOR EACH ROW EXECUTE PROCEDURE question_trigger(); + +/* comment trigger */ +CREATE OR REPLACE FUNCTION comment_trigger() RETURNS trigger AS +$$ +BEGIN + new.text_search_vector = get_comment_tsv(new.comment); + RETURN new; +END; +$$ LANGUAGE plpgsql; +DROP TRIGGER IF EXISTS comment_search_vector_update_trigger on comment; +CREATE TRIGGER comment_search_vector_update_trigger +BEFORE INSERT OR UPDATE ON comment FOR EACH ROW EXECUTE PROCEDURE comment_trigger(); + +/* answer trigger */ +CREATE OR REPLACE FUNCTION answer_trigger() RETURNS trigger AS +$$ +BEGIN + new.text_search_vector = get_answer_tsv(new.text); + new.text_search_vector = new.text_search_vector || + get_dependent_comments_tsv(new.id, 'answer'); + RETURN new; +END; +$$ LANGUAGE plpgsql; +DROP TRIGGER IF EXISTS answer_search_vector_update_trigger on answer; +CREATE TRIGGER answer_search_vector_update_trigger +BEFORE INSERT OR UPDATE ON answer FOR EACH ROW EXECUTE PROCEDURE answer_trigger(); + +CREATE INDEX askbot_search_idx ON question USING gin(text_search_vector); diff --git a/askbot/search/postgresql/thread_and_post_models_01162012.plsql b/askbot/search/postgresql/thread_and_post_models_01162012.plsql new file mode 100644 index 00000000..7156833b --- /dev/null +++ b/askbot/search/postgresql/thread_and_post_models_01162012.plsql @@ -0,0 +1,222 @@ +/* function testing for existence of a column in a table + if table does not exists, function will return "false" */ +CREATE OR REPLACE FUNCTION column_exists(colname text, tablename text) +RETURNS boolean AS +$$ +DECLARE + q text; + onerow record; +BEGIN + + q = 'SELECT attname FROM pg_attribute WHERE attrelid = ( SELECT oid FROM pg_class WHERE relname = '''||tablename||''') AND attname = '''||colname||''''; + + FOR onerow IN EXECUTE q LOOP + RETURN true; + END LOOP; + + RETURN false; +END; +$$ LANGUAGE plpgsql; + +/* function adding tsvector column to table if it does not exists */ +CREATE OR REPLACE FUNCTION add_tsvector_column(colname text, tablename text) +RETURNS boolean AS +$$ +DECLARE + q text; +BEGIN + IF NOT column_exists(colname, tablename) THEN + q = 'ALTER TABLE ' || tablename || ' ADD COLUMN ' || colname || ' tsvector'; + EXECUTE q; + RETURN true; + ELSE + q = 'UPDATE ' || tablename || ' SET ' || colname || '=NULL'; + EXECUTE q; + RETURN false; + END IF; +END; +$$ LANGUAGE plpgsql; + +/* aggregate function that concatenates tsvectors */ +CREATE OR REPLACE FUNCTION tsv_add(tsv1 tsvector, tsv2 tsvector) +RETURNS tsvector AS +$$ +BEGIN + RETURN tsv1 || tsv2; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION setup_aggregates() RETURNS boolean AS +$$ +DECLARE + onerow record; +BEGIN + FOR onerow IN SELECT * FROM pg_proc WHERE proname = 'concat_tsvectors' AND proisagg LOOP + DROP AGGREGATE concat_tsvectors(tsvector); + END LOOP; + CREATE AGGREGATE concat_tsvectors ( + BASETYPE = tsvector, + SFUNC = tsv_add, + STYPE = tsvector, + INITCOND = '' + ); + RETURN true; +END; +$$ LANGUAGE plpgsql; + +SELECT setup_aggregates(); + +/* calculates text search vector for the individual thread row +DOES not include question body post, answers or comments */ +CREATE OR REPLACE FUNCTION get_thread_tsv(title text, tagnames text) +RETURNS tsvector AS +$$ +BEGIN + /* todo add weight depending on votes */ + RETURN setweight(to_tsvector('english', coalesce(title, '')), 'A') || + setweight(to_tsvector('english', coalesce(tagnames, '')), 'A'); +END; +$$ LANGUAGE plpgsql; + +/* calculates text seanch vector for the individual question row */ +CREATE OR REPLACE FUNCTION get_post_tsv(text text, post_type text) +RETURNS tsvector AS +$$ +BEGIN + /* todo adjust weights to reflect votes */ + IF post_type='question' THEN + RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B'); + ELSIF post_type='answer' THEN + /* todo reflect whether the answer acepted or not */ + RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B'); + ELSIF post_type='comment' THEN + RETURN setweight(to_tsvector('english', coalesce(text, '')), 'C'); + ELSE + RETURN to_tsvector(''); + END IF; +END; +$$ LANGUAGE plpgsql; + +/* calculates text search vector for the question body part by thread id +here we extract question title and the text by thread_id and then +calculate the text search vector. In the future question +title will be moved to the askbot_thread table and this function +will be simpler. +*/ +CREATE OR REPLACE FUNCTION get_thread_question_tsv(thread_id integer) +RETURNS tsvector AS +$$ +DECLARE + query text; + onerow record; +BEGIN + query = 'SELECT text FROM askbot_post WHERE thread_id=' || thread_id || + ' AND post_type=''question'' AND deleted=false'; + FOR onerow in EXECUTE query LOOP + RETURN get_post_tsv(onerow.text, 'question'); + END LOOP; + RETURN to_tsvector(''); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_dependent_comments_tsv(parent_id integer) +RETURNS tsvector AS +$$ +DECLARE + query text; + onerow record; +BEGIN + query = 'SELECT concat_tsvectors(text_search_vector) FROM askbot_post' || + ' WHERE parent_id=' || parent_id || + ' AND post_type=''comment'' AND deleted=false'; + FOR onerow IN EXECUTE query LOOP + RETURN onerow.concat_tsvectors; + END LOOP; + RETURN to_tsvector(''); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_dependent_answers_tsv(thread_id integer) +RETURNS tsvector AS +$$ +DECLARE + query text; + onerow record; +BEGIN + query = 'SELECT concat_tsvectors(text_search_vector) ' || + 'FROM askbot_post WHERE thread_id = ' || thread_id || + ' AND deleted=false'; + FOR onerow IN EXECUTE query LOOP + RETURN onerow.concat_tsvectors; + END LOOP; + RETURN to_tsvector(''); +END; +$$ LANGUAGE plpgsql; + +/* create tsvector columns in the content tables */ +SELECT add_tsvector_column('text_search_vector', 'askbot_thread'); +SELECT add_tsvector_column('text_search_vector', 'askbot_post'); + +/* populate tsvectors with data */ +-- post tsvectors +UPDATE askbot_post set text_search_vector = get_post_tsv(text, 'comment') WHERE post_type='comment'; +UPDATE askbot_post SET text_search_vector = get_post_tsv(text, 'answer') WHERE post_type='answer'; +UPDATE askbot_post SET text_search_vector = get_post_tsv(text, 'question') WHERE post_type='question'; +UPDATE askbot_post as q SET text_search_vector = text_search_vector || + get_dependent_comments_tsv(q.id) WHERE post_type IN ('question', 'answer'); + +--thread tsvector +UPDATE askbot_thread SET text_search_vector = get_thread_tsv(title, tagnames); +UPDATE askbot_thread as t SET text_search_vector = text_search_vector || + get_dependent_answers_tsv(t.id) || + get_thread_question_tsv(t.id); + +/* one trigger per table for tsv updates */ + +/* set up update triggers */ +CREATE OR REPLACE FUNCTION thread_update_trigger() RETURNS trigger AS +$$ +BEGIN + new.text_search_vector = get_thread_tsv(new.title, new.tagnames) || + get_thread_question_tsv(new.id) || + get_dependent_answers_tsv(new.id); + RETURN new; +END; +$$ LANGUAGE plpgsql; +DROP TRIGGER IF EXISTS thread_search_vector_update_trigger on askbot_thread; +CREATE TRIGGER thread_search_vector_update_trigger +BEFORE UPDATE ON askbot_thread FOR EACH ROW EXECUTE PROCEDURE thread_update_trigger(); + +CREATE OR REPLACE FUNCTION thread_insert_trigger() RETURNS trigger AS +$$ +BEGIN + new.text_search_vector = get_thread_tsv(new.title, new.tagnames); + RETURN new; +END; +$$ LANGUAGE plpgsql; +DROP TRIGGER IF EXISTS thread_search_vector_insert_trigger on askbot_thread; +CREATE TRIGGER thread_search_vector_insert_trigger +BEFORE INSERT ON askbot_thread FOR EACH ROW EXECUTE PROCEDURE thread_insert_trigger(); + +/* post trigger */ +CREATE OR REPLACE FUNCTION post_trigger() RETURNS trigger AS +$$ +BEGIN + IF new.post_type = 'question' THEN + new.text_search_vector = get_post_tsv(new.text, 'question') || + get_dependent_comments_tsv(new.id); + ELSIF new.post_type = 'answer' THEN + new.text_search_vector = get_post_tsv(new.text, 'answer') || + get_dependent_comments_tsv(new.id); + ELSIF new.post_type = 'comment' THEN + new.text_search_vector = get_post_tsv(new.text, 'comment'); + END IF; + UPDATE askbot_thread SET id=new.thread_id WHERE id=new.thread_id; + return new; +END; +$$ LANGUAGE plpgsql; +DROP TRIGGER IF EXISTS post_search_vector_update_trigger on askbot_post; +CREATE TRIGGER post_search_vector_update_trigger +BEFORE INSERT OR UPDATE ON askbot_post FOR EACH ROW EXECUTE PROCEDURE post_trigger(); + +CREATE INDEX askbot_search_idx ON askbot_thread USING gin(text_search_vector); |