summaryrefslogtreecommitdiffstats
path: root/askbot/search/postgresql/thread_and_post_models_01162012.plsql
blob: 2fca2d6a2811b8ed07701148070e137ce5f53ebd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
/* function testing for existence of a column in a table
   if table does not exists, function will return "false" */
CREATE OR REPLACE FUNCTION column_exists(colname text, tablename text)
RETURNS boolean AS 
$$
DECLARE
    q text;
    onerow record;
BEGIN

    q = 'SELECT attname FROM pg_attribute WHERE attrelid = ( SELECT oid FROM pg_class WHERE relname = '''||tablename||''') AND attname = '''||colname||''''; 

    FOR onerow IN EXECUTE q LOOP
        RETURN true;
    END LOOP;

    RETURN false;
END;
$$ LANGUAGE plpgsql;

/* function adding tsvector column to table if it does not exists */
CREATE OR REPLACE FUNCTION add_tsvector_column(colname text, tablename text)
RETURNS boolean AS
$$
DECLARE
    q text;
BEGIN
    IF NOT column_exists(colname, tablename) THEN
        q = 'ALTER TABLE ' || tablename || ' ADD COLUMN ' || colname || ' tsvector';
        EXECUTE q;
        RETURN true;
    ELSE
        q = 'UPDATE ' || tablename || ' SET ' || colname || '=NULL';
        EXECUTE q;
        RETURN false;
    END IF;
END;
$$ LANGUAGE plpgsql;

/* aggregate function that concatenates tsvectors */
CREATE OR REPLACE FUNCTION tsv_add(tsv1 tsvector, tsv2 tsvector)
RETURNS tsvector AS
$$
BEGIN
    RETURN tsv1 || tsv2;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION setup_aggregates() RETURNS boolean AS
$$
DECLARE
    onerow record;
BEGIN
    FOR onerow IN SELECT * FROM pg_proc WHERE proname = 'concat_tsvectors' AND proisagg LOOP
        DROP AGGREGATE concat_tsvectors(tsvector);
    END LOOP;
    CREATE AGGREGATE concat_tsvectors (
        BASETYPE = tsvector,
        SFUNC = tsv_add,
        STYPE = tsvector,
        INITCOND = ''
    );
    RETURN true;
END;
$$ LANGUAGE plpgsql;

SELECT setup_aggregates();

/* calculates text search vector for the individual thread row
DOES not include question body post, answers or comments */
CREATE OR REPLACE FUNCTION get_thread_tsv(title text, tagnames text)
RETURNS tsvector AS
$$
BEGIN
    /* todo add weight depending on votes */
    RETURN  setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
            setweight(to_tsvector('english', coalesce(tagnames, '')), 'A');
END;
$$ LANGUAGE plpgsql;

/* calculates text seanch vector for the individual question row */
CREATE OR REPLACE FUNCTION get_post_tsv(text text, post_type text)
RETURNS tsvector AS
$$
BEGIN
    /* todo adjust weights to reflect votes */
    IF post_type='question' THEN
        RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B');
    ELSIF post_type='answer' THEN
        /* todo reflect whether the answer acepted or not */
        RETURN setweight(to_tsvector('english', coalesce(text, '')), 'B');
    ELSIF post_type='comment' THEN
        RETURN setweight(to_tsvector('english', coalesce(text, '')), 'C');
    ELSE
        RETURN to_tsvector('');
    END IF;
END;
$$ LANGUAGE plpgsql;

/* calculates text search vector for the question body part by thread id
here we extract question title and the text by thread_id and then
calculate the text search vector. In the future question
title will be moved to the askbot_thread table and this function
will be simpler.
*/
CREATE OR REPLACE FUNCTION get_thread_question_tsv(thread_id integer)
RETURNS tsvector AS
$$
DECLARE
    query text;
    onerow record;
BEGIN
    query = 'SELECT text FROM askbot_post WHERE thread_id=' || thread_id ||
            ' AND post_type=''question'' AND deleted=false';
    FOR onerow in EXECUTE query LOOP
        RETURN get_post_tsv(onerow.text, 'question');
    END LOOP;
    RETURN to_tsvector('');
END;
$$ LANGUAGE plpgsql;

DROP FUNCTION IF EXISTS get_dependent_comments_tsv(object_id integer, tablename text);
CREATE OR REPLACE FUNCTION get_dependent_comments_tsv(parent_id integer)
RETURNS tsvector AS
$$
DECLARE
    query text;
    onerow record;
BEGIN
    query = 'SELECT concat_tsvectors(text_search_vector) FROM askbot_post' ||
        ' WHERE parent_id=' || parent_id || 
        ' AND post_type=''comment'' AND deleted=false';
    FOR onerow IN EXECUTE query LOOP
        RETURN onerow.concat_tsvectors;
    END LOOP;
    RETURN to_tsvector('');
END;
$$ LANGUAGE plpgsql;

DROP FUNCTION IF EXISTS get_dependent_answers_tsv(question_id integer);
CREATE OR REPLACE FUNCTION get_dependent_answers_tsv(thread_id integer)
RETURNS tsvector AS
$$
DECLARE
    query text;
    onerow record;
BEGIN
    query = 'SELECT concat_tsvectors(text_search_vector) ' ||
       'FROM askbot_post WHERE thread_id = ' || thread_id ||
       ' AND deleted=false';
    FOR onerow IN EXECUTE query LOOP
        RETURN onerow.concat_tsvectors;
    END LOOP;
    RETURN to_tsvector('');
END;
$$ LANGUAGE plpgsql;

/* create tsvector columns in the content tables */
SELECT add_tsvector_column('text_search_vector', 'askbot_thread');
SELECT add_tsvector_column('text_search_vector', 'askbot_post');

/* populate tsvectors with data */
-- post tsvectors
UPDATE askbot_post set text_search_vector = get_post_tsv(text, 'comment') WHERE post_type='comment';
UPDATE askbot_post SET text_search_vector = get_post_tsv(text, 'answer') WHERE post_type='answer';
UPDATE askbot_post SET text_search_vector = get_post_tsv(text, 'question') WHERE post_type='question';
UPDATE askbot_post as q SET text_search_vector = text_search_vector ||
    get_dependent_comments_tsv(q.id) WHERE post_type IN ('question', 'answer');

--thread tsvector
UPDATE askbot_thread SET text_search_vector = get_thread_tsv(title, tagnames);
UPDATE askbot_thread as t SET text_search_vector = text_search_vector ||
    get_dependent_answers_tsv(t.id) ||
    get_thread_question_tsv(t.id);

/* one trigger per table for tsv updates */

/* set up update triggers */
CREATE OR REPLACE FUNCTION thread_update_trigger() RETURNS trigger AS
$$
BEGIN
    new.text_search_vector = get_thread_tsv(new.title, new.tagnames) ||
                             get_thread_question_tsv(new.id) ||
                             get_dependent_answers_tsv(new.id);
    RETURN new;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS thread_search_vector_update_trigger on askbot_thread;
CREATE TRIGGER thread_search_vector_update_trigger 
BEFORE UPDATE ON askbot_thread FOR EACH ROW EXECUTE PROCEDURE thread_update_trigger();

CREATE OR REPLACE FUNCTION thread_insert_trigger() RETURNS trigger AS
$$
BEGIN
    new.text_search_vector = get_thread_tsv(new.title, new.tagnames);
    RETURN new;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS thread_search_vector_insert_trigger on askbot_thread;
CREATE TRIGGER thread_search_vector_insert_trigger
BEFORE INSERT ON askbot_thread FOR EACH ROW EXECUTE PROCEDURE thread_insert_trigger();

/* post trigger */
CREATE OR REPLACE FUNCTION post_trigger() RETURNS trigger AS
$$
BEGIN
    IF new.post_type = 'question' THEN
        new.text_search_vector = get_post_tsv(new.text, 'question') ||
                                 get_dependent_comments_tsv(new.id);
    ELSIF new.post_type = 'answer' THEN
        new.text_search_vector = get_post_tsv(new.text, 'answer') ||
                                 get_dependent_comments_tsv(new.id);
    ELSIF new.post_type = 'comment' THEN
        new.text_search_vector = get_post_tsv(new.text, 'comment');
    END IF;
    UPDATE askbot_thread SET id=new.thread_id WHERE id=new.thread_id;
    return new;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS post_search_vector_update_trigger on askbot_post;
CREATE TRIGGER post_search_vector_update_trigger 
BEFORE INSERT OR UPDATE ON askbot_post FOR EACH ROW EXECUTE PROCEDURE post_trigger();

DROP INDEX IF EXISTS askbot_search_idx;
CREATE INDEX askbot_search_idx ON askbot_thread USING gin(text_search_vector);