From 5cb2d6678b62daf6f39e282cec557e20c08e8822 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 2 Feb 2010 20:25:01 +0000 Subject: added full text search support for postgresql, database setup is done as a response to the post_syncdb signal. search is done against question summary, title and tags with crescent weight respectively. everything works but is still in a early stage of development --- forum/views.py | 14 ++++++++++++-- pgfulltext/__init__.py | 0 pgfulltext/management.py | 23 +++++++++++++++++++++++ settings.py | 9 +++++++++ sql_scripts/pg_fts_install.sql | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 pgfulltext/__init__.py create mode 100644 pgfulltext/management.py create mode 100644 sql_scripts/pg_fts_install.sql diff --git a/forum/views.py b/forum/views.py index 5a5462e2..4d214bad 100644 --- a/forum/views.py +++ b/forum/views.py @@ -2350,8 +2350,18 @@ def search(request): except KeyError: view_id = "latest" orderby = "-added_at" - - if settings.USE_SPHINX_SEARCH == True: + + if settings.USE_PG_FTS: + objects = Question.objects.filter(deleted=False).extra( + select={ + 'ranking': "ts_rank_cd(tsv, plainto_tsquery(%s), 32)", + }, + where=["tsv @@ plainto_tsquery(%s)"], + params=[keywords], + select_params=[keywords] + ).order_by('-ranking') + + elif settings.USE_SPHINX_SEARCH == True: #search index is now free of delete questions and answers #so there is not "antideleted" filtering here objects = Question.search.query(keywords) diff --git a/pgfulltext/__init__.py b/pgfulltext/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pgfulltext/management.py b/pgfulltext/management.py new file mode 100644 index 00000000..04303092 --- /dev/null +++ b/pgfulltext/management.py @@ -0,0 +1,23 @@ +import os + +from django.db import connection, transaction +from django.conf import settings + +import forum.models + +if settings.USE_PG_FTS: + from django.db.models.signals import post_syncdb + + def setup_pgfulltext(sender, **kwargs): + if sender == forum.models: + install_pg_fts() + + post_syncdb.connect(setup_pgfulltext) + +def install_pg_fts(): + f = open(os.path.join(os.path.dirname(__file__), '../sql_scripts/pg_fts_install.sql'), 'r') + cursor = connection.cursor() + cursor.execute(f.read()) + transaction.commit_unless_managed() + f.close() + \ No newline at end of file diff --git a/settings.py b/settings.py index 8609418e..c7319f87 100755 --- a/settings.py +++ b/settings.py @@ -80,6 +80,12 @@ if USE_SPHINX_SEARCH: if USE_FB_CONNECT: INSTALLED_APPS.append('fbconnect') +if DATABASE_ENGINE in ('postgresql_psycopg2', 'postgresql', ): + USE_PG_FTS = True + INSTALLED_APPS.append('pgfulltext') +else: + USE_PG_FTS = False + #load optional plugin module for external password login if 'USE_EXTERNAL_LEGACY_LOGIN' in locals() and USE_EXTERNAL_LEGACY_LOGIN: INSTALLED_APPS.append(EXTERNAL_LEGACY_LOGIN_MODULE) @@ -92,3 +98,6 @@ if 'USE_EXTERNAL_LEGACY_LOGIN' in locals() and USE_EXTERNAL_LEGACY_LOGIN: return __import__(EXTERNAL_LEGACY_LOGIN_MODULE, [], [], ['api','forms','views']) else: LOAD_EXTERNAL_LOGIN_APP = lambda: None + + + diff --git a/sql_scripts/pg_fts_install.sql b/sql_scripts/pg_fts_install.sql new file mode 100644 index 00000000..345c5934 --- /dev/null +++ b/sql_scripts/pg_fts_install.sql @@ -0,0 +1,41 @@ +ALTER TABLE question ADD COLUMN tsv tsvector; + +CREATE OR REPLACE FUNCTION public.create_plpgsql_language () + RETURNS TEXT + AS $$ + CREATE LANGUAGE plpgsql; + SELECT 'language plpgsql created'::TEXT; + $$ +LANGUAGE 'sql'; + +SELECT CASE WHEN + (SELECT true::BOOLEAN + FROM pg_language + WHERE lanname='plpgsql') + THEN + (SELECT 'language already installed'::TEXT) + ELSE + (SELECT public.create_plpgsql_language()) + END; + +DROP FUNCTION public.create_plpgsql_language (); + +CREATE OR REPLACE FUNCTION set_question_tsv() RETURNS TRIGGER AS $$ +begin + new.tsv := + setweight(to_tsvector('english', coalesce(new.tagnames,'')), 'A') || + setweight(to_tsvector('english', coalesce(new.title,'')), 'B') || + setweight(to_tsvector('english', coalesce(new.summary,'')), 'C'); + RETURN new; +end +$$ LANGUAGE plpgsql; + +CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE +ON question FOR EACH ROW EXECUTE PROCEDURE set_question_tsv(); + +CREATE INDEX blog_entry_tsv ON blog_entry USING gin(body_tsv); + +UPDATE question SET title = title; + +SELECT (ts_rank(p.factors, tsv, p.query)) AS "ranking", "question"."id", "question"."title", +"question"."author_id", "question"."added_at", "question"."wiki", "question"."wikified_at", "question"."answer_accepted", "question"."closed", "question"."closed_by_id", "question"."closed_at", "question"."close_reason", "question"."deleted", "question"."deleted_at", "question"."deleted_by_id", "question"."locked", "question"."locked_by_id", "question"."locked_at", "question"."score", "question"."vote_up_count", "question"."vote_down_count", "question"."answer_count", "question"."comment_count", "question"."view_count", "question"."offensive_flag_count", "question"."favourite_count", "question"."last_edited_at", "question"."last_edited_by_id", "question"."last_activity_at", "question"."last_activity_by_id", "question"."tagnames", "question"."summary", "question"."html" FROM "question" , "(SELECT '{0.2, 0.5, 0.75, 1.0}'::float4[] AS factors, plainto_tsquery(false) AS query) p" WHERE "question"."deleted" = E'robots' AND tsv @@ p.query ORDER BY "ranking" DESC LIMIT 21 \ No newline at end of file -- cgit v1.2.3-1-g7c22