提交 64def095 编写于 作者: T Teodor Sigaev

Add regression tests for ispell, synonym and thesaurus dictionaries.

Rename synonym.syn.sample and thesaurus.ths.sample to
synonym_sample.syn and thesaurus_sample.ths, respectively, so that they can be
used in the regression tests.

The ispell dictionary tests use simple synthetic dictionary files.
上级 c4b2b296
......@@ -4,7 +4,7 @@
#
# Copyright (c) 2006-2007, PostgreSQL Global Development Group
#
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.2 2007/08/22 06:11:56 tgl Exp $
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.3 2007/09/11 11:54:42 teodor Exp $
#
#-------------------------------------------------------------------------
subdir = src/backend/tsearch
......@@ -13,7 +13,8 @@ include $(top_builddir)/src/Makefile.global
DICTDIR=tsearch_data
DICTFILES=synonym.syn.sample thesaurus.ths.sample
DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
ispell_sample.affix ispell_sample.dict
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
......
COMPOUNDFLAG Z
ONLYINCOMPOUND L
PFX B Y 1
PFX B 0 re .
PFX U N 1
PFX U 0 un .
SFX J Y 1
SFX J 0 INGS [^E]
SFX G Y 1
SFX G 0 ING [^E]
SFX S Y 1
SFX S 0 S [^SXZHY]
SFX A Y 1
SFX A Y IES [^AEIOU]Y
SFX \ N 1
SFX \ 0 Y/L [^Y]
compoundwords controlled Z
prefixes
flag *B:
. > RE # As in enter > reenter
flag U:
. > UN # As in natural > unnatural
suffixes
flag *J:
[^E] > INGS # As in cross > crossings
flag *G:
[^E] > ING # As in cross > crossing
flag *S:
[^SXZHY] > S # As in bat > bats
flag *A:
[^AEIOU]Y > -Y,IES # As in imply > implies
flag ~\\:
[^Y] > Y #~ advarsel > advarsely-
book/GJUS
booking/SB
footballklubber
foot/ZS
football/Z
ball/SZ\
klubber/Z
sky/A
skies sky
booking book
bookings book
postgres pgsql
postgresql pgsql
postgre pgsql
gogle googl
......@@ -11,10 +11,8 @@ one two : *12
one : *1
two : *2
#foo bar : blah blah
#f bar : fbar
#e bar : ebar
#g bar bar : gbarbar
#asd:sdffff
#qwerty:qwer wert erty
supernovae stars : *sn
supernovae : *sn
booking tickets : order invitation cards
booking the tickets : order invitation Cards
--Test text search dictionaries and configurations
-- Test ISpell dictionary with ispell affix file
CREATE TEXT SEARCH DICTIONARY ispell (
Template=ispell,
DictFile=ispell_sample,
AffFile=ispell_sample
);
SELECT ts_lexize('ispell', 'skies');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('ispell', 'bookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'booking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'foot');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('ispell', 'foots');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('ispell', 'rebookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'rebooking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'rebook');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('ispell', 'unbookings');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('ispell', 'unbooking');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('ispell', 'unbook');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('ispell', 'footklubber');
ts_lexize
----------------
{foot,klubber}
(1 row)
SELECT ts_lexize('ispell', 'footballklubber');
ts_lexize
------------------------------------------------------
{footballklubber,foot,ball,klubber,football,klubber}
(1 row)
SELECT ts_lexize('ispell', 'ballyklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('ispell', 'footballyklubber');
ts_lexize
---------------------
{foot,ball,klubber}
(1 row)
-- Test ISpell dictionary with hunspell affix file
CREATE TEXT SEARCH DICTIONARY hunspell (
Template=ispell,
DictFile=ispell_sample,
AffFile=hunspell_sample
);
SELECT ts_lexize('hunspell', 'skies');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('hunspell', 'bookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'booking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'foot');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell', 'foots');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell', 'rebookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'rebooking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'rebook');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('hunspell', 'unbookings');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell', 'unbooking');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell', 'unbook');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell', 'footklubber');
ts_lexize
----------------
{foot,klubber}
(1 row)
SELECT ts_lexize('hunspell', 'footballklubber');
ts_lexize
------------------------------------------------------
{footballklubber,foot,ball,klubber,football,klubber}
(1 row)
SELECT ts_lexize('hunspell', 'ballyklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('hunspell', 'footballyklubber');
ts_lexize
---------------------
{foot,ball,klubber}
(1 row)
-- Synonim dictionary
CREATE TEXT SEARCH DICTIONARY synonym (
Template=synonym,
Synonyms=synonym_sample
);
SELECT ts_lexize('synonym', 'PoStGrEs');
ts_lexize
-----------
{pgsql}
(1 row)
SELECT ts_lexize('synonym', 'Gogle');
ts_lexize
-----------
{googl}
(1 row)
-- Create and simple test thesaurus dictionary
-- More test in configuration checks because of ts_lexize
-- can not give more tat one word as it may wish thesaurus.
CREATE TEXT SEARCH DICTIONARY thesaurus (
Template=thesaurus,
DictFile=thesaurus_sample,
Dictionary=english_stem
);
NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8)
SELECT ts_lexize('thesaurus', 'one');
NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8)
ts_lexize
-----------
{1}
(1 row)
-- Test ispell dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION ispell_tst (
COPY=english
);
ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word
WITH ispell, english_stem;
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
(1 row)
SELECT to_tsquery('ispell_tst', 'footballklubber');
to_tsquery
------------------------------------------------------------------------------
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
to_tsquery
------------------------------------------------------------------------
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
-- Test ispell dictionary with hunspell affix in configuration
CREATE TEXT SEARCH CONFIGURATION hunspell_tst (
COPY=ispell_tst
);
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE ispell WITH hunspell;
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery
------------------------------------------------------------------------------
( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
to_tsquery
------------------------------------------------------------------------
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
-- Test synonym dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION synonym_tst (
COPY=english
);
ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
lword, lpart_hword, lhword
WITH synonym, english_stem;
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
to_tsvector
---------------------------------------------------
'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10
(1 row)
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
to_tsvector
----------------------------------------------------------
'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8
(1 row)
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
COPY=synonym_tst
);
ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR
lword, lpart_hword, lhword
WITH synonym, thesaurus, english_stem;
SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
to_tsvector
----------------------------------
'1':1,5 '12':3 '123':4 'pgsql':2
(1 row)
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
to_tsvector
-------------------------------------------------------------
'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10
(1 row)
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
to_tsvector
-------------------------------------------------------
'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
(1 row)
# ----------
# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.43 2007/08/21 01:11:30 tgl Exp $
# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.44 2007/09/11 11:54:42 teodor Exp $
#
# By convention, we put no more than twenty tests in any one parallel group;
# this limits the number of connections needed to run the tests.
......@@ -77,7 +77,7 @@ test: misc
# ----------
# Another group of parallel tests
# ----------
test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch
test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch tsdicts
# ----------
# Another group of parallel tests
......
# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.40 2007/08/21 01:11:30 tgl Exp $
# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.41 2007/09/11 11:54:42 teodor Exp $
# This should probably be in an order similar to parallel_schedule.
test: boolean
test: char
......@@ -102,6 +102,7 @@ test: rangefuncs
test: prepare
test: without_oid
test: conversion
test: tsdicts
test: truncate
test: alter_table
test: sequence
......
--Test text search dictionaries and configurations
-- Test ISpell dictionary with ispell affix file
CREATE TEXT SEARCH DICTIONARY ispell (
Template=ispell,
DictFile=ispell_sample,
AffFile=ispell_sample
);
SELECT ts_lexize('ispell', 'skies');
SELECT ts_lexize('ispell', 'bookings');
SELECT ts_lexize('ispell', 'booking');
SELECT ts_lexize('ispell', 'foot');
SELECT ts_lexize('ispell', 'foots');
SELECT ts_lexize('ispell', 'rebookings');
SELECT ts_lexize('ispell', 'rebooking');
SELECT ts_lexize('ispell', 'rebook');
SELECT ts_lexize('ispell', 'unbookings');
SELECT ts_lexize('ispell', 'unbooking');
SELECT ts_lexize('ispell', 'unbook');
SELECT ts_lexize('ispell', 'footklubber');
SELECT ts_lexize('ispell', 'footballklubber');
SELECT ts_lexize('ispell', 'ballyklubber');
SELECT ts_lexize('ispell', 'footballyklubber');
-- Test ISpell dictionary with hunspell affix file
CREATE TEXT SEARCH DICTIONARY hunspell (
Template=ispell,
DictFile=ispell_sample,
AffFile=hunspell_sample
);
SELECT ts_lexize('hunspell', 'skies');
SELECT ts_lexize('hunspell', 'bookings');
SELECT ts_lexize('hunspell', 'booking');
SELECT ts_lexize('hunspell', 'foot');
SELECT ts_lexize('hunspell', 'foots');
SELECT ts_lexize('hunspell', 'rebookings');
SELECT ts_lexize('hunspell', 'rebooking');
SELECT ts_lexize('hunspell', 'rebook');
SELECT ts_lexize('hunspell', 'unbookings');
SELECT ts_lexize('hunspell', 'unbooking');
SELECT ts_lexize('hunspell', 'unbook');
SELECT ts_lexize('hunspell', 'footklubber');
SELECT ts_lexize('hunspell', 'footballklubber');
SELECT ts_lexize('hunspell', 'ballyklubber');
SELECT ts_lexize('hunspell', 'footballyklubber');
-- Synonim dictionary
CREATE TEXT SEARCH DICTIONARY synonym (
Template=synonym,
Synonyms=synonym_sample
);
SELECT ts_lexize('synonym', 'PoStGrEs');
SELECT ts_lexize('synonym', 'Gogle');
-- Create and simple test thesaurus dictionary
-- More test in configuration checks because of ts_lexize
-- can not give more tat one word as it may wish thesaurus.
CREATE TEXT SEARCH DICTIONARY thesaurus (
Template=thesaurus,
DictFile=thesaurus_sample,
Dictionary=english_stem
);
SELECT ts_lexize('thesaurus', 'one');
-- Test ispell dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION ispell_tst (
COPY=english
);
ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word
WITH ispell, english_stem;
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
SELECT to_tsquery('ispell_tst', 'footballklubber');
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
-- Test ispell dictionary with hunspell affix in configuration
CREATE TEXT SEARCH CONFIGURATION hunspell_tst (
COPY=ispell_tst
);
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE ispell WITH hunspell;
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
SELECT to_tsquery('hunspell_tst', 'footballklubber');
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
-- Test synonym dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION synonym_tst (
COPY=english
);
ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
lword, lpart_hword, lhword
WITH synonym, english_stem;
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
COPY=synonym_tst
);
ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR
lword, lpart_hword, lhword
WITH synonym, thesaurus, english_stem;
SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册