diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile index d5076ef18d46afc3265e99b9156aacc8835c2bba..720a5366677af0b8e57e25d68f78d9a1e1287085 100644 --- a/src/backend/tsearch/Makefile +++ b/src/backend/tsearch/Makefile @@ -4,7 +4,7 @@ # # Copyright (c) 2006-2007, PostgreSQL Global Development Group # -# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.2 2007/08/22 06:11:56 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.3 2007/09/11 11:54:42 teodor Exp $ # #------------------------------------------------------------------------- subdir = src/backend/tsearch @@ -13,7 +13,8 @@ include $(top_builddir)/src/Makefile.global DICTDIR=tsearch_data -DICTFILES=synonym.syn.sample thesaurus.ths.sample +DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \ + ispell_sample.affix ispell_sample.dict OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \ dict_simple.o dict_synonym.o dict_thesaurus.o \ diff --git a/src/backend/tsearch/hunspell_sample.affix b/src/backend/tsearch/hunspell_sample.affix new file mode 100644 index 0000000000000000000000000000000000000000..d1984c295fbf0de7c9e5344d8ad1ec93525861d3 --- /dev/null +++ b/src/backend/tsearch/hunspell_sample.affix @@ -0,0 +1,24 @@ +COMPOUNDFLAG Z +ONLYINCOMPOUND L + +PFX B Y 1 +PFX B 0 re . + +PFX U N 1 +PFX U 0 un . + +SFX J Y 1 +SFX J 0 INGS [^E] + +SFX G Y 1 +SFX G 0 ING [^E] + +SFX S Y 1 +SFX S 0 S [^SXZHY] + +SFX A Y 1 +SFX A Y IES [^AEIOU]Y + +SFX \ N 1 +SFX \ 0 Y/L [^Y] + diff --git a/src/backend/tsearch/ispell_sample.affix b/src/backend/tsearch/ispell_sample.affix new file mode 100644 index 0000000000000000000000000000000000000000..f29004ff1da7e37103bc15f99b525ee9470cafc1 --- /dev/null +++ b/src/backend/tsearch/ispell_sample.affix @@ -0,0 +1,26 @@ +compoundwords controlled Z + +prefixes + +flag *B: + . > RE # As in enter > reenter + +flag U: + . 
> UN # As in natural > unnatural + +suffixes + +flag *J: + [^E] > INGS # As in cross > crossings + +flag *G: + [^E] > ING # As in cross > crossing + +flag *S: + [^SXZHY] > S # As in bat > bats + +flag *A: + [^AEIOU]Y > -Y,IES # As in imply > implies + +flag ~\\: + [^Y] > Y #~ advarsel > advarsely- diff --git a/src/backend/tsearch/ispell_sample.dict b/src/backend/tsearch/ispell_sample.dict new file mode 100644 index 0000000000000000000000000000000000000000..44df1967a6c726c10e6f03fdb99275c15bf183d4 --- /dev/null +++ b/src/backend/tsearch/ispell_sample.dict @@ -0,0 +1,8 @@ +book/GJUS +booking/SB +footballklubber +foot/ZS +football/Z +ball/SZ\ +klubber/Z +sky/A diff --git a/src/backend/tsearch/synonym.syn.sample b/src/backend/tsearch/synonym.syn.sample deleted file mode 100644 index fdccca102b412cb82766f5cbe7d371ae85188cd1..0000000000000000000000000000000000000000 --- a/src/backend/tsearch/synonym.syn.sample +++ /dev/null @@ -1,3 +0,0 @@ -skies sky -booking book -bookings book diff --git a/src/backend/tsearch/synonym_sample.syn b/src/backend/tsearch/synonym_sample.syn new file mode 100644 index 0000000000000000000000000000000000000000..4e2eaeec0c1ac0d7fa3b5f66500d1a55829e7bd0 --- /dev/null +++ b/src/backend/tsearch/synonym_sample.syn @@ -0,0 +1,4 @@ +postgres pgsql +postgresql pgsql +postgre pgsql +gogle googl diff --git a/src/backend/tsearch/thesaurus.ths.sample b/src/backend/tsearch/thesaurus_sample.ths similarity index 73% rename from src/backend/tsearch/thesaurus.ths.sample rename to src/backend/tsearch/thesaurus_sample.ths index 7e7702e2ae43a1c8933c6ca71048f12b577662fd..b83d8f1452a9c3794efe82e5ad02c196c7ae97a9 100644 --- a/src/backend/tsearch/thesaurus.ths.sample +++ b/src/backend/tsearch/thesaurus_sample.ths @@ -11,10 +11,8 @@ one two : *12 one : *1 two : *2 -#foo bar : blah blah -#f bar : fbar -#e bar : ebar -#g bar bar : gbarbar -#asd:sdffff -#qwerty:qwer wert erty +supernovae stars : *sn +supernovae : *sn +booking tickets : order invitation cards +booking the 
tickets : order invitation Cards diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out new file mode 100644 index 0000000000000000000000000000000000000000..859c7bb52261b26d7ed40c91e2d3dac3a364192c --- /dev/null +++ b/src/test/regress/expected/tsdicts.out @@ -0,0 +1,320 @@ +--Test text search dictionaries and configurations +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); +SELECT ts_lexize('ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('ispell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('ispell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('ispell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'rebook'); + ts_lexize +----------- + +(1 row) + +SELECT ts_lexize('ispell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('ispell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('ispell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('ispell', 'footklubber'); + ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('ispell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('ispell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('ispell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} 
+(1 row) + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY hunspell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); +SELECT ts_lexize('hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('hunspell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('hunspell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('hunspell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'rebook'); + ts_lexize +----------- + +(1 row) + +SELECT ts_lexize('hunspell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('hunspell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('hunspell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('hunspell', 'footklubber'); + ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('hunspell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('hunspell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('hunspell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + +-- Synonym dictionary +CREATE TEXT SEARCH DICTIONARY synonym ( + Template=synonym, + Synonyms=synonym_sample +); +SELECT ts_lexize('synonym', 'PoStGrEs'); + ts_lexize +----------- + {pgsql} +(1 row) + +SELECT ts_lexize('synonym', 'Gogle'); + ts_lexize +----------- + {googl} +(1 row) + +-- Create and 
run a simple test of the thesaurus dictionary +-- More tests in configuration checks because of ts_lexize +-- can not give more than one word as the thesaurus may wish. +CREATE TEXT SEARCH DICTIONARY thesaurus ( + Template=thesaurus, + DictFile=thesaurus_sample, + Dictionary=english_stem +); +NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8) +SELECT ts_lexize('thesaurus', 'one'); +NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8) + ts_lexize +----------- + {1} +(1 row) + +-- Test ispell dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION ispell_tst ( + COPY=english +); +ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR + hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word + WITH ispell, english_stem; +SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); + to_tsvector +---------------------------------------------------------------------------------------------------- + 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 +(1 row) + +SELECT to_tsquery('ispell_tst', 'footballklubber'); + to_tsquery +------------------------------------------------------------------------------ + ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' +(1 row) + +SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); + to_tsquery +------------------------------------------------------------------------ + 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' +(1 row) + +-- Test ispell dictionary with hunspell affix in configuration +CREATE TEXT SEARCH CONFIGURATION hunspell_tst ( + COPY=ispell_tst +); +ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING + REPLACE ispell WITH hunspell; +SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); + 
to_tsvector 
+---------------------------------------------------------------------------------------------------- + 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 +(1 row) + +SELECT to_tsquery('hunspell_tst', 'footballklubber'); + to_tsquery +------------------------------------------------------------------------------ + ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' +(1 row) + +SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); + to_tsquery +------------------------------------------------------------------------ + 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' +(1 row) + +-- Test synonym dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION synonym_tst ( + COPY=english +); +ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, english_stem; +SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre'); + to_tsvector +--------------------------------------------------- + 'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10 +(1 row) + +SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google'); + to_tsvector +---------------------------------------------------------- + 'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8 +(1 row) + +-- test thesaurus in configuration +-- see thesaurus_sample.ths to understand 'odd' resulting tsvector +CREATE TEXT SEARCH CONFIGURATION thesaurus_tst ( + COPY=synonym_tst +); +ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, thesaurus, english_stem; +SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); + to_tsvector +---------------------------------- + '1':1,5 '12':3 '123':4 'pgsql':2 +(1 row) + +SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called 
supernovae (abbrevation SN)'); + to_tsvector +------------------------------------------------------------- + 'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10 +(1 row) + +SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); + to_tsvector +------------------------------------------------------- + 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8 +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index a8f5c799d9a7a1f7b1208ab4778d19dd59acc759..4d5af5b16d5d59e6ea1faf115aba3536236fdd5e 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -1,5 +1,5 @@ # ---------- -# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.43 2007/08/21 01:11:30 tgl Exp $ +# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.44 2007/09/11 11:54:42 teodor Exp $ # # By convention, we put no more than twenty tests in any one parallel group; # this limits the number of connections needed to run the tests. @@ -77,7 +77,7 @@ test: misc # ---------- # Another group of parallel tests # ---------- -test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch +test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch tsdicts # ---------- # Another group of parallel tests diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index a11a87023349dea97925e61ebef6b918077c93a2..856682469cce9411a3208b392a9f6745eff8a9bd 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.40 2007/08/21 01:11:30 tgl Exp $ +# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.41 2007/09/11 11:54:42 teodor Exp $ # This should probably be in an order similar to parallel_schedule. 
test: boolean test: char @@ -102,6 +102,7 @@ test: rangefuncs test: prepare test: without_oid test: conversion +test: tsdicts test: truncate test: alter_table test: sequence diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql new file mode 100644 index 0000000000000000000000000000000000000000..2e6cf791d875fddaa1461063edc253d5407bad18 --- /dev/null +++ b/src/test/regress/sql/tsdicts.sql @@ -0,0 +1,121 @@ +--Test text search dictionaries and configurations + +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); + +SELECT ts_lexize('ispell', 'skies'); +SELECT ts_lexize('ispell', 'bookings'); +SELECT ts_lexize('ispell', 'booking'); +SELECT ts_lexize('ispell', 'foot'); +SELECT ts_lexize('ispell', 'foots'); +SELECT ts_lexize('ispell', 'rebookings'); +SELECT ts_lexize('ispell', 'rebooking'); +SELECT ts_lexize('ispell', 'rebook'); +SELECT ts_lexize('ispell', 'unbookings'); +SELECT ts_lexize('ispell', 'unbooking'); +SELECT ts_lexize('ispell', 'unbook'); + +SELECT ts_lexize('ispell', 'footklubber'); +SELECT ts_lexize('ispell', 'footballklubber'); +SELECT ts_lexize('ispell', 'ballyklubber'); +SELECT ts_lexize('ispell', 'footballyklubber'); + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY hunspell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); + +SELECT ts_lexize('hunspell', 'skies'); +SELECT ts_lexize('hunspell', 'bookings'); +SELECT ts_lexize('hunspell', 'booking'); +SELECT ts_lexize('hunspell', 'foot'); +SELECT ts_lexize('hunspell', 'foots'); +SELECT ts_lexize('hunspell', 'rebookings'); +SELECT ts_lexize('hunspell', 'rebooking'); +SELECT ts_lexize('hunspell', 'rebook'); +SELECT ts_lexize('hunspell', 'unbookings'); +SELECT ts_lexize('hunspell', 'unbooking'); +SELECT ts_lexize('hunspell', 'unbook'); + +SELECT ts_lexize('hunspell', 'footklubber'); +SELECT ts_lexize('hunspell', 
'footballklubber'); +SELECT ts_lexize('hunspell', 'ballyklubber'); +SELECT ts_lexize('hunspell', 'footballyklubber'); + +-- Synonym dictionary +CREATE TEXT SEARCH DICTIONARY synonym ( + Template=synonym, + Synonyms=synonym_sample +); + +SELECT ts_lexize('synonym', 'PoStGrEs'); +SELECT ts_lexize('synonym', 'Gogle'); + +-- Create and run a simple test of the thesaurus dictionary +-- More tests in configuration checks because of ts_lexize +-- can not give more than one word as the thesaurus may wish. +CREATE TEXT SEARCH DICTIONARY thesaurus ( + Template=thesaurus, + DictFile=thesaurus_sample, + Dictionary=english_stem +); + +SELECT ts_lexize('thesaurus', 'one'); + +-- Test ispell dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION ispell_tst ( + COPY=english +); + +ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR + hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word + WITH ispell, english_stem; + +SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); +SELECT to_tsquery('ispell_tst', 'footballklubber'); +SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); + +-- Test ispell dictionary with hunspell affix in configuration +CREATE TEXT SEARCH CONFIGURATION hunspell_tst ( + COPY=ispell_tst +); + +ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING + REPLACE ispell WITH hunspell; + +SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); +SELECT to_tsquery('hunspell_tst', 'footballklubber'); +SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); + +-- Test synonym dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION synonym_tst ( + COPY=english +); + +ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, english_stem; + +SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and 
pronounced as 
postgre'); +SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google'); + +-- test thesaurus in configuration +-- see thesaurus_sample.ths to understand 'odd' resulting tsvector +CREATE TEXT SEARCH CONFIGURATION thesaurus_tst ( + COPY=synonym_tst +); + +ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, thesaurus, english_stem; + +SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); +SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)'); +SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); +