提交 5e2707c4 编写于 作者: T Teodor Sigaev

Snowball multibyte. It's a pity, but the snowball sources are very different for multibyte and

single-byte encodings, so we need a separate snowball stemmer for every encoding.

I hope this finalizes the multibyte support work in tsearch2, but testing is needed...
上级 75c47471
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $
MODULE_big = tsearch2
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
......@@ -16,7 +16,7 @@ OBJS += $(SUBDIROBJS)
PG_CPPFLAGS = -I$(srcdir)/snowball -I$(srcdir)/ispell -I$(srcdir)/wordparser
DATA = stopword/english.stop stopword/russian.stop
DATA = stopword/english.stop stopword/russian.stop stopword/russian.stop.utf8
DATA_built = tsearch2.sql untsearch2.sql
DOCS = README.tsearch2
REGRESS = tsearch2
......
......@@ -10,6 +10,7 @@
#include "snowball/header.h"
#include "snowball/english_stem.h"
#include "snowball/russian_stem.h"
#include "snowball/russian_stem_UTF8.h"
#include "ts_locale.h"
typedef struct
......@@ -23,8 +24,11 @@ typedef struct
PG_FUNCTION_INFO_V1(snb_en_init);
Datum snb_en_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_ru_init);
Datum snb_ru_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_ru_init_koi8);
Datum snb_ru_init_koi8(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_ru_init_utf8);
Datum snb_ru_init_utf8(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_lexize);
Datum snb_lexize(PG_FUNCTION_ARGS);
......@@ -64,7 +68,7 @@ snb_en_init(PG_FUNCTION_ARGS)
}
Datum
snb_ru_init(PG_FUNCTION_ARGS)
snb_ru_init_koi8(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
......@@ -97,6 +101,40 @@ snb_ru_init(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(d);
}
/*
 * snb_ru_init_utf8
 *		Dictionary init function for the Russian Snowball stemmer (UTF-8).
 *
 * Argument 0, when not NULL, is handed to readstoplist() to populate the
 * dictionary's stop list, which is then sorted.
 *
 * Returns a pointer to a malloc'ed DictSnowball whose stemmer environment is
 * created by russian_UTF_8_create_env() and whose stem callback is
 * russian_UTF_8_stem.
 *
 * Raises ERROR (ERRCODE_OUT_OF_MEMORY) if the DictSnowball or the stemmer
 * environment cannot be allocated.
 */
Datum
snb_ru_init_utf8(PG_FUNCTION_ARGS)
{
	DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));

	if (!d)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	memset(d, 0, sizeof(DictSnowball));
	d->stoplist.wordop = lowerstr;

	if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
	{
		text	   *in = PG_GETARG_TEXT_P(0);

		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = russian_UTF_8_create_env();
	if (!d->z)
	{
		freestoplist(&(d->stoplist));
		/*
		 * d was obtained with malloc() and ereport(ERROR) does not return,
		 * so release it here; otherwise the struct would be leaked.
		 */
		free(d);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));
	}
	d->stem = russian_UTF_8_stem;

	PG_RETURN_POINTER(d);
}
Datum
snb_lexize(PG_FUNCTION_ARGS)
{
......
......@@ -4,21 +4,21 @@
--
\set ECHO none
psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_dict_pkey" for table "pg_ts_dict"
psql:tsearch2.sql:145: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
psql:tsearch2.sql:244: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
psql:tsearch2.sql:251: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
psql:tsearch2.sql:337: NOTICE: type "tsvector" is not yet defined
psql:tsearch2.sql:158: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_parser_pkey" for table "pg_ts_parser"
psql:tsearch2.sql:257: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfg_pkey" for table "pg_ts_cfg"
psql:tsearch2.sql:264: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "pg_ts_cfgmap_pkey" for table "pg_ts_cfgmap"
psql:tsearch2.sql:370: NOTICE: type "tsvector" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:342: NOTICE: argument type tsvector is only a shell
psql:tsearch2.sql:396: NOTICE: type "tsquery" is not yet defined
psql:tsearch2.sql:375: NOTICE: argument type tsvector is only a shell
psql:tsearch2.sql:429: NOTICE: type "tsquery" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell
psql:tsearch2.sql:559: NOTICE: type "gtsvector" is not yet defined
psql:tsearch2.sql:434: NOTICE: argument type tsquery is only a shell
psql:tsearch2.sql:592: NOTICE: type "gtsvector" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:564: NOTICE: argument type gtsvector is only a shell
psql:tsearch2.sql:1054: NOTICE: type "gtsq" is not yet defined
psql:tsearch2.sql:597: NOTICE: argument type gtsvector is only a shell
psql:tsearch2.sql:1087: NOTICE: type "gtsq" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:1059: NOTICE: argument type gtsq is only a shell
psql:tsearch2.sql:1092: NOTICE: argument type gtsq is only a shell
--tsvector
SELECT '1'::tsvector;
tsvector
......
# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.8 2005/10/18 01:30:48 tgl Exp $
# $PostgreSQL: pgsql/contrib/tsearch2/snowball/Makefile,v 1.9 2006/01/27 16:32:31 teodor Exp $
SUBOBJS = english_stem.o api.o russian_stem.o utilities.o
SUBOBJS = english_stem.o api.o russian_stem.o russian_stem_UTF8.o utilities.o
EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)
......
此差异已折叠。
/* This file was generated automatically by the Snowball to ANSI C compiler */
/*
 * Entry points of the Russian Snowball stemmer for UTF-8 encoded input.
 * The declarations are wrapped in extern "C" so they can also be used
 * from C++ translation units.
 */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Creates a stemmer environment. The tsearch2 caller treats a NULL result
 * as an out-of-memory condition.
 */
extern struct SN_env * russian_UTF_8_create_env(void);
/*
 * NOTE(review): presumably releases an environment obtained from
 * russian_UTF_8_create_env() -- confirm against the generated .c file.
 */
extern void russian_UTF_8_close_env(struct SN_env * z);
/*
 * Runs the stemmer on environment z. The meaning of the int result is
 * defined by the Snowball runtime and is not visible here.
 */
extern int russian_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
и
в
во
не
что
он
на
я
с
со
как
а
то
все
она
так
его
но
да
ты
к
у
же
вы
за
бы
по
только
ее
мне
было
вот
от
меня
еще
нет
о
из
ему
теперь
когда
даже
ну
вдруг
ли
если
уже
или
ни
быть
был
него
до
вас
нибудь
опять
уж
вам
ведь
там
потом
себя
ничего
ей
может
они
тут
где
есть
надо
ней
для
мы
тебя
их
чем
была
сам
чтоб
без
будто
чего
раз
тоже
себе
под
будет
ж
тогда
кто
этот
того
потому
этого
какой
совсем
ним
здесь
этом
один
почти
мой
тем
чтобы
нее
сейчас
были
куда
зачем
всех
никогда
можно
при
наконец
два
об
другой
хоть
после
над
больше
тот
через
эти
нас
про
всего
них
какая
много
разве
три
эту
моя
впрочем
хорошо
свою
этой
перед
иногда
лучше
чуть
том
нельзя
такой
им
более
всегда
конечно
всю
между
......@@ -82,17 +82,30 @@ insert into pg_ts_dict select
'English Stemmer. Snowball.'
;
CREATE FUNCTION snb_ru_init(internal)
CREATE FUNCTION snb_ru_init_koi8(internal)
returns internal
as 'MODULE_PATHNAME'
language 'C';
insert into pg_ts_dict select
'ru_stem',
'snb_ru_init(internal)',
'ru_stem_koi8',
'snb_ru_init_koi8(internal)',
'contrib/russian.stop',
'snb_lexize(internal,internal,int4)',
'Russian Stemmer. Snowball.'
'Russian Stemmer. Snowball. KOI8 Encoding'
;
-- SQL-level declaration of the C function snb_ru_init_utf8 from this module;
-- it is referenced below as the init function of the 'ru_stem_utf8' dictionary.
CREATE FUNCTION snb_ru_init_utf8(internal)
returns internal
as 'MODULE_PATHNAME'
language 'C';
-- Register the 'ru_stem_utf8' dictionary. Values are positional; presumably
-- (name, init function, init option / stop-word file, lexize function,
-- description) -- confirm against the pg_ts_dict table definition.
insert into pg_ts_dict select
'ru_stem_utf8',
'snb_ru_init_utf8(internal)',
'contrib/russian.stop.utf8',
'snb_lexize(internal,internal,int4)',
'Russian Stemmer. Snowball. UTF8 Encoding'
;
CREATE FUNCTION spell_init(internal)
......@@ -270,6 +283,7 @@ CREATE FUNCTION show_curcfg()
insert into pg_ts_cfg values ('default', 'default','C');
insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
insert into pg_ts_cfg values ('utf8_russian', 'default','ru_RU.UTF-8');
insert into pg_ts_cfg values ('simple', 'default');
insert into pg_ts_cfgmap values ('default', 'lword', '{en_stem}');
......@@ -292,24 +306,43 @@ insert into pg_ts_cfgmap values ('default', 'float', '{simple}');
insert into pg_ts_cfgmap values ('default', 'int', '{simple}');
insert into pg_ts_cfgmap values ('default', 'uint', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'lword', '{en_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem_koi8}');
insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem_koi8}');
insert into pg_ts_cfgmap values ('default_russian', 'email', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'url', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'host', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'sfloat', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'version', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'part_hword', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem_koi8}');
insert into pg_ts_cfgmap values ('default_russian', 'lpart_hword', '{en_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem_koi8}');
insert into pg_ts_cfgmap values ('default_russian', 'lhword', '{en_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'nlhword', '{ru_stem}');
insert into pg_ts_cfgmap values ('default_russian', 'nlhword', '{ru_stem_koi8}');
insert into pg_ts_cfgmap values ('default_russian', 'uri', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'file', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'float', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'int', '{simple}');
insert into pg_ts_cfgmap values ('default_russian', 'uint', '{simple}');
-- Token-type to dictionary mapping for the 'utf8_russian' configuration:
--   lword, lpart_hword, lhword                 -> en_stem
--   nlword, word, nlpart_hword, hword, nlhword -> ru_stem_utf8
--   all remaining token types listed here      -> simple
insert into pg_ts_cfgmap values ('utf8_russian', 'lword', '{en_stem}');
insert into pg_ts_cfgmap values ('utf8_russian', 'nlword', '{ru_stem_utf8}');
insert into pg_ts_cfgmap values ('utf8_russian', 'word', '{ru_stem_utf8}');
insert into pg_ts_cfgmap values ('utf8_russian', 'email', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'url', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'host', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'sfloat', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'version', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'part_hword', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'nlpart_hword', '{ru_stem_utf8}');
insert into pg_ts_cfgmap values ('utf8_russian', 'lpart_hword', '{en_stem}');
insert into pg_ts_cfgmap values ('utf8_russian', 'hword', '{ru_stem_utf8}');
insert into pg_ts_cfgmap values ('utf8_russian', 'lhword', '{en_stem}');
insert into pg_ts_cfgmap values ('utf8_russian', 'nlhword', '{ru_stem_utf8}');
insert into pg_ts_cfgmap values ('utf8_russian', 'uri', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'file', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'float', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'int', '{simple}');
insert into pg_ts_cfgmap values ('utf8_russian', 'uint', '{simple}');
insert into pg_ts_cfgmap values ('simple', 'lword', '{simple}');
insert into pg_ts_cfgmap values ('simple', 'nlword', '{simple}');
insert into pg_ts_cfgmap values ('simple', 'word', '{simple}');
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册