提交 16f85390 编写于 作者: T Tom Lane

Support for emulating RTREE indexing in GiST. Contributed by

Oleg Bartunov and Teodor Sigaev.
上级 3043810d
# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.20 2001/05/10 15:51:05 momjian Exp $
# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.21 2001/05/31 18:27:18 tgl Exp $
subdir = contrib
top_builddir = ..
......@@ -27,6 +27,7 @@ WANTED_DIRS = \
pgbench \
pgcrypto \
rserv \
rtree_gist \
seg \
soundex \
spi \
......
......@@ -133,6 +133,10 @@ rserv -
replication server
by Vadim B. Mikheev <vadim4o@email.com>
rtree_gist -
Support for emulating RTREE indexing in GiST
by Oleg Bartunov <oleg@sai.msu.su> and Teodor Sigaev <teodor@stack.net>
seg -
Confidence-interval datatype (GiST indexing example)
by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>
......
#
# $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/Makefile,v 1.1 2001/05/31 18:27:18 tgl Exp $
#
# Build, install and regression-test rules for the rtree_gist contrib module.
subdir = contrib/rtree_gist
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# override libdir to install shlib in contrib not main directory
libdir := $(libdir)/contrib
# shared library parameters
NAME= rtree_gist
SO_MAJOR_VERSION= 1
SO_MINOR_VERSION= 0
# put the module's own source directory first on the include path
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
OBJS= rtree_gist.o
# default target: the shared library plus the generated SQL install script
all: all-lib $(NAME).sql
# Shared library stuff
include $(top_srcdir)/src/Makefile.shlib
# generate the SQL script by replacing every MODULE_PATHNAME placeholder
# with $(libdir)/$(shlib)
$(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
# build pg_regress in the main regression directory before running tests
.PHONY: submake
submake:
$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
# against installed postmaster
installcheck: submake
$(top_builddir)/src/test/regress/pg_regress rtree_gist
# in-tree test doesn't work yet (no way to install my shared library)
#check: all submake
# $(top_builddir)/src/test/regress/pg_regress --temp-install \
# --top-builddir=$(top_builddir) rtree_gist
# placeholder 'check' target: only tells the user to use installcheck
check:
@echo "'make check' is not supported."
@echo "Do 'make install', then 'make installcheck' instead."
# install the library, the README and the generated SQL script
install: all installdirs install-lib
$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
# create the three destination directories used by 'install'
installdirs:
$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
# remove the README and SQL script that 'install' copied
uninstall: uninstall-lib
rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
# all three clean levels remove the same set of build and test products
clean distclean maintainer-clean: clean-lib
rm -f $(OBJS) $(NAME).sql
# things created by various check targets
rm -rf results tmp_check log
rm -f regression.diffs regression.out regress.out run_check.out
ifeq ($(PORTNAME), win)
rm -f regress.def
endif
# write compiler-generated dependencies for every .c file into 'depend'
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
# pull in the dependency file only when it already exists
ifeq (depend,$(wildcard depend))
include depend
endif
This is an R-Tree implementation using GiST.
The code (for PG95) was taken from http://s2k-ftp.cs.berkeley.edu:8000/gist/pggist/
and adapted to the new version of GiST (7.1 and above).
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
(oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist
for additional information.
CHANGES:
Tue May 29 17:04:16 MSD 2001
1. Small fixes in polygon code
Thanks to Dave Blasby <dblasby@refractions.net>
Mon May 28 19:42:14 MSD 2001
1. Full implementation of R-tree using GiST - gist_box_ops,gist_poly_ops
2. gist_poly_ops is lossy
3. NULLs support
4. works with multi-key GiST
NOTICE:
This version works only with PostgreSQL version 7.1 and above
because of changes in the function call interface.
INSTALLATION:
gmake
gmake install
-- load functions
psql <database> < rtree_gist.sql
REGRESSION TEST:
gmake installcheck
EXAMPLE USAGE:
create table boxtmp (b box);
-- create index
create index bix on boxtmp using gist (b gist_box_ops);
-- query
select * from boxtmp where b && '(1000,1000,0,0)'::box;
BENCHMARKS:
subdirectory bench contains benchmark suite.
Prerequisites: perl, DBI, DBD::Pg, Time::HiRes
cd ./bench
1. createdb TEST
2. psql TEST < ../box.sql
3. ./create_test.pl | psql TEST
-- change $NUM - number of rows in test dataset
4. ./bench.pl - perl script to benchmark queries.
Run script without arguments to see available options.
a)test without GiST index, using built-in R-Tree
./bench.pl -d TEST
b)test R-Tree using GiST index
./bench.pl -d TEST -g
RESULTS:
1. One interesting thing is that insertion time for the built-in R-Tree is
about 8 times longer than for the GiST implementation of R-Tree !!!
2. Postmaster requires much more memory for built-in R-Tree
3. Search time depends on dataset. In our case we got:
+------------+-----------+--------------+
|Number boxes|R-tree, sec|R-tree using |
| | | GiST, sec |
+------------+-----------+--------------+
| 10| 0.002| 0.002|
+------------+-----------+--------------+
| 100| 0.002| 0.002|
+------------+-----------+--------------+
| 1000| 0.002| 0.002|
+------------+-----------+--------------+
| 10000| 0.015| 0.025|
+------------+-----------+--------------+
| 20000| 0.029| 0.048|
+------------+-----------+--------------+
| 40000| 0.055| 0.092|
+------------+-----------+--------------+
| 80000| 0.113| 0.178|
+------------+-----------+--------------+
| 160000| 0.338| 0.337|
+------------+-----------+--------------+
| 320000| 0.674| 0.673|
+------------+-----------+--------------+
#!/usr/bin/perl -w
use strict;
# make sure we are in a sane environment.
use DBI();
use DBD::Pg();
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval );
use Getopt::Std;
# Benchmark driver: runs one fixed overlap query -b times against either the
# GiST-indexed table (boxtmp, with -g) or the built-in R-tree table (boxtmp2),
# then reports total and per-query wall-clock time.
my %opt;
getopts('d:b:gv', \%opt);

# Called without any option: show the usage text and stop.
if ( !%opt ) {
    print <<EOT;
Usage:
$0 -d DATABASE -b N [-v] [-g]
-d DATABASE - DATABASE name
-b N -number of cycles
-v - print sql
-g -use GiST index( default built-in R-tree )
EOT
    exit;
}

$opt{d} ||= 'TEST';
my $dbi = DBI->connect( 'DBI:Pg:dbname=' . $opt{d} )
    or die "Couldn't connect DB: $opt{d} !\n";

# Query box shared by both variants of the test.
my $sss = '(3000,3000,2990,2990)';
my ( $sql, $notice );
if ( $opt{g} ) {
    $notice = "Testing GiST implementation of R-Tree";
    $sql    = "select count(*) from boxtmp where b && '$sss'::box;";
}
else {
    $notice = "Testing built-in implementation of R-Tree";
    $sql    = "select count(*) from boxtmp2 where b && '$sss'::box;";
}

# Number of repetitions; a missing or zero -b means a single run.
my $cycles = $opt{b} || 1;
my $found  = 0;

my $t0 = [gettimeofday];
foreach ( 1 .. $cycles ) {
    my @rows = exec_sql( $dbi, $sql );

    # The query is a count(*), so it yields a single one-column row.  The
    # number of matches is the value in that row, not the number of rows
    # returned (which would always be reported as 1).
    $found = @rows ? ( values %{ $rows[0] } )[0] : 0;
}
my $elapsed = tv_interval( $t0, [gettimeofday] );

print "$notice:\n";
print "$sql\n" if ( $opt{v} );
print "Done\n";
printf "total: %.02f sec; number: %d; for one: %.03f sec; found %d docs\n",
    $elapsed, $cycles, $elapsed / $cycles, $found;
$dbi->disconnect;
# Run one SQL statement and return all result rows.
#
# Arguments:
#   $dbi  - database handle providing prepare() and errstr()
#   $sql  - statement text
#   @keys - optional bind values handed to execute()
#
# Returns the list of rows, each a hash reference as produced by
# fetchrow_hashref().  Dies with the driver's error text and the
# statement if prepare or execute fails.
sub exec_sql {
    my ( $dbi, $sql, @keys ) = @_;

    my $sth = $dbi->prepare($sql)
        or die "Couldn't prepare '$sql': "
        . ( $dbi->errstr || 'unknown error' ) . "\n";
    $sth->execute(@keys)
        or die "Couldn't execute '$sql': "
        . ( $sth->errstr || 'unknown error' ) . "\n";

    my @rows;
    while ( defined( my $row = $sth->fetchrow_hashref ) ) {
        push @rows, $row;
    }
    $sth->finish;

    return @rows;
}
#!/usr/bin/perl
use strict;
# Emit an SQL script on stdout that (re)creates the two benchmark tables,
# loads the same $NUM pseudo-random boxes into each, and builds a GiST index
# on boxtmp and a built-in R-tree index on boxtmp2.  The boxes are staged in
# the file bbb.dat in the current directory.
my $NUM       = 20000;
my $data_file = 'bbb.dat';

print "drop table boxtmp;\n";
print "drop table boxtmp2;\n";
print "create table boxtmp (b box);\n";
print "create table boxtmp2 (b box);\n";

# Fixed seed so that every run generates the same dataset.
srand(1);

open( my $out, '>', $data_file )
    or die "Cannot open $data_file for writing: $!\n";
foreach ( 1 .. $NUM ) {

    # Alternative generator kept from the original script for reference:
    #print $out '(',int( 500+500*rand() ),',',int( 500+500*rand() ),',',int( 500*rand() ),',',int( 500*rand() ),")\n";
    my ( $x1, $y1, $x2, $y2 ) = (
        10000 * rand(),
        10000 * rand(),
        10000 * rand(),
        10000 * rand()
    );

    # Write the larger coordinates first, then the smaller ones.
    print {$out} '(',
        max( $x1, $x2 ), ',',
        max( $y1, $y2 ), ',',
        min( $x1, $x2 ), ',',
        min( $y1, $y2 ), ")\n";
}

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close $data_file: $!\n";

# Feed the identical data to both tables as COPY input.
foreach my $table (qw(boxtmp boxtmp2)) {
    print "copy $table from stdin;\n";
    open( my $in, '<', $data_file )
        or die "Cannot open $data_file for reading: $!\n";
    while ( my $line = <$in> ) {
        print $line;
    }
    close($in);
    print "\\.\n";
}

print "create index bix on boxtmp using gist (b gist_box_ops);\n";
print "create index bix2 on boxtmp2 using rtree (b box_ops);\n";
# Return the numerically smaller of the two arguments; when neither is
# smaller than the other, the second one is returned.
sub min {
    my ( $first, $second ) = @_;
    return $first if $first < $second;
    return $second;
}
# Return the numerically larger of the two arguments; when neither is
# larger than the other, the second one is returned.
sub max {
    my ( $first, $second ) = @_;
    return $first if $first > $second;
    return $second;
}
此差异已折叠。
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
create table boxtmp (b box);
\copy boxtmp from 'data/test_box.data'
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
count
-------
2
(1 row)
create index bix on boxtmp using rtree (b);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
count
-------
2
(1 row)
drop index bix;
create index bix on boxtmp using gist (b gist_box_ops);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
count
-------
2
(1 row)
create table polytmp (p polygon);
\copy polytmp from 'data/test_box.data'
create index pix on polytmp using rtree (p);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
count
-------
2
(1 row)
drop index pix;
create index pix on polytmp using gist (p gist_poly_ops) with(islossy);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
count
-------
2
(1 row)
/*-------------------------------------------------------------------------
*
* rtree_gist.c
* pg_amproc entries for GiSTs over 2-D boxes.
* This gives R-tree behavior, with Guttman's poly-time split algorithm.
*
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.1 2001/05/31 18:27:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
#include "utils/palloc.h"
#include "utils/geo_decls.h"
#include "utils/elog.h"
/* Pointer to a function using the PG_FUNCTION_ARGS calling convention; used for the intersection callback of rtree_picksplit */
typedef Datum (*RDF)(PG_FUNCTION_ARGS);
/* Pairwise union callback: takes two key datums and stores the result size through the int pointer */
typedef Datum (*BINARY_UNION)(Datum, Datum, int*);
/* Size callback: maps a key datum to a float measure */
typedef float (*SIZE_BOX)(Datum);
/*
** Workaround for index_formtuple
*/
/* Index key stored for polygons: a size word followed by a BOX (gpoly_compress fills it from the polygon's boundbox) */
typedef struct polykey {
int32 size; /* size in varlena terms */
BOX key;
} POLYKEY;
/*
** box ops
*/
PG_FUNCTION_INFO_V1(gbox_compress);
PG_FUNCTION_INFO_V1(gbox_union);
PG_FUNCTION_INFO_V1(gbox_picksplit);
PG_FUNCTION_INFO_V1(gbox_consistent);
PG_FUNCTION_INFO_V1(gbox_penalty);
PG_FUNCTION_INFO_V1(gbox_same);
GISTENTRY * gbox_compress(PG_FUNCTION_ARGS);
BOX *gbox_union(PG_FUNCTION_ARGS);
GIST_SPLITVEC * gbox_picksplit(PG_FUNCTION_ARGS);
bool gbox_consistent(PG_FUNCTION_ARGS);
float * gbox_penalty(PG_FUNCTION_ARGS);
bool * gbox_same(PG_FUNCTION_ARGS);
/* file-local helpers for the box operator class */
static Datum gbox_binary_union(Datum r1, Datum r2, int *sizep);
static bool gbox_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy);
static float size_box( Datum box );
/*
** Polygon ops
*/
PG_FUNCTION_INFO_V1(gpoly_compress);
PG_FUNCTION_INFO_V1(gpoly_union);
PG_FUNCTION_INFO_V1(gpoly_picksplit);
PG_FUNCTION_INFO_V1(gpoly_consistent);
PG_FUNCTION_INFO_V1(gpoly_penalty);
PG_FUNCTION_INFO_V1(gpoly_same);
GISTENTRY * gpoly_compress(PG_FUNCTION_ARGS);
POLYKEY *gpoly_union(PG_FUNCTION_ARGS);
GIST_SPLITVEC * gpoly_picksplit(PG_FUNCTION_ARGS);
bool gpoly_consistent(PG_FUNCTION_ARGS);
float * gpoly_penalty(PG_FUNCTION_ARGS);
bool * gpoly_same(PG_FUNCTION_ARGS);
/* file-local helpers for the polygon operator class */
static Datum gpoly_binary_union(Datum r1, Datum r2, int *sizep);
static float size_polykey( Datum pk );
/* intersection of two POLYKEYs; passed to rtree_picksplit as its RDF callback */
PG_FUNCTION_INFO_V1(gpoly_inter);
Datum gpoly_inter(PG_FUNCTION_ARGS);
/*
** Common rtree-function (for all ops)
*/
static Datum rtree_union(bytea *entryvec, int *sizep, BINARY_UNION bu);
static float * rtree_penalty(GISTENTRY *origentry, GISTENTRY *newentry,
float *result, BINARY_UNION bu, SIZE_BOX sb);
static GIST_SPLITVEC * rtree_picksplit(bytea *entryvec, GIST_SPLITVEC *v,
int keylen, BINARY_UNION bu, RDF interop, SIZE_BOX sb);
static bool rtree_internal_consistent(BOX *key, BOX *query, StrategyNumber strategy);
/* decompress is shared by both operator classes */
PG_FUNCTION_INFO_V1(rtree_decompress);
GISTENTRY * rtree_decompress(PG_FUNCTION_ARGS);
/**************************************************
* Box ops
**************************************************/
/*
** The GiST Consistent method for boxes
** Should return false if for all data items x below entry,
** the predicate x op query == FALSE, where op is the oper
** corresponding to strategy in the pg_amop table.
*/
bool
gbox_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY*) PG_GETARG_POINTER(0);
BOX *query = (BOX*) PG_GETARG_POINTER(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/*
** if entry is not leaf, use gbox_internal_consistent,
** else use gbox_leaf_consistent
*/
if ( ! (DatumGetPointer(entry->key) != NULL && query) )
return FALSE;
if (GIST_LEAF(entry))
PG_RETURN_BOOL(gbox_leaf_consistent((BOX *) DatumGetPointer(entry->key), query, strategy));
else
PG_RETURN_BOOL(rtree_internal_consistent((BOX *) DatumGetPointer(entry->key), query, strategy));
}
/*
** The GiST Union method for boxes
** returns the minimal bounding box that encloses all the entries in entryvec
*/
BOX *
gbox_union(PG_FUNCTION_ARGS)
{
return (BOX*)
DatumGetPointer(rtree_union(
(bytea*) PG_GETARG_POINTER(0),
(int*) PG_GETARG_POINTER(1),
gbox_binary_union
));
}
/*
** GiST Compress method for boxes.
** Boxes are stored as they are, so the entry is handed back untouched.
*/
GISTENTRY *
gbox_compress(PG_FUNCTION_ARGS)
{
    GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);

    return entry;
}
/*
** The GiST Penalty method for boxes
** As in the R-tree paper, we use change in area as our penalty metric
*/
float *
gbox_penalty(PG_FUNCTION_ARGS)
{
return rtree_penalty(
(GISTENTRY*) PG_GETARG_POINTER(0),
(GISTENTRY*) PG_GETARG_POINTER(1),
(float*) PG_GETARG_POINTER(2),
gbox_binary_union,
size_box
);
}
/*
** The GiST PickSplit method for boxes
** We use Guttman's poly time split algorithm
*/
GIST_SPLITVEC *
gbox_picksplit(PG_FUNCTION_ARGS)
{
return rtree_picksplit(
(bytea*)PG_GETARG_POINTER(0),
(GIST_SPLITVEC*)PG_GETARG_POINTER(1),
sizeof(BOX),
gbox_binary_union,
rt_box_inter,
size_box
);
}
/*
** Equality method
*/
bool *
gbox_same(PG_FUNCTION_ARGS)
{
BOX *b1 = (BOX*) PG_GETARG_POINTER(0);
BOX *b2 = (BOX*) PG_GETARG_POINTER(1);
bool *result = (bool*) PG_GETARG_POINTER(2);
if ( b1 && b2 )
*result = DatumGetBool( DirectFunctionCall2( box_same, PointerGetDatum(b1), PointerGetDatum(b2)) );
else
*result = ( b1==NULL && b2==NULL ) ? TRUE : FALSE;
return(result);
}
/*
** SUPPORT ROUTINES for boxes
*/
/*
** Leaf-level consistency test for boxes: picks the box operator that
** belongs to the strategy number and applies it to (key, query).
** Unknown strategy numbers yield FALSE.
*/
static bool
gbox_leaf_consistent(BOX *key,
                     BOX *query,
                     StrategyNumber strategy)
{
    RDF     op;

    /* choose the operator first, call it once below */
    switch (strategy)
    {
        case RTLeftStrategyNumber:
            op = box_left;
            break;
        case RTOverLeftStrategyNumber:
            op = box_overleft;
            break;
        case RTOverlapStrategyNumber:
            op = box_overlap;
            break;
        case RTOverRightStrategyNumber:
            op = box_overright;
            break;
        case RTRightStrategyNumber:
            op = box_right;
            break;
        case RTSameStrategyNumber:
            op = box_same;
            break;
        case RTContainsStrategyNumber:
            op = box_contain;
            break;
        case RTContainedByStrategyNumber:
            op = box_contained;
            break;
        default:
            return FALSE;
    }

    return DatumGetBool(DirectFunctionCall2(op,
                                            PointerGetDatum(key),
                                            PointerGetDatum(query)));
}
/*
** Pairwise union of two box keys, either of which may be NULL.
**   both present -> result of rt_box_union, *sizep = sizeof(BOX)
**   one present  -> freshly palloc'd copy of it, *sizep = sizeof(BOX)
**   none present -> NULL datum, *sizep = 0
*/
static Datum
gbox_binary_union(Datum r1, Datum r2, int *sizep)
{
    BOX    *first = (BOX *) DatumGetPointer(r1);
    BOX    *second = (BOX *) DatumGetPointer(r2);
    BOX    *present;
    BOX    *copy;

    if (first != NULL && second != NULL)
    {
        BOX    *merged = (BOX *) DatumGetPointer(
                                DirectFunctionCall2(rt_box_union, r1, r2));

        *sizep = sizeof(BOX);
        return PointerGetDatum(merged);
    }

    /* at most one side is present; r1 takes precedence */
    present = (first != NULL) ? first : second;
    if (present == NULL)
    {
        *sizep = 0;
        return PointerGetDatum(NULL);
    }

    copy = (BOX *) palloc(sizeof(BOX));
    memcpy(copy, present, sizeof(BOX));
    *sizep = sizeof(BOX);
    return PointerGetDatum(copy);
}
/*
** Size of a box key as reported by rt_box_size; a NULL key has size 0.
*/
static float
size_box(Datum box)
{
    float   area;

    if (DatumGetPointer(box) == NULL)
        return 0.0;

    /* rt_box_size writes its answer through the second argument */
    DirectFunctionCall2(rt_box_size, box, PointerGetDatum(&area));
    return area;
}
/**************************************************
* Polygon ops
**************************************************/
GISTENTRY *
gpoly_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry=(GISTENTRY*)PG_GETARG_POINTER(0);
GISTENTRY *retval;
if ( entry->leafkey) {
retval = palloc(sizeof(GISTENTRY));
if ( DatumGetPointer(entry->key) != NULL ) {
POLYGON *in;
POLYKEY *r;
in = (POLYGON *) PG_DETOAST_DATUM(entry->key);
r = (POLYKEY *) palloc( sizeof(POLYKEY) );
r->size = sizeof(POLYKEY);
memcpy( (void*)&(r->key), (void*)&(in->boundbox), sizeof(BOX) );
if ( in != (POLYGON *) DatumGetPointer(entry->key) )
pfree( in );
gistentryinit(*retval, PointerGetDatum(r),
entry->rel, entry->page,
entry->offset, sizeof(POLYKEY), FALSE);
} else {
gistentryinit(*retval, (Datum) 0,
entry->rel, entry->page,
entry->offset, 0,FALSE);
}
} else {
retval = entry;
}
return( retval );
}
bool
gpoly_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY*) PG_GETARG_POINTER(0);
POLYGON *query = (POLYGON*)PG_DETOAST_DATUM( PG_GETARG_POINTER(1) );
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
bool result;
/*
** if entry is not leaf, use gbox_internal_consistent,
** else use gbox_leaf_consistent
*/
if ( ! (DatumGetPointer(entry->key) != NULL && query) )
return FALSE;
result = rtree_internal_consistent((BOX*)&( ((POLYKEY *) DatumGetPointer(entry->key))->key ),
&(query->boundbox), strategy);
PG_FREE_IF_COPY(query,1);
PG_RETURN_BOOL( result );
}
POLYKEY *
gpoly_union(PG_FUNCTION_ARGS)
{
return (POLYKEY*)
DatumGetPointer(rtree_union(
(bytea*) PG_GETARG_POINTER(0),
(int*) PG_GETARG_POINTER(1),
gpoly_binary_union
));
}
float *
gpoly_penalty(PG_FUNCTION_ARGS)
{
return rtree_penalty(
(GISTENTRY*) PG_GETARG_POINTER(0),
(GISTENTRY*) PG_GETARG_POINTER(1),
(float*) PG_GETARG_POINTER(2),
gpoly_binary_union,
size_polykey
);
}
GIST_SPLITVEC *
gpoly_picksplit(PG_FUNCTION_ARGS)
{
return rtree_picksplit(
(bytea*)PG_GETARG_POINTER(0),
(GIST_SPLITVEC*)PG_GETARG_POINTER(1),
sizeof(POLYKEY),
gpoly_binary_union,
gpoly_inter,
size_polykey
);
}
bool *
gpoly_same(PG_FUNCTION_ARGS)
{
POLYKEY *b1 = (POLYKEY*) PG_GETARG_POINTER(0);
POLYKEY *b2 = (POLYKEY*) PG_GETARG_POINTER(1);
bool *result = (bool*) PG_GETARG_POINTER(2);
if ( b1 && b2 )
*result = DatumGetBool( DirectFunctionCall2( box_same,
PointerGetDatum(&(b1->key)),
PointerGetDatum(&(b2->key))) );
else
*result = ( b1==NULL && b2==NULL ) ? TRUE : FALSE;
return(result);
}
/*
** SUPPORT ROUTINES for polygons
*/
Datum
gpoly_inter(PG_FUNCTION_ARGS)
{
POLYKEY *b1 = (POLYKEY*) PG_GETARG_POINTER(0);
POLYKEY *b2 = (POLYKEY*) PG_GETARG_POINTER(1);
Datum interd;
interd = DirectFunctionCall2(rt_box_inter,
PointerGetDatum( &(b1->key) ),
PointerGetDatum( &(b2->key) ));
if (DatumGetPointer(interd) != NULL) {
POLYKEY *tmp = (POLYKEY*) palloc( sizeof(POLYKEY) );
tmp->size = sizeof(POLYKEY);
memcpy( &(tmp->key), DatumGetPointer(interd), sizeof(BOX) );
pfree( DatumGetPointer(interd) );
PG_RETURN_POINTER( tmp );
} else
PG_RETURN_POINTER( NULL );
}
/*
** Pairwise union of two POLYKEYs, either of which may be NULL.
**   both present -> new POLYKEY holding rt_box_union of the two boxes,
**                   *sizep = sizeof(POLYKEY)
**   one present  -> freshly palloc'd copy of it, *sizep = sizeof(POLYKEY)
**   none present -> NULL datum, *sizep = 0
*/
static Datum
gpoly_binary_union(Datum r1, Datum r2, int *sizep)
{
    POLYKEY    *first = (POLYKEY *) DatumGetPointer(r1);
    POLYKEY    *second = (POLYKEY *) DatumGetPointer(r2);
    POLYKEY    *retval;

    if (first != NULL && second != NULL)
    {
        BOX    *merged = (BOX *) DatumGetPointer(
                                DirectFunctionCall2(rt_box_union,
                                                    PointerGetDatum(&(first->key)),
                                                    PointerGetDatum(&(second->key))));

        retval = (POLYKEY *) palloc(sizeof(POLYKEY));
        memcpy(&(retval->key), merged, sizeof(BOX));
        /* the box returned by rt_box_union is no longer needed */
        pfree(merged);
        *sizep = retval->size = sizeof(POLYKEY);
        return PointerGetDatum(retval);
    }

    if (first == NULL && second == NULL)
    {
        *sizep = 0;
        return PointerGetDatum(NULL);
    }

    /* exactly one side is present: duplicate it, size field included */
    retval = (POLYKEY *) palloc(sizeof(POLYKEY));
    memcpy((void *) retval,
           (first != NULL) ? (void *) first : (void *) second,
           sizeof(POLYKEY));
    *sizep = sizeof(POLYKEY);
    return PointerGetDatum(retval);
}
/*
** Size of a POLYKEY: rt_box_size of its embedded box; a NULL key has
** size 0.
*/
static float
size_polykey(Datum pk)
{
    POLYKEY    *polykey = (POLYKEY *) DatumGetPointer(pk);
    float       area;

    if (polykey == NULL)
        return 0.0;

    /* rt_box_size writes its answer through the second argument */
    DirectFunctionCall2(rt_box_size,
                        PointerGetDatum(&(polykey->key)),
                        PointerGetDatum(&area));
    return area;
}
/*
** Common rtree-function (for all ops)
*/
/*
** Union of all keys in an entry vector.
**
**   entryvec - varlena whose payload is an array of GISTENTRY
**   sizep    - receives the size of the result as reported by bu
**   bu       - pairwise union callback (NULL-tolerant on both inputs)
**
** Returns a datum produced by bu; it never aliases a key owned by the
** vector.  Edge cases:
**   - empty vector: NULL datum, *sizep = 0 (entry 0 is not read)
**   - one entry: bu(key, NULL), so the union of a single key is that key
**     (a copy of it) with *sizep set, rather than a NULL datum and an
**     untouched *sizep
*/
static Datum
rtree_union(bytea *entryvec, int *sizep, BINARY_UNION bu)
{
    GISTENTRY  *entries = (GISTENTRY *) VARDATA(entryvec);
    int         numranges;
    int         i;
    Datum       out;
    Datum       tmp;

    numranges = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);

    if (numranges <= 0)
    {
        *sizep = 0;
        return (Datum) 0;
    }

    tmp = entries[0].key;

    if (numranges == 1)
        return (*bu) (tmp, (Datum) 0, sizep);

    out = (Datum) 0;
    for (i = 1; i < numranges; i++)
    {
        out = (*bu) (tmp, entries[i].key, sizep);

        /*
         * On the first pass tmp is still entry 0's own key, which belongs
         * to the vector; from then on it is an intermediate result that
         * we allocated and may release.
         */
        if (i > 1 && DatumGetPointer(tmp) != NULL)
            pfree(DatumGetPointer(tmp));
        tmp = out;
    }

    return out;
}
/*
** Generic penalty computation: the size of union(origentry, newentry)
** minus the size of origentry.  The value is stored in *result and the
** result pointer is returned.
*/
static float *
rtree_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result, BINARY_UNION bu, SIZE_BOX sb)
{
    int     unused_size;
    Datum   merged;
    float   merged_size;

    merged = (*bu) (origentry->key, newentry->key, &unused_size);
    merged_size = (*sb) (merged);

    /* the union was only needed for its size */
    if (DatumGetPointer(merged) != NULL)
        pfree(DatumGetPointer(merged));

    *result = merged_size - (*sb) (origentry->key);
    return result;
}
/*
** The GiST PickSplit method
** We use Guttman's poly time split algorithm
**
**   entryvec - varlena whose payload is an array of GISTENTRY
**   v        - split vector to fill in; also the return value
**   keylen   - number of bytes copied when duplicating a seed key
**   bu       - pairwise union callback (NULL-tolerant)
**   interop  - fmgr-callable intersection of two non-NULL keys
**   sb       - size callback (returns 0 for a NULL key)
**
** Fills v->spl_left / v->spl_right (palloc'd here) with the offsets
** assigned to each side, v->spl_nleft / v->spl_nright with their counts,
** and v->spl_ldatum / v->spl_rdatum with the union key of each side.
*/
static GIST_SPLITVEC *
rtree_picksplit(bytea *entryvec, GIST_SPLITVEC *v, int keylen, BINARY_UNION bu, RDF interop, SIZE_BOX sb)
{
    GISTENTRY      *entries = (GISTENTRY *) VARDATA(entryvec);
    OffsetNumber    i, j;
    Datum           datum_alpha, datum_beta;
    Datum           datum_l, datum_r;
    Datum           union_d, union_dl, union_dr;
    Datum           inter_d;
    bool            firsttime;
    float           size_alpha, size_beta, size_union, size_inter;
    float           size_waste, waste;
    float           size_l, size_r;
    int             nbytes;
    int             sizep;
    OffsetNumber    seed_1 = 0, seed_2 = 0;
    OffsetNumber   *left, *right;
    OffsetNumber    maxoff;

    maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    v->spl_left = (OffsetNumber *) palloc(nbytes);
    v->spl_right = (OffsetNumber *) palloc(nbytes);

    /*
     * Seed selection: examine every pair (i, j) and keep the pair whose
     * union wastes the most space.
     */
    firsttime = true;
    waste = 0.0;
    for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
    {
        datum_alpha = entries[i].key;
        for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
        {
            datum_beta = entries[j].key;

            /* compute the wasted space by unioning these guys */
            /* size_waste = size_union - size_inter; */
            union_d = (*bu) (datum_alpha, datum_beta, &sizep);
            if (DatumGetPointer(union_d) != NULL)
            {
                size_union = (*sb) (union_d);
                pfree(DatumGetPointer(union_d));
            }
            else
                size_union = 0.0;

            /* the intersection is only defined for two non-NULL keys */
            if (DatumGetPointer(datum_alpha) != NULL &&
                DatumGetPointer(datum_beta) != NULL)
            {
                inter_d = DirectFunctionCall2(interop,
                                              datum_alpha,
                                              datum_beta);
                if (DatumGetPointer(inter_d) != NULL)
                {
                    size_inter = (*sb) (inter_d);
                    pfree(DatumGetPointer(inter_d));
                }
                else
                    size_inter = 0.0;
            }
            else
                size_inter = 0.0;

            size_waste = size_union - size_inter;

            /*
             * are these a more promising split that what we've
             * already seen?
             */
            if (size_waste > waste || firsttime)
            {
                waste = size_waste;
                seed_1 = i;
                seed_2 = j;
                firsttime = false;
            }
        }
    }

    left = v->spl_left;
    v->spl_nleft = 0;
    right = v->spl_right;
    v->spl_nright = 0;

    /* start each side with a private copy of its seed key (if any) */
    if (DatumGetPointer(entries[seed_1].key) != NULL)
    {
        datum_l = PointerGetDatum(palloc(keylen));
        memcpy(DatumGetPointer(datum_l),
               DatumGetPointer(entries[seed_1].key),
               keylen);
    }
    else
        datum_l = (Datum) 0;
    size_l = (*sb) (datum_l);

    if (DatumGetPointer(entries[seed_2].key) != NULL)
    {
        datum_r = PointerGetDatum(palloc(keylen));
        memcpy(DatumGetPointer(datum_r),
               DatumGetPointer(entries[seed_2].key),
               keylen);
    }
    else
        datum_r = (Datum) 0;
    size_r = (*sb) (datum_r);

    /*
     * Now split up the regions between the two seeds.  An important
     * property of this split algorithm is that the split vector v
     * has the indices of items to be split in order in its left and
     * right vectors.  We exploit this property by doing a merge in
     * the code that actually splits the page.
     *
     * For efficiency, we also place the new index tuple in this loop.
     * This is handled at the very end, when we have placed all the
     * existing tuples and i == maxoff + 1.
     */
    maxoff = OffsetNumberNext(maxoff);
    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    {
        /*
         * If we've already decided where to place this item, just
         * put it on the right list.  Otherwise, we need to figure
         * out which page needs the least enlargement in order to
         * store the item.
         */
        if (i == seed_1)
        {
            *left++ = i;
            v->spl_nleft++;
            continue;
        }
        else if (i == seed_2)
        {
            *right++ = i;
            v->spl_nright++;
            continue;
        }

        /* okay, which page needs least enlargement? */
        datum_alpha = entries[i].key;
        union_dl = (*bu) (datum_l, datum_alpha, &sizep);
        union_dr = (*bu) (datum_r, datum_alpha, &sizep);
        size_alpha = (*sb) (union_dl);
        size_beta = (*sb) (union_dr);

        /*
         * pick which page to add it to.  Any of the datums released here
         * can be NULL (NULL seed key, or a union of two NULL keys), so
         * each pfree is guarded.
         */
        if (size_alpha - size_l < size_beta - size_r)
        {
            if (DatumGetPointer(datum_l) != NULL)
                pfree(DatumGetPointer(datum_l));
            if (DatumGetPointer(union_dr) != NULL)
                pfree(DatumGetPointer(union_dr));
            datum_l = union_dl;
            size_l = size_alpha;
            *left++ = i;
            v->spl_nleft++;
        }
        else
        {
            if (DatumGetPointer(datum_r) != NULL)
                pfree(DatumGetPointer(datum_r));
            if (DatumGetPointer(union_dl) != NULL)
                pfree(DatumGetPointer(union_dl));
            datum_r = union_dr;
            /* the right side's new size is that of union_dr (size_beta) */
            size_r = size_beta;
            *right++ = i;
            v->spl_nright++;
        }
    }
    *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */

    v->spl_ldatum = datum_l;
    v->spl_rdatum = datum_r;

    return v;
}
/*
** Consistency test for a bounding-box key.
**
** The key bounds everything stored below it, so the test must not reject
** a key whose subtree could still hold a match.  Each strategy is
** therefore mapped to a weaker operator on the key:
**
**   Left, OverLeft    -> box_overleft
**   Overlap           -> box_overlap
**   OverRight, Right  -> box_overright
**   Same, Contains    -> box_contain
**   ContainedBy       -> box_overlap
**
** Right/OverRight use box_overright, mirroring the Left/OverLeft case:
** a bounding box that is not strictly to the right of the query can
** still enclose entries that are.
**
** Unknown strategy numbers yield FALSE.
*/
static bool
rtree_internal_consistent(BOX *key,
                          BOX *query,
                          StrategyNumber strategy)
{
    bool    retval;

    switch (strategy)
    {
        case RTLeftStrategyNumber:
        case RTOverLeftStrategyNumber:
            retval = DatumGetBool(DirectFunctionCall2(box_overleft,
                                                      PointerGetDatum(key),
                                                      PointerGetDatum(query)));
            break;
        case RTOverlapStrategyNumber:
            retval = DatumGetBool(DirectFunctionCall2(box_overlap,
                                                      PointerGetDatum(key),
                                                      PointerGetDatum(query)));
            break;
        case RTOverRightStrategyNumber:
        case RTRightStrategyNumber:
            retval = DatumGetBool(DirectFunctionCall2(box_overright,
                                                      PointerGetDatum(key),
                                                      PointerGetDatum(query)));
            break;
        case RTSameStrategyNumber:
        case RTContainsStrategyNumber:
            retval = DatumGetBool(DirectFunctionCall2(box_contain,
                                                      PointerGetDatum(key),
                                                      PointerGetDatum(query)));
            break;
        case RTContainedByStrategyNumber:
            retval = DatumGetBool(DirectFunctionCall2(box_overlap,
                                                      PointerGetDatum(key),
                                                      PointerGetDatum(query)));
            break;
        default:
            retval = FALSE;
    }

    return retval;
}
/*
 * GiST decompress method.
 *
 * Nothing is done here: the GISTENTRY pointer received as argument 0 is
 * handed back to the caller unchanged.
 */
GISTENTRY *
rtree_decompress(PG_FUNCTION_ARGS)
{
    GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);

    return entry;
}
BEGIN TRANSACTION;

--
-- BOX ops
--

-- Declare the C entry points that serve as GiST support methods for box
-- keys.  Each one is loaded from this module's shared library.
CREATE FUNCTION gbox_consistent(opaque, box, int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gbox_compress(opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION rtree_decompress(opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gbox_penalty(opaque, opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gbox_picksplit(opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gbox_union(bytea, opaque)
    RETURNS box
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gbox_same(box, box, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';
-- Create the operator class row used for GiST indexing of boxes.
-- The row is stored with opcdeftype 0; the variant that would store the
-- OID of type box instead is kept here, disabled, for reference:
--   INSERT INTO pg_opclass (opcname, opcdeftype)
--       SELECT 'gist_box_ops', oid
--       FROM pg_type
--       WHERE typname = 'box';
INSERT INTO pg_opclass (opcname, opcdeftype)
    VALUES ('gist_box_ops', 0);
-- Disabled check of the row just inserted:
--   SELECT oid, opcname FROM pg_opclass WHERE opcname = 'gist_box_ops';
-- Collect the box comparison operators (OID and name) into a scratch
-- table that the pg_amop inserts below look operators up in by name.
-- Both operands are required to be of type box: matching on the left
-- operand alone would also pick up any same-named operator that takes a
-- different right-hand type, and each such operator would add a spurious
-- pg_amop row for its strategy.
SELECT o.oid AS opoid, o.oprname
INTO TABLE rt_ops_tmp
FROM pg_operator o, pg_type t
WHERE o.oprleft = t.oid
  AND o.oprright = t.oid
  AND t.typname = 'box';
-- Fill pg_amop for gist_box_ops: one row per search strategy, pairing the
-- operator looked up by name in rt_ops_tmp with its strategy number.

-- strategy 1: box_left
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 1
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '<<';

-- strategy 2: box_overleft
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 2
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '&<';

-- strategy 3: box_overlap
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 3
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '&&';

-- strategy 4: box_overright
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 4
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '&>';

-- strategy 5: box_right
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 5
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '>>';

-- strategy 6: box_same
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 6
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '~=';

-- strategy 7: box_contains
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 7
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '~';

-- strategy 8: box_contained
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 8
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND c.oprname = '@';

-- the scratch table has served its purpose
DROP TABLE rt_ops_tmp;
-- Fill pg_amproc for gist_box_ops: one row per support method.
-- The amprocnum value stored with each function is significant; do not
-- renumber.

-- support method 1: consistent
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 1
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'gbox_consistent';

-- support method 2: union
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 2
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'gbox_union';

-- support method 3: compress
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 3
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'gbox_compress';

-- support method 4: decompress
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 4
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'rtree_decompress';

-- support method 5: penalty
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 5
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'gbox_penalty';

-- support method 6: picksplit
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 6
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'gbox_picksplit';

-- support method 7: same
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 7
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_box_ops'
      AND pro.proname = 'gbox_same';
--
-- POLYGON ops
--

-- Declare the C entry points that serve as GiST support methods for
-- polygon keys.  No decompress function is declared in this section; the
-- polygon opclass registers rtree_decompress for that slot.
CREATE FUNCTION gpoly_consistent(opaque, polygon, int4)
    RETURNS bool
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gpoly_compress(opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gpoly_penalty(opaque, opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gpoly_picksplit(opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gpoly_union(bytea, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';

CREATE FUNCTION gpoly_same(opaque, opaque, opaque)
    RETURNS opaque
    AS 'MODULE_PATHNAME'
    LANGUAGE 'C';
-- Create the operator class row used for GiST indexing of polygons.
-- The row is stored with opcdeftype 0; the variant that would store the
-- OID of type polygon instead is kept here, disabled, for reference:
--   INSERT INTO pg_opclass (opcname, opcdeftype)
--       SELECT 'gist_poly_ops', oid
--       FROM pg_type
--       WHERE typname = 'polygon';
INSERT INTO pg_opclass (opcname, opcdeftype)
    VALUES ('gist_poly_ops', 0);
-- Disabled check of the row just inserted:
--   SELECT oid, opcname FROM pg_opclass WHERE opcname = 'gist_poly_ops';
-- Collect the polygon comparison operators (OID and name) into a scratch
-- table that the pg_amop inserts below look operators up in by name.
-- Both operands are required to be of type polygon.  This replaces the
-- former hard-coded exclusion of operator OID 757 (poly_contain_pt,
-- presumably polygon ~ point -- confirm in pg_operator): an operator
-- whose right operand is not a polygon can no longer be picked up under
-- the same name as a polygon-vs-polygon operator, whatever its OID is.
SELECT o.oid AS opoid, o.oprname
INTO TABLE rt_ops_tmp
FROM pg_operator o, pg_type t
WHERE o.oprleft = t.oid
  AND o.oprright = t.oid
  AND t.typname = 'polygon';
-- Fill pg_amop for gist_poly_ops: one row per search strategy, pairing
-- the operator looked up by name in rt_ops_tmp with its strategy number.

-- strategy 1: poly_left
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 1
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '<<';

-- strategy 2: poly_overleft
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 2
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '&<';

-- strategy 3: poly_overlap
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 3
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '&&';

-- strategy 4: poly_overright
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 4
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '&>';

-- strategy 5: poly_right
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 5
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '>>';

-- strategy 6: poly_same
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 6
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '~=';

-- strategy 7: poly_contains
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 7
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '~';

-- strategy 8: poly_contained
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
    SELECT am.oid, opcl.oid, c.opoid, 8
    FROM pg_am am, pg_opclass opcl, rt_ops_tmp c
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND c.oprname = '@';

-- the scratch table has served its purpose
DROP TABLE rt_ops_tmp;
-- Fill pg_amproc for gist_poly_ops: one row per support method.
-- The amprocnum value stored with each function is significant; do not
-- renumber.

-- support method 1: consistent
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 1
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'gpoly_consistent';

-- support method 2: union
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 2
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'gpoly_union';

-- support method 3: compress
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 3
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'gpoly_compress';

-- support method 4: decompress (the same function the box opclass uses)
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 4
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'rtree_decompress';

-- support method 5: penalty
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 5
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'gpoly_penalty';

-- support method 6: picksplit
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 6
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'gpoly_picksplit';

-- support method 7: same
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
    SELECT am.oid, opcl.oid, pro.oid, 7
    FROM pg_am am, pg_opclass opcl, pg_proc pro
    WHERE am.amname = 'gist'
      AND opcl.opcname = 'gist_poly_ops'
      AND pro.proname = 'gpoly_same';

END TRANSACTION;
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
-- Reviewer notes.  They are placed here, while echoing is switched off,
-- so that they presumably do not show up in the echoed test output
-- (TODO confirm against the expected-output file).
--
--  * The file loaded next is rtree_gist.sql; the "seg.sql" named in the
--    header comment of this script apparently refers to it.
--  * Box part: boxtmp is filled from data/test_box.data, then the same
--    overlap count is taken three times: before any index exists, with
--    an rtree index, and with a GiST index using gist_box_ops.
--  * Polygon part: polytmp is filled from the same data file, then the
--    overlap count is taken with an rtree index and again with a GiST
--    index using gist_poly_ops, which is created with the islossy option.
--  * Counts taken for the same table are presumably meant to agree
--    regardless of the index in use -- verify against the expected file.
\i rtree_gist.sql
\set ECHO all
create table boxtmp (b box);
\copy boxtmp from 'data/test_box.data'
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
create index bix on boxtmp using rtree (b);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
drop index bix;
create index bix on boxtmp using gist (b gist_box_ops);
select count(*) from boxtmp where b && '(1000,1000,0,0)'::box;
create table polytmp (p polygon);
\copy polytmp from 'data/test_box.data'
create index pix on polytmp using rtree (p);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
drop index pix;
create index pix on polytmp using gist (p gist_poly_ops) with(islossy);
select count(*) from polytmp where p && '(1000,1000),(0,0)'::polygon;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册