_ltree_gist.c 11.6 KB
Newer Older
1
/*
B
Bruce Momjian 已提交
2
 * GiST support for ltree[]
3 4 5 6 7 8 9 10 11 12 13
 * Teodor Sigaev <teodor@stack.net>
 */

#include "ltree.h"
#include "access/gist.h"
#include "access/rtree.h"
#include "access/nbtree.h"
#include "utils/array.h"

#include "crc32.h"

B
Bruce Momjian 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
PG_FUNCTION_INFO_V1(_ltree_compress);
Datum		_ltree_compress(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(_ltree_same);
Datum		_ltree_same(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(_ltree_union);
Datum		_ltree_union(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(_ltree_penalty);
Datum		_ltree_penalty(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(_ltree_picksplit);
Datum		_ltree_picksplit(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(_ltree_consistent);
Datum		_ltree_consistent(PG_FUNCTION_ARGS);
31

32
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
33
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
B
Bruce Momjian 已提交
34
#define SUMBIT(val) (		 \
35 36 37 38 39 40 41
	GETBITBYTE(val,0) + \
	GETBITBYTE(val,1) + \
	GETBITBYTE(val,2) + \
	GETBITBYTE(val,3) + \
	GETBITBYTE(val,4) + \
	GETBITBYTE(val,5) + \
	GETBITBYTE(val,6) + \
B
Bruce Momjian 已提交
42
	GETBITBYTE(val,7)	\
43 44 45 46
)
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )

static void
B
Bruce Momjian 已提交
47 48 49
hashing(BITVECP sign, ltree * t)
{
	int			tlen = t->numlevel;
50
	ltree_level *cur = LTREE_FIRST(t);
B
Bruce Momjian 已提交
51
	int			hash;
52

B
Bruce Momjian 已提交
53 54 55 56
	while (tlen > 0)
	{
		hash = ltree_crc32_sz(cur->name, cur->len);
		AHASH(sign, hash);
57 58 59 60 61
		cur = LEVEL_NEXT(cur);
		tlen--;
	}
}

B
Bruce Momjian 已提交
62 63 64 65
Datum
_ltree_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
66 67
	GISTENTRY  *retval = entry;

B
Bruce Momjian 已提交
68 69 70 71 72 73 74
	if (entry->leafkey)
	{							/* ltree */
		ltree_gist *key;
		ArrayType  *val = DatumGetArrayTypeP(entry->key);
		int4		len = LTG_HDRSIZE + ASIGLEN;
		int			num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
		ltree	   *item = (ltree *) ARR_DATA_PTR(val);
75

B
Bruce Momjian 已提交
76
		if (ARR_NDIM(val) != 1)
77
			ereport(ERROR,
B
Bruce Momjian 已提交
78 79
					(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
					 errmsg("array must be one-dimensional")));
80

B
Bruce Momjian 已提交
81
		key = (ltree_gist *) palloc(len);
82 83 84
		key->len = len;
		key->flag = 0;

B
Bruce Momjian 已提交
85 86 87
		MemSet(LTG_SIGN(key), 0, sizeof(ASIGLEN));
		while (num > 0)
		{
88 89 90 91 92
			hashing(LTG_SIGN(key), item);
			num--;
			item = NEXTVAL(item);
		}

B
Bruce Momjian 已提交
93
		if (PointerGetDatum(val) != entry->key)
94 95
			pfree(val);

B
Bruce Momjian 已提交
96
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
97
		gistentryinit(*retval, PointerGetDatum(key),
B
Bruce Momjian 已提交
98 99 100
					  entry->rel, entry->page,
					  entry->offset, key->len, FALSE);
	}
B
Bruce Momjian 已提交
101
	else if (!LTG_ISALLTRUE(entry->key))
B
Bruce Momjian 已提交
102 103 104 105
	{
		int4		i,
					len;
		ltree_gist *key;
106

B
Bruce Momjian 已提交
107
		BITVECP		sign = LTG_SIGN(DatumGetPointer(entry->key));
108 109

		ALOOPBYTE(
B
Bruce Momjian 已提交
110
				  if ((sign[i] & 0xff) != 0xff)
B
Bruce Momjian 已提交
111
				  PG_RETURN_POINTER(retval);
112
		);
B
Bruce Momjian 已提交
113 114
		len = LTG_HDRSIZE;
		key = (ltree_gist *) palloc(len);
115 116 117
		key->len = len;
		key->flag = LTG_ALLTRUE;

B
Bruce Momjian 已提交
118
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
119
		gistentryinit(*retval, PointerGetDatum(key),
B
Bruce Momjian 已提交
120 121
					  entry->rel, entry->page,
					  entry->offset, key->len, FALSE);
122 123 124 125
	}
	PG_RETURN_POINTER(retval);
}

B
Bruce Momjian 已提交
126 127 128 129 130 131
Datum
_ltree_same(PG_FUNCTION_ARGS)
{
	ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
	ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
	bool	   *result = (bool *) PG_GETARG_POINTER(2);
132

B
Bruce Momjian 已提交
133
	if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
134
		*result = true;
B
Bruce Momjian 已提交
135
	else if (LTG_ISALLTRUE(a))
136
		*result = false;
B
Bruce Momjian 已提交
137
	else if (LTG_ISALLTRUE(b))
138
		*result = false;
B
Bruce Momjian 已提交
139 140 141 142 143 144
	else
	{
		int4		i;
		BITVECP		sa = LTG_SIGN(a),
					sb = LTG_SIGN(b);

145 146
		*result = true;
		ALOOPBYTE(
B
Bruce Momjian 已提交
147 148 149 150 151
				  if (sa[i] != sb[i])
				  {
			*result = false;
			break;
		}
152
		);
B
Bruce Momjian 已提交
153 154
	}
	PG_RETURN_POINTER(result);
155 156
}

B
Bruce Momjian 已提交
157 158 159 160 161
static int4
unionkey(BITVECP sbase, ltree_gist * add)
{
	int4		i;
	BITVECP		sadd = LTG_SIGN(add);
162

B
Bruce Momjian 已提交
163
	if (LTG_ISALLTRUE(add))
164 165 166
		return 1;

	ALOOPBYTE(
B
Bruce Momjian 已提交
167
			  sbase[i] |= sadd[i];
168 169 170 171
	);
	return 0;
}

B
Bruce Momjian 已提交
172 173 174
Datum
_ltree_union(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
175
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
B
Bruce Momjian 已提交
176 177
	int		   *size = (int *) PG_GETARG_POINTER(1);
	ABITVEC		base;
B
Bruce Momjian 已提交
178 179
	int4		i,
				len;
B
Bruce Momjian 已提交
180 181 182 183
	int4		flag = 0;
	ltree_gist *result;

	MemSet((void *) base, 0, sizeof(ABITVEC));
184
	for (i = 0; i < entryvec->n; i++)
B
Bruce Momjian 已提交
185 186 187
	{
		if (unionkey(base, GETENTRY(entryvec, i)))
		{
188 189 190 191 192
			flag = LTG_ALLTRUE;
			break;
		}
	}

B
Bruce Momjian 已提交
193 194
	len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
	result = (ltree_gist *) palloc(len);
195 196
	*size = result->len = len;
	result->flag = flag;
B
Bruce Momjian 已提交
197 198
	if (!LTG_ISALLTRUE(result))
		memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
199

B
Bruce Momjian 已提交
200
	PG_RETURN_POINTER(result);
201 202 203
}

static int4
B
Bruce Momjian 已提交
204 205 206 207 208
sizebitvec(BITVECP sign)
{
	int4		size = 0,
				i;

209
	ALOOPBYTE(
B
Bruce Momjian 已提交
210 211
			  size += SUMBIT(*(char *) sign);
	sign = (BITVECP) (((char *) sign) + 1);
212 213 214 215
	);
	return size;
}

B
Bruce Momjian 已提交
216 217 218 219 220 221
static int
hemdistsign(BITVECP a, BITVECP b)
{
	int			i,
				dist = 0;

B
Bruce Momjian 已提交
222
	ALOOPBIT(
B
Bruce Momjian 已提交
223 224
			 if (GETBIT(a, i) != GETBIT(b, i))
			 dist++;
B
Bruce Momjian 已提交
225 226 227 228 229
	);
	return dist;
}

static int
B
Bruce Momjian 已提交
230 231 232 233 234 235 236 237 238 239 240 241 242
hemdist(ltree_gist * a, ltree_gist * b)
{
	if (LTG_ISALLTRUE(a))
	{
		if (LTG_ISALLTRUE(b))
			return 0;
		else
			return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
	}
	else if (LTG_ISALLTRUE(b))
		return ASIGLENBIT - sizebitvec(LTG_SIGN(a));

	return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
B
Bruce Momjian 已提交
243 244 245
}


B
Bruce Momjian 已提交
246 247 248 249 250 251
Datum
_ltree_penalty(PG_FUNCTION_ARGS)
{
	ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
	ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
	float	   *penalty = (float *) PG_GETARG_POINTER(2);
252

B
Bruce Momjian 已提交
253
	*penalty = hemdist(origval, newval);
B
Bruce Momjian 已提交
254
	PG_RETURN_POINTER(penalty);
255 256
}

B
Bruce Momjian 已提交
257 258 259 260
typedef struct
{
	OffsetNumber pos;
	int4		cost;
261 262 263
} SPLITCOST;

static int
B
Bruce Momjian 已提交
264 265 266
comparecost(const void *a, const void *b)
{
	return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
267 268
}

B
Bruce Momjian 已提交
269 270 271
Datum
_ltree_picksplit(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
272
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
B
Bruce Momjian 已提交
273 274 275 276 277
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber k,
				j;
	ltree_gist *datum_l,
			   *datum_r;
B
Bruce Momjian 已提交
278
	BITVECP		union_l,
B
Bruce Momjian 已提交
279
				union_r;
B
Bruce Momjian 已提交
280 281
	int4		size_alpha,
				size_beta;
B
Bruce Momjian 已提交
282
	int4		size_waste,
B
Bruce Momjian 已提交
283
				waste = -1;
B
Bruce Momjian 已提交
284 285 286 287 288
	int4		nbytes;
	OffsetNumber seed_1 = 0,
				seed_2 = 0;
	OffsetNumber *left,
			   *right;
289
	OffsetNumber maxoff;
B
Bruce Momjian 已提交
290
	BITVECP		ptr;
B
Bruce Momjian 已提交
291 292 293 294
	int			i;
	SPLITCOST  *costvector;
	ltree_gist *_k,
			   *_j;
295

296
	maxoff = entryvec->n - 2;
297 298 299 300
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

B
Bruce Momjian 已提交
301 302
	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
	{
B
Bruce Momjian 已提交
303
		_k = GETENTRY(entryvec, k);
B
Bruce Momjian 已提交
304 305 306 307 308
		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
		{
			size_waste = hemdist(_k, GETENTRY(entryvec, j));
			if (size_waste > waste)
			{
309 310 311 312 313 314 315 316 317 318 319 320
				waste = size_waste;
				seed_1 = k;
				seed_2 = j;
			}
		}
	}

	left = v->spl_left;
	v->spl_nleft = 0;
	right = v->spl_right;
	v->spl_nright = 0;

B
Bruce Momjian 已提交
321 322
	if (seed_1 == 0 || seed_2 == 0)
	{
323 324 325 326 327
		seed_1 = 1;
		seed_2 = 2;
	}

	/* form initial .. */
B
Bruce Momjian 已提交
328 329 330 331 332
	if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
	{
		datum_l = (ltree_gist *) palloc(LTG_HDRSIZE);
		datum_l->len = LTG_HDRSIZE;
		datum_l->flag = LTG_ALLTRUE;
333
	}
B
Bruce Momjian 已提交
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
	else
	{
		datum_l = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
		datum_l->len = LTG_HDRSIZE + ASIGLEN;
		datum_l->flag = 0;
		memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
	}
	if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
	{
		datum_r = (ltree_gist *) palloc(LTG_HDRSIZE);
		datum_r->len = LTG_HDRSIZE;
		datum_r->flag = LTG_ALLTRUE;
	}
	else
	{
		datum_r = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
		datum_r->len = LTG_HDRSIZE + ASIGLEN;
		datum_r->flag = 0;
		memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
353 354 355 356
	}

	maxoff = OffsetNumberNext(maxoff);
	/* sort before ... */
B
Bruce Momjian 已提交
357 358 359 360 361
	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
	{
		costvector[j - 1].pos = j;
		_j = GETENTRY(entryvec, j);
B
Bruce Momjian 已提交
362 363
		size_alpha = hemdist(datum_l, _j);
		size_beta = hemdist(datum_r, _j);
364
		costvector[j - 1].cost = Abs(size_alpha - size_beta);
365
	}
B
Bruce Momjian 已提交
366
	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
367

B
Bruce Momjian 已提交
368 369 370
	union_l = LTG_SIGN(datum_l);
	union_r = LTG_SIGN(datum_r);

B
Bruce Momjian 已提交
371 372
	for (k = 0; k < maxoff; k++)
	{
373
		j = costvector[k].pos;
B
Bruce Momjian 已提交
374 375
		if (j == seed_1)
		{
376 377 378
			*left++ = j;
			v->spl_nleft++;
			continue;
B
Bruce Momjian 已提交
379 380 381
		}
		else if (j == seed_2)
		{
382 383 384 385
			*right++ = j;
			v->spl_nright++;
			continue;
		}
B
Bruce Momjian 已提交
386
		_j = GETENTRY(entryvec, j);
B
Bruce Momjian 已提交
387 388
		size_alpha = hemdist(datum_l, _j);
		size_beta = hemdist(datum_r, _j);
389

B
Bruce Momjian 已提交
390
		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
B
Bruce Momjian 已提交
391
		{
B
Bruce Momjian 已提交
392 393
			if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
			{
B
Bruce Momjian 已提交
394
				if (!LTG_ISALLTRUE(datum_l))
B
Bruce Momjian 已提交
395 396 397 398 399
					MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
			}
			else
			{
				ptr = LTG_SIGN(_j);
B
Bruce Momjian 已提交
400
				ALOOPBYTE(
B
Bruce Momjian 已提交
401
						  union_l[i] |= ptr[i];
B
Bruce Momjian 已提交
402
				);
403 404 405
			}
			*left++ = j;
			v->spl_nleft++;
B
Bruce Momjian 已提交
406 407 408
		}
		else
		{
B
Bruce Momjian 已提交
409 410
			if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
			{
B
Bruce Momjian 已提交
411
				if (!LTG_ISALLTRUE(datum_r))
B
Bruce Momjian 已提交
412 413 414 415 416
					MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
			}
			else
			{
				ptr = LTG_SIGN(_j);
B
Bruce Momjian 已提交
417
				ALOOPBYTE(
B
Bruce Momjian 已提交
418
						  union_r[i] |= ptr[i];
B
Bruce Momjian 已提交
419
				);
420 421 422 423 424 425 426 427 428 429 430 431
			}
			*right++ = j;
			v->spl_nright++;
		}
	}

	*right = *left = FirstOffsetNumber;
	pfree(costvector);

	v->spl_ldatum = PointerGetDatum(datum_l);
	v->spl_rdatum = PointerGetDatum(datum_r);

B
Bruce Momjian 已提交
432
	PG_RETURN_POINTER(v);
433 434 435
}

static bool
B
Bruce Momjian 已提交
436 437 438 439 440
gist_te(ltree_gist * key, ltree * query)
{
	ltree_level *curq = LTREE_FIRST(query);
	BITVECP		sign = LTG_SIGN(key);
	int			qlen = query->numlevel;
441 442
	unsigned int hv;

B
Bruce Momjian 已提交
443
	if (LTG_ISALLTRUE(key))
444 445
		return true;

B
Bruce Momjian 已提交
446 447 448 449 450
	while (qlen > 0)
	{
		hv = ltree_crc32_sz(curq->name, curq->len);
		if (!GETBIT(sign, AHASHVAL(hv)))
			return false;
451 452 453 454 455 456 457 458
		curq = LEVEL_NEXT(curq);
		qlen--;
	}

	return true;
}

static bool
B
Bruce Momjian 已提交
459 460 461
checkcondition_bit(void *checkval, ITEM * val)
{
	return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
462 463 464
}

static bool
B
Bruce Momjian 已提交
465 466 467
gist_qtxt(ltree_gist * key, ltxtquery * query)
{
	if (LTG_ISALLTRUE(key))
468
		return true;
B
Bruce Momjian 已提交
469

470
	return ltree_execute(
B
Bruce Momjian 已提交
471 472 473 474
						 GETQUERY(query),
						 (void *) LTG_SIGN(key), false,
						 checkcondition_bit
		);
475 476 477
}

static bool
B
Bruce Momjian 已提交
478 479 480 481 482 483 484
gist_qe(ltree_gist * key, lquery * query)
{
	lquery_level *curq = LQUERY_FIRST(query);
	BITVECP		sign = LTG_SIGN(key);
	int			qlen = query->numlevel;

	if (LTG_ISALLTRUE(key))
485
		return true;
B
Bruce Momjian 已提交
486 487 488 489 490 491 492

	while (qlen > 0)
	{
		if (curq->numvar && LQL_CANLOOKSIGN(curq))
		{
			bool		isexist = false;
			int			vlen = curq->numvar;
493
			lquery_variant *curv = LQL_FIRST(curq);
B
Bruce Momjian 已提交
494 495 496 497 498 499

			while (vlen > 0)
			{
				if (GETBIT(sign, AHASHVAL(curv->val)))
				{
					isexist = true;
500 501 502 503 504
					break;
				}
				curv = LVAR_NEXT(curv);
				vlen--;
			}
B
Bruce Momjian 已提交
505
			if (!isexist)
506 507
				return false;
		}
B
Bruce Momjian 已提交
508

509 510 511 512 513 514 515
		curq = LQL_NEXT(curq);
		qlen--;
	}

	return true;
}

516
static bool
B
Bruce Momjian 已提交
517 518 519 520
_arrq_cons(ltree_gist * key, ArrayType *_query)
{
	lquery	   *query = (lquery *) ARR_DATA_PTR(_query);
	int			num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
521

B
Bruce Momjian 已提交
522 523
	if (ARR_NDIM(_query) != 1)
		ereport(ERROR,
524 525
				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
				 errmsg("array must be one-dimensional")));
526

B
Bruce Momjian 已提交
527 528 529 530 531 532 533 534
	while (num > 0)
	{
		if (gist_qe(key, query))
			return true;
		num--;
		query = (lquery *) NEXTVAL(query);
	}
	return false;
535
}
536

B
Bruce Momjian 已提交
537 538 539 540 541 542
Datum
_ltree_consistent(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	char	   *query = (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
	ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
543
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
B
Bruce Momjian 已提交
544
	bool		res = false;
545

B
Bruce Momjian 已提交
546
#ifndef assert_enabled
547 548
#define assert_enabled 0
#endif
B
Bruce Momjian 已提交
549 550 551

	switch (strategy)
	{
552 553
		case 10:
		case 11:
B
Bruce Momjian 已提交
554
			res = gist_te(key, (ltree *) query);
555 556 557
			break;
		case 12:
		case 13:
B
Bruce Momjian 已提交
558 559
			res = gist_qe(key, (lquery *) query);
			break;
560 561
		case 14:
		case 15:
B
Bruce Momjian 已提交
562 563
			res = gist_qtxt(key, (ltxtquery *) query);
			break;
564 565 566 567
		case 16:
		case 17:
			res = _arrq_cons(key, (ArrayType *) query);
			break;
568
		default:
569 570
			/* internal error */
			elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
571 572 573
	}
	PG_RETURN_BOOL(res);
}