_int_gist.c 12.3 KB
Newer Older
B
Bruce Momjian 已提交
1 2
#include "_int.h"

3
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
B
Bruce Momjian 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

/*
** GiST support methods
*/
PG_FUNCTION_INFO_V1(g_int_consistent);
PG_FUNCTION_INFO_V1(g_int_compress);
PG_FUNCTION_INFO_V1(g_int_decompress);
PG_FUNCTION_INFO_V1(g_int_penalty);
PG_FUNCTION_INFO_V1(g_int_picksplit);
PG_FUNCTION_INFO_V1(g_int_union);
PG_FUNCTION_INFO_V1(g_int_same);

Datum		g_int_consistent(PG_FUNCTION_ARGS);
Datum		g_int_compress(PG_FUNCTION_ARGS);
Datum		g_int_decompress(PG_FUNCTION_ARGS);
Datum		g_int_penalty(PG_FUNCTION_ARGS);
Datum		g_int_picksplit(PG_FUNCTION_ARGS);
Datum		g_int_union(PG_FUNCTION_ARGS);
Datum		g_int_same(PG_FUNCTION_ARGS);


/*
** The GiST Consistent method for _intments
** Should return false if for all data items x below entry,
** the predicate x op query == FALSE, where op is the oper
** corresponding to strategy in the pg_amop table.
*/
Datum
g_int_consistent(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	ArrayType  *query = (ArrayType *) PG_GETARG_POINTER(1);
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
	bool		retval;

	if (strategy == BooleanSearchStrategy)
		PG_RETURN_BOOL(execconsistent((QUERYTYPE *) query,
B
Bruce Momjian 已提交
41
								   (ArrayType *) DatumGetPointer(entry->key),
42
					  GIST_LEAF(entry)));
B
Bruce Momjian 已提交
43 44 45 46

	/* XXX are we sure it's safe to scribble on the query object here? */
	/* XXX what about toasted input? */
	/* sort query for fast search, key is already sorted */
47
	CHECKARRVALID(query);
B
Bruce Momjian 已提交
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
	if (ARRISVOID(query))
		PG_RETURN_BOOL(false);
	PREPAREARR(query);

	switch (strategy)
	{
		case RTOverlapStrategyNumber:
			retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
									   query);
			break;
		case RTSameStrategyNumber:
			if (GIST_LEAF(entry))
				DirectFunctionCall3(
									g_int_same,
									entry->key,
									PointerGetDatum(query),
									PointerGetDatum(&retval)
					);
			else
				retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
											query);
			break;
		case RTContainsStrategyNumber:
			retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
										query);
			break;
		case RTContainedByStrategyNumber:
			if (GIST_LEAF(entry))
				retval = inner_int_contains(query,
B
Bruce Momjian 已提交
77
								  (ArrayType *) DatumGetPointer(entry->key));
B
Bruce Momjian 已提交
78 79 80 81 82 83 84 85 86 87 88
			else
				retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
										   query);
			break;
		default:
			retval = FALSE;
	}
	PG_RETURN_BOOL(retval);
}

Datum
B
Bruce Momjian 已提交
89 90
g_int_union(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
91
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
B
Bruce Momjian 已提交
92
	int		   *size = (int *) PG_GETARG_POINTER(1);
93
	int4		i,
B
Bruce Momjian 已提交
94
			   *ptr;
95 96
	ArrayType  *res;
	int			totlen = 0;
B
Bruce Momjian 已提交
97

98
	for (i = 0; i < entryvec->n; i++)
99 100 101 102 103 104
	{
		ArrayType *ent = GETENTRY(entryvec, i);

		CHECKARRVALID(ent);
		totlen += ARRNELEMS(ent);
	}
B
Bruce Momjian 已提交
105

B
Bruce Momjian 已提交
106 107
	res = new_intArrayType(totlen);
	ptr = ARRPTR(res);
B
Bruce Momjian 已提交
108

109
	for (i = 0; i < entryvec->n; i++)
B
Bruce Momjian 已提交
110
	{
111 112 113 114 115 116
		ArrayType *ent = GETENTRY(entryvec, i);
		int nel;

		nel = ARRNELEMS(ent);
		memcpy(ptr, ARRPTR(ent), nel * sizeof(int4));
		ptr += nel;
B
Bruce Momjian 已提交
117 118
	}

B
Bruce Momjian 已提交
119 120 121
	QSORT(res, 1);
	res = _int_unique(res);
	*size = VARSIZE(res);
B
Bruce Momjian 已提交
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
	PG_RETURN_POINTER(res);
}

/*
** GiST Compress and Decompress methods
*/
Datum
g_int_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval;
	ArrayType  *r;
	int			len;
	int		   *dr;
	int			i,
				min,
				cand;

	if (entry->leafkey)
	{
		r = (ArrayType *) PG_DETOAST_DATUM_COPY(entry->key);
143
		CHECKARRVALID(r);
B
Bruce Momjian 已提交
144
		PREPAREARR(r);
145 146 147 148 149

		if (ARRNELEMS(r)>= 2 * MAXNUMRANGE)
			elog(NOTICE,"Input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
				2 * MAXNUMRANGE - 1, ARRNELEMS(r)); 
			
B
Bruce Momjian 已提交
150 151
		retval = palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(r),
B
Bruce Momjian 已提交
152
				  entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE);
B
Bruce Momjian 已提交
153 154 155 156

		PG_RETURN_POINTER(retval);
	}

157 158 159
	/* leaf entries never compress one more time, only when entry->leafkey ==true,
           so now we work only with internal keys  */

B
Bruce Momjian 已提交
160
	r = (ArrayType *) PG_DETOAST_DATUM(entry->key);
161
	CHECKARRVALID(r);
162
	if (ARRISVOID(r)) 
B
Bruce Momjian 已提交
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
	{
		if (r != (ArrayType *) DatumGetPointer(entry->key))
			pfree(r);
		PG_RETURN_POINTER(entry);
	}

	if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
	{							/* compress */
		if (r == (ArrayType *) DatumGetPointer(entry->key))
			r = (ArrayType *) PG_DETOAST_DATUM_COPY(entry->key);
		r = resize_intArrayType(r, 2 * (len));

		dr = ARRPTR(r);

		for (i = len - 1; i >= 0; i--)
			dr[2 * i] = dr[2 * i + 1] = dr[i];

		len *= 2;
		cand = 1;
		while (len > MAXNUMRANGE * 2)
		{
			min = 0x7fffffff;
			for (i = 2; i < len; i += 2)
				if (min > (dr[i] - dr[i - 1]))
				{
					min = (dr[i] - dr[i - 1]);
					cand = i;
				}
			memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int));
			len -= 2;
		}
		r = resize_intArrayType(r, len);
		retval = palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(r),
B
Bruce Momjian 已提交
197
				  entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE);
B
Bruce Momjian 已提交
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
		PG_RETURN_POINTER(retval);
	}
	else
		PG_RETURN_POINTER(entry);

	PG_RETURN_POINTER(entry);
}

Datum
g_int_decompress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval;
	ArrayType  *r;
	int		   *dr,
				lenr;
	ArrayType  *in;
	int			lenin;
	int		   *din;
	int			i,
				j;

	in = (ArrayType *) PG_DETOAST_DATUM(entry->key);

222
	CHECKARRVALID(in);
B
Bruce Momjian 已提交
223 224 225 226 227
	if (ARRISVOID(in))
		PG_RETURN_POINTER(entry);

	lenin = ARRNELEMS(in);

228
	if (lenin < 2 * MAXNUMRANGE)
B
Bruce Momjian 已提交
229 230 231 232 233
	{							/* not compressed value */
		if (in != (ArrayType *) DatumGetPointer(entry->key))
		{
			retval = palloc(sizeof(GISTENTRY));
			gistentryinit(*retval, PointerGetDatum(in),
B
Bruce Momjian 已提交
234
				 entry->rel, entry->page, entry->offset, VARSIZE(in), FALSE);
B
Bruce Momjian 已提交
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255

			PG_RETURN_POINTER(retval);
		}
		PG_RETURN_POINTER(entry);
	}

	din = ARRPTR(in);
	lenr = internal_size(din, lenin);

	r = new_intArrayType(lenr);
	dr = ARRPTR(r);

	for (i = 0; i < lenin; i += 2)
		for (j = din[i]; j <= din[i + 1]; j++)
			if ((!i) || *(dr - 1) != j)
				*dr++ = j;

	if (in != (ArrayType *) DatumGetPointer(entry->key))
		pfree(in);
	retval = palloc(sizeof(GISTENTRY));
	gistentryinit(*retval, PointerGetDatum(r),
B
Bruce Momjian 已提交
256
				  entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE);
B
Bruce Momjian 已提交
257 258 259 260 261 262 263 264

	PG_RETURN_POINTER(retval);
}

/*
** The GiST Penalty method for _intments
*/
Datum
B
Bruce Momjian 已提交
265 266 267 268 269
g_int_penalty(PG_FUNCTION_ARGS)
{
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
	float	   *result = (float *) PG_GETARG_POINTER(2);
B
Bruce Momjian 已提交
270 271 272 273 274
	ArrayType  *ud;
	float		tmp1,
				tmp2;

	ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
B
Bruce Momjian 已提交
275
						 (ArrayType *) DatumGetPointer(newentry->key));
B
Bruce Momjian 已提交
276 277 278 279 280
	rt__int_size(ud, &tmp1);
	rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
	*result = tmp1 - tmp2;
	pfree(ud);

B
Bruce Momjian 已提交
281
	PG_RETURN_POINTER(result);
B
Bruce Momjian 已提交
282 283 284 285 286 287 288 289 290 291 292 293 294 295
}



Datum
g_int_same(PG_FUNCTION_ARGS)
{
	ArrayType  *a = (ArrayType *) PointerGetDatum(PG_GETARG_POINTER(0));
	ArrayType  *b = (ArrayType *) PointerGetDatum(PG_GETARG_POINTER(1));
	bool	   *result = (bool *) PG_GETARG_POINTER(2);
	int4		n = ARRNELEMS(a);
	int4	   *da,
			   *db;

296 297 298
	CHECKARRVALID(a);
	CHECKARRVALID(b);

B
Bruce Momjian 已提交
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
	if (n != ARRNELEMS(b))
	{
		*result = false;
		PG_RETURN_POINTER(result);
	}
	*result = TRUE;
	da = ARRPTR(a);
	db = ARRPTR(b);
	while (n--)
		if (*da++ != *db++)
		{
			*result = FALSE;
			break;
		}

	PG_RETURN_POINTER(result);
}

/*****************************************************************
** Common GiST Method
*****************************************************************/

typedef struct
{
	OffsetNumber pos;
	float		cost;
} SPLITCOST;

static int
comparecost(const void *a, const void *b)
{
	if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
		return 0;
	else
		return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}

/*
** The GiST PickSplit method for _intments
** We use Guttman's poly time split algorithm
*/
Datum
B
Bruce Momjian 已提交
341 342
g_int_picksplit(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
343
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
B
Bruce Momjian 已提交
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i,
				j;
	ArrayType  *datum_alpha,
			   *datum_beta;
	ArrayType  *datum_l,
			   *datum_r;
	ArrayType  *union_d,
			   *union_dl,
			   *union_dr;
	ArrayType  *inter_d;
	bool		firsttime;
	float		size_alpha,
				size_beta,
				size_union,
				size_inter;
	float		size_waste,
				waste;
	float		size_l,
				size_r;
	int			nbytes;
	OffsetNumber seed_1 = 0,
				seed_2 = 0;
	OffsetNumber *left,
			   *right;
	OffsetNumber maxoff;
	SPLITCOST  *costvector;

#ifdef GIST_DEBUG
373
	elog(DEBUG3, "--------picksplit %d", entryvec->n);
B
Bruce Momjian 已提交
374 375
#endif

376
	maxoff = entryvec->n - 2;
B
Bruce Momjian 已提交
377 378 379 380 381 382 383 384
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	firsttime = true;
	waste = 0.0;
	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
	{
B
Bruce Momjian 已提交
385
		datum_alpha = GETENTRY(entryvec, i);
B
Bruce Momjian 已提交
386 387
		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
		{
B
Bruce Momjian 已提交
388
			datum_beta = GETENTRY(entryvec, j);
B
Bruce Momjian 已提交
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403

			/* compute the wasted space by unioning these guys */
			/* size_waste = size_union - size_inter; */
			union_d = inner_int_union(datum_alpha, datum_beta);
			rt__int_size(union_d, &size_union);
			inter_d = inner_int_inter(datum_alpha, datum_beta);
			rt__int_size(inter_d, &size_inter);
			size_waste = size_union - size_inter;

			pfree(union_d);

			if (inter_d != (ArrayType *) NULL)
				pfree(inter_d);

			/*
B
Bruce Momjian 已提交
404
			 * are these a more promising split that what we've already seen?
B
Bruce Momjian 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
			 */

			if (size_waste > waste || firsttime)
			{
				waste = size_waste;
				seed_1 = i;
				seed_2 = j;
				firsttime = false;
			}
		}
	}

	left = v->spl_left;
	v->spl_nleft = 0;
	right = v->spl_right;
	v->spl_nright = 0;
	if (seed_1 == 0 || seed_2 == 0)
	{
		seed_1 = 1;
		seed_2 = 2;
	}

B
Bruce Momjian 已提交
427
	datum_alpha = GETENTRY(entryvec, seed_1);
B
Bruce Momjian 已提交
428 429
	datum_l = copy_intArrayType(datum_alpha);
	rt__int_size(datum_l, &size_l);
B
Bruce Momjian 已提交
430
	datum_beta = GETENTRY(entryvec, seed_2);
B
Bruce Momjian 已提交
431 432 433 434 435 436 437 438 439 440 441 442
	datum_r = copy_intArrayType(datum_beta);
	rt__int_size(datum_r, &size_r);

	maxoff = OffsetNumberNext(maxoff);

	/*
	 * sort entries
	 */
	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		costvector[i - 1].pos = i;
B
Bruce Momjian 已提交
443
		datum_alpha = GETENTRY(entryvec, i);
B
Bruce Momjian 已提交
444 445 446 447 448 449
		union_d = inner_int_union(datum_l, datum_alpha);
		rt__int_size(union_d, &size_alpha);
		pfree(union_d);
		union_d = inner_int_union(datum_r, datum_alpha);
		rt__int_size(union_d, &size_beta);
		pfree(union_d);
450
		costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
B
Bruce Momjian 已提交
451 452 453 454
	}
	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

	/*
B
Bruce Momjian 已提交
455 456 457 458 459
	 * Now split up the regions between the two seeds.	An important property
	 * of this split algorithm is that the split vector v has the indices of
	 * items to be split in order in its left and right vectors.  We exploit
	 * this property by doing a merge in the code that actually splits the
	 * page.
B
Bruce Momjian 已提交
460
	 *
B
Bruce Momjian 已提交
461 462 463
	 * For efficiency, we also place the new index tuple in this loop. This is
	 * handled at the very end, when we have placed all the existing tuples
	 * and i == maxoff + 1.
B
Bruce Momjian 已提交
464 465 466 467 468 469 470 471
	 */


	for (j = 0; j < maxoff; j++)
	{
		i = costvector[j].pos;

		/*
B
Bruce Momjian 已提交
472 473 474
		 * If we've already decided where to place this item, just put it on
		 * the right list.	Otherwise, we need to figure out which page needs
		 * the least enlargement in order to store the item.
B
Bruce Momjian 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
		 */

		if (i == seed_1)
		{
			*left++ = i;
			v->spl_nleft++;
			continue;
		}
		else if (i == seed_2)
		{
			*right++ = i;
			v->spl_nright++;
			continue;
		}

		/* okay, which page needs least enlargement? */
B
Bruce Momjian 已提交
491
		datum_alpha = GETENTRY(entryvec, i);
B
Bruce Momjian 已提交
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
		union_dl = inner_int_union(datum_l, datum_alpha);
		union_dr = inner_int_union(datum_r, datum_alpha);
		rt__int_size(union_dl, &size_alpha);
		rt__int_size(union_dr, &size_beta);

		/* pick which page to add it to */
		if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
		{
			if (datum_l)
				pfree(datum_l);
			if (union_dr)
				pfree(union_dr);
			datum_l = union_dl;
			size_l = size_alpha;
			*left++ = i;
			v->spl_nleft++;
		}
		else
		{
			if (datum_r)
				pfree(datum_r);
			if (union_dl)
				pfree(union_dl);
			datum_r = union_dr;
			size_r = size_beta;
			*right++ = i;
			v->spl_nright++;
		}
	}
	pfree(costvector);
	*right = *left = FirstOffsetNumber;

	v->spl_ldatum = PointerGetDatum(datum_l);
	v->spl_rdatum = PointerGetDatum(datum_r);

	PG_RETURN_POINTER(v);
}