_int_gist.c 12.0 KB
Newer Older
B
Bruce Momjian 已提交
1 2
#include "_int.h"

3
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
B
Bruce Momjian 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

/*
** GiST support methods
*/
PG_FUNCTION_INFO_V1(g_int_consistent);
PG_FUNCTION_INFO_V1(g_int_compress);
PG_FUNCTION_INFO_V1(g_int_decompress);
PG_FUNCTION_INFO_V1(g_int_penalty);
PG_FUNCTION_INFO_V1(g_int_picksplit);
PG_FUNCTION_INFO_V1(g_int_union);
PG_FUNCTION_INFO_V1(g_int_same);

Datum		g_int_consistent(PG_FUNCTION_ARGS);
Datum		g_int_compress(PG_FUNCTION_ARGS);
Datum		g_int_decompress(PG_FUNCTION_ARGS);
Datum		g_int_penalty(PG_FUNCTION_ARGS);
Datum		g_int_picksplit(PG_FUNCTION_ARGS);
Datum		g_int_union(PG_FUNCTION_ARGS);
Datum		g_int_same(PG_FUNCTION_ARGS);


/*
** The GiST Consistent method for _intments
** Should return false if for all data items x below entry,
** the predicate x op query == FALSE, where op is the oper
** corresponding to strategy in the pg_amop table.
*/
Datum
g_int_consistent(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	ArrayType  *query = (ArrayType *) PG_GETARG_POINTER(1);
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
	bool		retval;

	if (strategy == BooleanSearchStrategy)
		PG_RETURN_BOOL(execconsistent((QUERYTYPE *) query,
B
Bruce Momjian 已提交
41 42
								   (ArrayType *) DatumGetPointer(entry->key),
					  ISLEAFKEY((ArrayType *) DatumGetPointer(entry->key))));
B
Bruce Momjian 已提交
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

	/* XXX are we sure it's safe to scribble on the query object here? */
	/* XXX what about toasted input? */
	/* sort query for fast search, key is already sorted */
	if (ARRISVOID(query))
		PG_RETURN_BOOL(false);
	PREPAREARR(query);

	switch (strategy)
	{
		case RTOverlapStrategyNumber:
			retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
									   query);
			break;
		case RTSameStrategyNumber:
			if (GIST_LEAF(entry))
				DirectFunctionCall3(
									g_int_same,
									entry->key,
									PointerGetDatum(query),
									PointerGetDatum(&retval)
					);
			else
				retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
											query);
			break;
		case RTContainsStrategyNumber:
			retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
										query);
			break;
		case RTContainedByStrategyNumber:
			if (GIST_LEAF(entry))
				retval = inner_int_contains(query,
B
Bruce Momjian 已提交
76
								  (ArrayType *) DatumGetPointer(entry->key));
B
Bruce Momjian 已提交
77 78 79 80 81 82 83 84 85 86 87
			else
				retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
										   query);
			break;
		default:
			retval = FALSE;
	}
	PG_RETURN_BOOL(retval);
}

Datum
B
Bruce Momjian 已提交
88 89
g_int_union(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
90
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
B
Bruce Momjian 已提交
91
	int		   *size = (int *) PG_GETARG_POINTER(1);
92
	int4		i;
B
Bruce Momjian 已提交
93 94 95
	ArrayType  *res;
	int			totlen = 0,
			   *ptr;
B
Bruce Momjian 已提交
96

97
	for (i = 0; i < entryvec->n; i++)
B
Bruce Momjian 已提交
98
		totlen += ARRNELEMS(GETENTRY(entryvec, i));
B
Bruce Momjian 已提交
99

B
Bruce Momjian 已提交
100 101
	res = new_intArrayType(totlen);
	ptr = ARRPTR(res);
B
Bruce Momjian 已提交
102

103
	for (i = 0; i < entryvec->n; i++)
B
Bruce Momjian 已提交
104 105 106
	{
		memcpy(ptr, ARRPTR(GETENTRY(entryvec, i)), ARRNELEMS(GETENTRY(entryvec, i)) * sizeof(int4));
		ptr += ARRNELEMS(GETENTRY(entryvec, i));
B
Bruce Momjian 已提交
107 108
	}

B
Bruce Momjian 已提交
109 110 111
	QSORT(res, 1);
	res = _int_unique(res);
	*size = VARSIZE(res);
B
Bruce Momjian 已提交
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
	PG_RETURN_POINTER(res);
}

/*
** GiST Compress and Decompress methods
*/
Datum
g_int_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval;
	ArrayType  *r;
	int			len;
	int		   *dr;
	int			i,
				min,
				cand;

	if (entry->leafkey)
	{
		r = (ArrayType *) PG_DETOAST_DATUM_COPY(entry->key);
		PREPAREARR(r);
		r->flags |= LEAFKEY;
		retval = palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(r),
B
Bruce Momjian 已提交
137
				  entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE);
B
Bruce Momjian 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177

		PG_RETURN_POINTER(retval);
	}

	r = (ArrayType *) PG_DETOAST_DATUM(entry->key);
	if (ISLEAFKEY(r) || ARRISVOID(r))
	{
		if (r != (ArrayType *) DatumGetPointer(entry->key))
			pfree(r);
		PG_RETURN_POINTER(entry);
	}

	if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
	{							/* compress */
		if (r == (ArrayType *) DatumGetPointer(entry->key))
			r = (ArrayType *) PG_DETOAST_DATUM_COPY(entry->key);
		r = resize_intArrayType(r, 2 * (len));

		dr = ARRPTR(r);

		for (i = len - 1; i >= 0; i--)
			dr[2 * i] = dr[2 * i + 1] = dr[i];

		len *= 2;
		cand = 1;
		while (len > MAXNUMRANGE * 2)
		{
			min = 0x7fffffff;
			for (i = 2; i < len; i += 2)
				if (min > (dr[i] - dr[i - 1]))
				{
					min = (dr[i] - dr[i - 1]);
					cand = i;
				}
			memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int));
			len -= 2;
		}
		r = resize_intArrayType(r, len);
		retval = palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(r),
B
Bruce Momjian 已提交
178
				  entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE);
B
Bruce Momjian 已提交
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
		PG_RETURN_POINTER(retval);
	}
	else
		PG_RETURN_POINTER(entry);

	PG_RETURN_POINTER(entry);
}

Datum
g_int_decompress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval;
	ArrayType  *r;
	int		   *dr,
				lenr;
	ArrayType  *in;
	int			lenin;
	int		   *din;
	int			i,
				j;

	in = (ArrayType *) PG_DETOAST_DATUM(entry->key);

	if (ARRISVOID(in))
		PG_RETURN_POINTER(entry);

	lenin = ARRNELEMS(in);

	if (lenin < 2 * MAXNUMRANGE || ISLEAFKEY(in))
	{							/* not compressed value */
		if (in != (ArrayType *) DatumGetPointer(entry->key))
		{
			retval = palloc(sizeof(GISTENTRY));
			gistentryinit(*retval, PointerGetDatum(in),
B
Bruce Momjian 已提交
214
				 entry->rel, entry->page, entry->offset, VARSIZE(in), FALSE);
B
Bruce Momjian 已提交
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235

			PG_RETURN_POINTER(retval);
		}
		PG_RETURN_POINTER(entry);
	}

	din = ARRPTR(in);
	lenr = internal_size(din, lenin);

	r = new_intArrayType(lenr);
	dr = ARRPTR(r);

	for (i = 0; i < lenin; i += 2)
		for (j = din[i]; j <= din[i + 1]; j++)
			if ((!i) || *(dr - 1) != j)
				*dr++ = j;

	if (in != (ArrayType *) DatumGetPointer(entry->key))
		pfree(in);
	retval = palloc(sizeof(GISTENTRY));
	gistentryinit(*retval, PointerGetDatum(r),
B
Bruce Momjian 已提交
236
				  entry->rel, entry->page, entry->offset, VARSIZE(r), FALSE);
B
Bruce Momjian 已提交
237 238 239 240 241 242 243 244

	PG_RETURN_POINTER(retval);
}

/*
** The GiST Penalty method for _intments
*/
Datum
B
Bruce Momjian 已提交
245 246 247 248 249
g_int_penalty(PG_FUNCTION_ARGS)
{
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
	float	   *result = (float *) PG_GETARG_POINTER(2);
B
Bruce Momjian 已提交
250 251 252 253 254
	ArrayType  *ud;
	float		tmp1,
				tmp2;

	ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
B
Bruce Momjian 已提交
255
						 (ArrayType *) DatumGetPointer(newentry->key));
B
Bruce Momjian 已提交
256 257 258 259 260
	rt__int_size(ud, &tmp1);
	rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
	*result = tmp1 - tmp2;
	pfree(ud);

B
Bruce Momjian 已提交
261
	PG_RETURN_POINTER(result);
B
Bruce Momjian 已提交
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
}



Datum
g_int_same(PG_FUNCTION_ARGS)
{
	ArrayType  *a = (ArrayType *) PointerGetDatum(PG_GETARG_POINTER(0));
	ArrayType  *b = (ArrayType *) PointerGetDatum(PG_GETARG_POINTER(1));
	bool	   *result = (bool *) PG_GETARG_POINTER(2);
	int4		n = ARRNELEMS(a);
	int4	   *da,
			   *db;

	if (n != ARRNELEMS(b))
	{
		*result = false;
		PG_RETURN_POINTER(result);
	}
	*result = TRUE;
	da = ARRPTR(a);
	db = ARRPTR(b);
	while (n--)
		if (*da++ != *db++)
		{
			*result = FALSE;
			break;
		}

	PG_RETURN_POINTER(result);
}

/*****************************************************************
** Common GiST Method
*****************************************************************/

typedef struct
{
	OffsetNumber pos;
	float		cost;
} SPLITCOST;

static int
comparecost(const void *a, const void *b)
{
	if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
		return 0;
	else
		return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}

/*
** The GiST PickSplit method for _intments
** We use Guttman's poly time split algorithm
*/
Datum
B
Bruce Momjian 已提交
318 319
g_int_picksplit(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
320
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
B
Bruce Momjian 已提交
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
	OffsetNumber i,
				j;
	ArrayType  *datum_alpha,
			   *datum_beta;
	ArrayType  *datum_l,
			   *datum_r;
	ArrayType  *union_d,
			   *union_dl,
			   *union_dr;
	ArrayType  *inter_d;
	bool		firsttime;
	float		size_alpha,
				size_beta,
				size_union,
				size_inter;
	float		size_waste,
				waste;
	float		size_l,
				size_r;
	int			nbytes;
	OffsetNumber seed_1 = 0,
				seed_2 = 0;
	OffsetNumber *left,
			   *right;
	OffsetNumber maxoff;
	SPLITCOST  *costvector;

#ifdef GIST_DEBUG
350
	elog(DEBUG3, "--------picksplit %d", entryvec->n);
B
Bruce Momjian 已提交
351 352
#endif

353
	maxoff = entryvec->n - 2;
B
Bruce Momjian 已提交
354 355 356 357 358 359 360 361
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	firsttime = true;
	waste = 0.0;
	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
	{
B
Bruce Momjian 已提交
362
		datum_alpha = GETENTRY(entryvec, i);
B
Bruce Momjian 已提交
363 364
		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
		{
B
Bruce Momjian 已提交
365
			datum_beta = GETENTRY(entryvec, j);
B
Bruce Momjian 已提交
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380

			/* compute the wasted space by unioning these guys */
			/* size_waste = size_union - size_inter; */
			union_d = inner_int_union(datum_alpha, datum_beta);
			rt__int_size(union_d, &size_union);
			inter_d = inner_int_inter(datum_alpha, datum_beta);
			rt__int_size(inter_d, &size_inter);
			size_waste = size_union - size_inter;

			pfree(union_d);

			if (inter_d != (ArrayType *) NULL)
				pfree(inter_d);

			/*
B
Bruce Momjian 已提交
381
			 * are these a more promising split that what we've already seen?
B
Bruce Momjian 已提交
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
			 */

			if (size_waste > waste || firsttime)
			{
				waste = size_waste;
				seed_1 = i;
				seed_2 = j;
				firsttime = false;
			}
		}
	}

	left = v->spl_left;
	v->spl_nleft = 0;
	right = v->spl_right;
	v->spl_nright = 0;
	if (seed_1 == 0 || seed_2 == 0)
	{
		seed_1 = 1;
		seed_2 = 2;
	}

B
Bruce Momjian 已提交
404
	datum_alpha = GETENTRY(entryvec, seed_1);
B
Bruce Momjian 已提交
405 406
	datum_l = copy_intArrayType(datum_alpha);
	rt__int_size(datum_l, &size_l);
B
Bruce Momjian 已提交
407
	datum_beta = GETENTRY(entryvec, seed_2);
B
Bruce Momjian 已提交
408 409 410 411 412 413 414 415 416 417 418 419
	datum_r = copy_intArrayType(datum_beta);
	rt__int_size(datum_r, &size_r);

	maxoff = OffsetNumberNext(maxoff);

	/*
	 * sort entries
	 */
	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
	{
		costvector[i - 1].pos = i;
B
Bruce Momjian 已提交
420
		datum_alpha = GETENTRY(entryvec, i);
B
Bruce Momjian 已提交
421 422 423 424 425 426
		union_d = inner_int_union(datum_l, datum_alpha);
		rt__int_size(union_d, &size_alpha);
		pfree(union_d);
		union_d = inner_int_union(datum_r, datum_alpha);
		rt__int_size(union_d, &size_beta);
		pfree(union_d);
427
		costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
B
Bruce Momjian 已提交
428 429 430 431
	}
	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

	/*
B
Bruce Momjian 已提交
432 433 434 435 436
	 * Now split up the regions between the two seeds.	An important property
	 * of this split algorithm is that the split vector v has the indices of
	 * items to be split in order in its left and right vectors.  We exploit
	 * this property by doing a merge in the code that actually splits the
	 * page.
B
Bruce Momjian 已提交
437
	 *
B
Bruce Momjian 已提交
438 439 440
	 * For efficiency, we also place the new index tuple in this loop. This is
	 * handled at the very end, when we have placed all the existing tuples
	 * and i == maxoff + 1.
B
Bruce Momjian 已提交
441 442 443 444 445 446 447 448
	 */


	for (j = 0; j < maxoff; j++)
	{
		i = costvector[j].pos;

		/*
B
Bruce Momjian 已提交
449 450 451
		 * If we've already decided where to place this item, just put it on
		 * the right list.	Otherwise, we need to figure out which page needs
		 * the least enlargement in order to store the item.
B
Bruce Momjian 已提交
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
		 */

		if (i == seed_1)
		{
			*left++ = i;
			v->spl_nleft++;
			continue;
		}
		else if (i == seed_2)
		{
			*right++ = i;
			v->spl_nright++;
			continue;
		}

		/* okay, which page needs least enlargement? */
B
Bruce Momjian 已提交
468
		datum_alpha = GETENTRY(entryvec, i);
B
Bruce Momjian 已提交
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
		union_dl = inner_int_union(datum_l, datum_alpha);
		union_dr = inner_int_union(datum_r, datum_alpha);
		rt__int_size(union_dl, &size_alpha);
		rt__int_size(union_dr, &size_beta);

		/* pick which page to add it to */
		if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
		{
			if (datum_l)
				pfree(datum_l);
			if (union_dr)
				pfree(union_dr);
			datum_l = union_dl;
			size_l = size_alpha;
			*left++ = i;
			v->spl_nleft++;
		}
		else
		{
			if (datum_r)
				pfree(datum_r);
			if (union_dl)
				pfree(union_dl);
			datum_r = union_dr;
			size_r = size_beta;
			*right++ = i;
			v->spl_nright++;
		}
	}
	pfree(costvector);
	*right = *left = FirstOffsetNumber;

	datum_l->flags &= ~LEAFKEY;
	datum_r->flags &= ~LEAFKEY;
	v->spl_ldatum = PointerGetDatum(datum_l);
	v->spl_rdatum = PointerGetDatum(datum_r);

	PG_RETURN_POINTER(v);
}