nodeHashjoin.c 19.9 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
 * nodeHashjoin.c
 *	  Routines to handle hash join nodes
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.35 2001/01/24 19:42:54 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
15
#include <sys/types.h>
16

B
Bruce Momjian 已提交
17
#include "postgres.h"
18 19

#include "executor/executor.h"
20 21
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
B
Bruce Momjian 已提交
22
#include "optimizer/clauses.h"
23 24
#include "utils/memutils.h"

25

26
static TupleTableSlot *ExecHashJoinOuterGetTuple(Plan *node, Plan *parent,
B
Bruce Momjian 已提交
27
						  HashJoinState *hjstate);
28
static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
29
						  BufFile *file,
B
Bruce Momjian 已提交
30
						  TupleTableSlot *tupleSlot);
31
static int	ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
32
static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
33 34


35
/* ----------------------------------------------------------------
36
 *		ExecHashJoin
37
 *
38 39 40 41
 *		This function implements the Hybrid Hashjoin algorithm.
 *		recursive partitioning remains to be added.
 *		Note: the relation we build hash table on is the inner
 *			  the other one is outer.
42 43
 * ----------------------------------------------------------------
 */
44
TupleTableSlot *				/* return: a tuple or NULL */
45
ExecHashJoin(HashJoin *node)
46
{
47 48 49 50 51 52
	HashJoinState *hjstate;
	EState	   *estate;
	Plan	   *outerNode;
	Hash	   *hashNode;
	List	   *hjclauses;
	Expr	   *clause;
53 54
	List	   *joinqual;
	List	   *otherqual;
55
	ScanDirection dir;
56
	TupleTableSlot *inntuple;
57
	Node	   *outerVar;
58
	ExprContext *econtext;
59
	ExprDoneCond isDone;
60 61
	HashJoinTable hashtable;
	HeapTuple	curtuple;
62 63
	TupleTableSlot *outerTupleSlot;
	TupleTableSlot *innerTupleSlot;
64 65
	int			i;
	bool		hashPhaseDone;
66 67 68 69 70 71 72 73

	/* ----------------
	 *	get information from HashJoin node
	 * ----------------
	 */
	hjstate = node->hashjoinstate;
	hjclauses = node->hashclauses;
	clause = lfirst(hjclauses);
74 75 76
	estate = node->join.plan.state;
	joinqual = node->join.joinqual;
	otherqual = node->join.plan.qual;
77 78
	hashNode = (Hash *) innerPlan(node);
	outerNode = outerPlan(node);
79
	hashPhaseDone = hjstate->hj_hashdone;
80 81
	dir = estate->es_direction;

82
	/* -----------------
83
	 * get information from HashJoin state
84 85
	 * -----------------
	 */
86
	hashtable = hjstate->hj_HashTable;
87
	econtext = hjstate->jstate.cs_ExprContext;
88

89 90 91 92 93 94
	/* ----------------
	 *	Check to see if we're still projecting out tuples from a previous
	 *	join tuple (because there is a function-returning-set in the
	 *	projection expressions).  If so, try to project another one.
	 * ----------------
	 */
95 96 97 98 99
	if (hjstate->jstate.cs_TupFromTlist)
	{
		TupleTableSlot *result;

		result = ExecProject(hjstate->jstate.cs_ProjInfo, &isDone);
100
		if (isDone == ExprMultipleResult)
101
			return result;
102 103
		/* Done with that source tuple... */
		hjstate->jstate.cs_TupFromTlist = false;
104
	}
105

106 107 108 109 110 111 112 113
	/* ----------------
	 *	Reset per-tuple memory context to free any expression evaluation
	 *	storage allocated in the previous tuple cycle.  Note this can't
	 *	happen until we're done projecting out tuples from a join tuple.
	 * ----------------
	 */
	ResetExprContext(econtext);

114
	/* ----------------
115
	 *	if this is the first call, build the hash table for inner relation
116 117
	 * ----------------
	 */
118 119 120 121 122 123 124 125 126 127
	if (!hashPhaseDone)
	{							/* if the hash phase not completed */
		if (hashtable == NULL)
		{						/* if the hash table has not been created */
			/* ----------------
			 * create the hash table
			 * ----------------
			 */
			hashtable = ExecHashTableCreate(hashNode);
			hjstate->hj_HashTable = hashtable;
128
			hjstate->hj_InnerHashKey = hashNode->hashkey;
129 130 131 132 133

			/* ----------------
			 * execute the Hash node, to build the hash table
			 * ----------------
			 */
134
			hashNode->hashstate->hashtable = hashtable;
135 136
			innerTupleSlot = ExecProcNode((Plan *) hashNode, (Plan *) node);
		}
137
		hjstate->hj_hashdone = true;
138 139 140 141
		/* ----------------
		 * Open temp files for outer batches, if needed.
		 * Note that file buffers are palloc'd in regular executor context.
		 * ----------------
142
		 */
143
		for (i = 0; i < hashtable->nbatch; i++)
144
			hashtable->outerBatchFile[i] = BufFileCreateTemp();
145
	}
146 147
	else if (hashtable == NULL)
		return NULL;
148

149
	/* ----------------
150
	 *	Now get an outer tuple and probe into the hash table for matches
151 152
	 * ----------------
	 */
153
	outerTupleSlot = hjstate->jstate.cs_OuterTupleSlot;
154
	outerVar = (Node *) get_leftop(clause);
155

156
	for (;;)
157
	{
158
		/*
159
		 * If we don't have an outer tuple, get the next one
160
		 */
161
		if (hjstate->hj_NeedNewOuter)
162
		{
163 164 165 166
			outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
													   (Plan *) node,
													   hjstate);
			if (TupIsNull(outerTupleSlot))
167
			{
B
Bruce Momjian 已提交
168

169
				/*
170
				 * when the last batch runs out, clean up and exit
171 172 173 174 175 176
				 */
				ExecHashTableDestroy(hashtable);
				hjstate->hj_HashTable = NULL;
				return NULL;
			}

177 178 179 180 181
			hjstate->jstate.cs_OuterTupleSlot = outerTupleSlot;
			econtext->ecxt_outertuple = outerTupleSlot;
			hjstate->hj_NeedNewOuter = false;
			hjstate->hj_MatchedOuter = false;

182
			/*
B
Bruce Momjian 已提交
183 184
			 * now we have an outer tuple, find the corresponding bucket
			 * for this tuple from the hash table
185
			 */
186 187 188
			hjstate->hj_CurBucketNo = ExecHashGetBucket(hashtable, econtext,
														outerVar);
			hjstate->hj_CurTuple = NULL;
189

190 191 192 193 194 195 196 197
			/* ----------------
			 *	Now we've got an outer tuple and the corresponding hash bucket,
			 *	but this tuple may not belong to the current batch.
			 *	This need only be checked in the first pass.
			 * ----------------
			 */
			if (hashtable->curbatch == 0)
			{
B
Bruce Momjian 已提交
198 199 200
				int			batch = ExecHashJoinGetBatch(hjstate->hj_CurBucketNo,
														 hashtable);

201
				if (batch > 0)
202
				{
B
Bruce Momjian 已提交
203

204
					/*
205 206
					 * Need to postpone this outer tuple to a later batch.
					 * Save it in the corresponding outer-batch file.
207
					 */
B
Bruce Momjian 已提交
208 209
					int			batchno = batch - 1;

210 211
					hashtable->outerBatchSize[batchno]++;
					ExecHashJoinSaveTuple(outerTupleSlot->val,
B
Bruce Momjian 已提交
212
									 hashtable->outerBatchFile[batchno]);
213
					hjstate->hj_NeedNewOuter = true;
B
Bruce Momjian 已提交
214
					continue;	/* loop around for a new outer tuple */
215 216 217 218
				}
			}
		}

219 220
		/*
		 * OK, scan the selected hash bucket for matches
221
		 */
222
		for (;;)
223
		{
224 225 226 227 228
			curtuple = ExecScanHashBucket(hjstate,
										  hjclauses,
										  econtext);
			if (curtuple == NULL)
				break;			/* out of matches */
B
Bruce Momjian 已提交
229

230
			/*
231
			 * we've got a match, but still need to test non-hashed quals
232
			 */
233 234 235 236 237
			inntuple = ExecStoreTuple(curtuple,
									  hjstate->hj_HashTupleSlot,
									  InvalidBuffer,
									  false);	/* don't pfree this tuple */
			econtext->ecxt_innertuple = inntuple;
238

239
			/* reset temp memory each time to avoid leaks from qual expr */
240 241
			ResetExprContext(econtext);

242 243 244 245
			/* ----------------
			 * if we pass the qual, then save state for next call and
			 * have ExecProject form the projection, store it
			 * in the tuple table, and return the slot.
246 247 248
			 *
			 * Only the joinquals determine MatchedOuter status,
			 * but all quals must pass to actually return the tuple.
249 250
			 * ----------------
			 */
251
			if (ExecQual(joinqual, econtext, false))
252
			{
253
				hjstate->hj_MatchedOuter = true;
254

255
				if (otherqual == NIL || ExecQual(otherqual, econtext, false))
256
				{
257 258 259 260 261 262 263 264 265 266
					TupleTableSlot *result;

					result = ExecProject(hjstate->jstate.cs_ProjInfo, &isDone);

					if (isDone != ExprEndResult)
					{
						hjstate->jstate.cs_TupFromTlist =
							(isDone == ExprMultipleResult);
						return result;
					}
267
				}
268 269 270 271
			}
		}

		/* ----------------
272
		 *	 Now the current outer tuple has run out of matches,
273 274
		 *	 so check whether to emit a dummy outer-join tuple.
		 *	 If not, loop around to get a new outer tuple.
275 276
		 * ----------------
		 */
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
		hjstate->hj_NeedNewOuter = true;

		if (! hjstate->hj_MatchedOuter &&
			node->join.jointype == JOIN_LEFT)
		{
			/*
			 * We are doing an outer join and there were no join matches
			 * for this outer tuple.  Generate a fake join tuple with
			 * nulls for the inner tuple, and return it if it passes
			 * the non-join quals.
			 */
			econtext->ecxt_innertuple = hjstate->hj_NullInnerTupleSlot;

			if (ExecQual(otherqual, econtext, false))
			{
				/* ----------------
				 *	qualification was satisfied so we project and
				 *	return the slot containing the result tuple
				 *	using ExecProject().
				 * ----------------
				 */
				TupleTableSlot *result;

				result = ExecProject(hjstate->jstate.cs_ProjInfo, &isDone);

				if (isDone != ExprEndResult)
				{
					hjstate->jstate.cs_TupFromTlist =
						(isDone == ExprMultipleResult);
					return result;
				}
			}
		}
310
	}
311 312 313
}

/* ----------------------------------------------------------------
314
 *		ExecInitHashJoin
315
 *
316
 *		Init routine for HashJoin node.
317 318
 * ----------------------------------------------------------------
 */
319
bool							/* return: initialization status */
320
ExecInitHashJoin(HashJoin *node, EState *estate, Plan *parent)
321
{
322 323 324
	HashJoinState *hjstate;
	Plan	   *outerNode;
	Hash	   *hashNode;
325 326 327 328 329

	/* ----------------
	 *	assign the node's execution state
	 * ----------------
	 */
330
	node->join.plan.state = estate;
331 332 333 334 335 336 337 338 339

	/* ----------------
	 * create state structure
	 * ----------------
	 */
	hjstate = makeNode(HashJoinState);
	node->hashjoinstate = hjstate;

	/* ----------------
340
	 *	Miscellaneous initialization
341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
	 *
	 *		 +	create expression context for node
	 * ----------------
	 */
	ExecAssignExprContext(estate, &hjstate->jstate);

	/* ----------------
	 * initializes child nodes
	 * ----------------
	 */
	outerNode = outerPlan((Plan *) node);
	hashNode = (Hash *) innerPlan((Plan *) node);

	ExecInitNode(outerNode, estate, (Plan *) node);
	ExecInitNode((Plan *) hashNode, estate, (Plan *) node);

357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
#define HASHJOIN_NSLOTS 3
	/* ----------------
	 *	tuple table initialization
	 * ----------------
	 */
	ExecInitResultTupleSlot(estate, &hjstate->jstate);
	hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);

	switch (node->join.jointype)
	{
		case JOIN_INNER:
			break;
		case JOIN_LEFT:
			hjstate->hj_NullInnerTupleSlot =
				ExecInitNullTupleSlot(estate,
									  ExecGetTupType((Plan *) hashNode));
			break;
		default:
			elog(ERROR, "ExecInitHashJoin: unsupported join type %d",
				 (int) node->join.jointype);
	}

379 380 381 382 383 384 385 386 387 388
	/* ----------------
	 *	now for some voodoo.  our temporary tuple slot
	 *	is actually the result tuple slot of the Hash node
	 *	(which is our inner plan).	we do this because Hash
	 *	nodes don't return tuples via ExecProcNode() -- instead
	 *	the hash join node uses ExecScanHashBucket() to get
	 *	at the contents of the hash table.	-cim 6/9/91
	 * ----------------
	 */
	{
389
		HashState  *hashstate = hashNode->hashstate;
390
		TupleTableSlot *slot = hashstate->cstate.cs_ResultTupleSlot;
391 392 393 394 395 396 397 398 399 400 401

		hjstate->hj_HashTupleSlot = slot;
	}

	/* ----------------
	 *	initialize tuple type and projection info
	 * ----------------
	 */
	ExecAssignResultTypeFromTL((Plan *) node, &hjstate->jstate);
	ExecAssignProjectionInfo((Plan *) node, &hjstate->jstate);

402 403 404
	ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
						  ExecGetTupType(outerNode));

405
	/* ----------------
406
	 *	initialize hash-specific info
407 408 409
	 * ----------------
	 */

410
	hjstate->hj_hashdone = false;
411 412

	hjstate->hj_HashTable = (HashJoinTable) NULL;
413 414
	hjstate->hj_CurBucketNo = 0;
	hjstate->hj_CurTuple = (HashJoinTuple) NULL;
415
	hjstate->hj_InnerHashKey = (Node *) NULL;
416

417
	hjstate->jstate.cs_OuterTupleSlot = NULL;
418
	hjstate->jstate.cs_TupFromTlist = false;
419 420
	hjstate->hj_NeedNewOuter = true;
	hjstate->hj_MatchedOuter = false;
421 422

	return TRUE;
423 424 425
}

int
426
ExecCountSlotsHashJoin(HashJoin *node)
427
{
428
	return ExecCountSlotsNode(outerPlan(node)) +
429
	ExecCountSlotsNode(innerPlan(node)) +
430
	HASHJOIN_NSLOTS;
431 432 433
}

/* ----------------------------------------------------------------
434
 *		ExecEndHashJoin
435
 *
436
 *		clean up routine for HashJoin node
437 438 439
 * ----------------------------------------------------------------
 */
void
440
ExecEndHashJoin(HashJoin *node)
441
{
442
	HashJoinState *hjstate;
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469

	/* ----------------
	 *	get info from the HashJoin state
	 * ----------------
	 */
	hjstate = node->hashjoinstate;

	/* ----------------
	 * free hash table in case we end plan before all tuples are retrieved
	 * ---------------
	 */
	if (hjstate->hj_HashTable)
	{
		ExecHashTableDestroy(hjstate->hj_HashTable);
		hjstate->hj_HashTable = NULL;
	}

	/* ----------------
	 *	Free the projection info and the scan attribute info
	 *
	 *	Note: we don't ExecFreeResultType(hjstate)
	 *		  because the rule manager depends on the tupType
	 *		  returned by ExecMain().  So for now, this
	 *		  is freed at end-transaction time.  -cim 6/2/91
	 * ----------------
	 */
	ExecFreeProjectionInfo(&hjstate->jstate);
470
	ExecFreeExprContext(&hjstate->jstate);
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486

	/* ----------------
	 * clean up subtrees
	 * ----------------
	 */
	ExecEndNode(outerPlan((Plan *) node), (Plan *) node);
	ExecEndNode(innerPlan((Plan *) node), (Plan *) node);

	/* ----------------
	 *	clean out the tuple table
	 * ----------------
	 */
	ExecClearTuple(hjstate->jstate.cs_ResultTupleSlot);
	ExecClearTuple(hjstate->hj_OuterTupleSlot);
	ExecClearTuple(hjstate->hj_HashTupleSlot);

487 488 489
}

/* ----------------------------------------------------------------
490
 *		ExecHashJoinOuterGetTuple
491
 *
492 493 494
 *		get the next outer tuple for hashjoin: either by
 *		executing a plan node as in the first pass, or from
 *		the tmp files for the hashjoin batches.
495 496 497 498
 * ----------------------------------------------------------------
 */

static TupleTableSlot *
499
ExecHashJoinOuterGetTuple(Plan *node, Plan *parent, HashJoinState *hjstate)
500
{
B
Bruce Momjian 已提交
501 502
	HashJoinTable hashtable = hjstate->hj_HashTable;
	int			curbatch = hashtable->curbatch;
503 504 505 506 507
	TupleTableSlot *slot;

	if (curbatch == 0)
	{							/* if it is the first pass */
		slot = ExecProcNode(node, parent);
B
Bruce Momjian 已提交
508
		if (!TupIsNull(slot))
509
			return slot;
B
Bruce Momjian 已提交
510

511
		/*
B
Bruce Momjian 已提交
512 513
		 * We have just reached the end of the first pass. Try to switch
		 * to a saved batch.
514 515
		 */
		curbatch = ExecHashJoinNewBatch(hjstate);
516 517 518
	}

	/*
B
Bruce Momjian 已提交
519 520
	 * Try to read from a temp file. Loop allows us to advance to new
	 * batch as needed.
521
	 */
522 523 524
	while (curbatch <= hashtable->nbatch)
	{
		slot = ExecHashJoinGetSavedTuple(hjstate,
B
Bruce Momjian 已提交
525
								 hashtable->outerBatchFile[curbatch - 1],
526
										 hjstate->hj_OuterTupleSlot);
B
Bruce Momjian 已提交
527
		if (!TupIsNull(slot))
528 529 530 531 532 533
			return slot;
		curbatch = ExecHashJoinNewBatch(hjstate);
	}

	/* Out of batches... */
	return NULL;
534 535 536
}

/* ----------------------------------------------------------------
537
 *		ExecHashJoinGetSavedTuple
538
 *
539
 *		read the next tuple from a tmp file
540 541 542 543
 * ----------------------------------------------------------------
 */

static TupleTableSlot *
544
ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
545
						  BufFile *file,
546
						  TupleTableSlot *tupleSlot)
547
{
B
Bruce Momjian 已提交
548 549 550
	HeapTupleData htup;
	size_t		nread;
	HeapTuple	heapTuple;
551 552 553 554 555 556 557 558

	nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
	if (nread == 0)
		return NULL;			/* end of file */
	if (nread != sizeof(HeapTupleData))
		elog(ERROR, "Read from hashjoin temp file failed");
	heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
	memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
559
	heapTuple->t_datamcxt = CurrentMemoryContext;
B
Bruce Momjian 已提交
560 561
	heapTuple->t_data = (HeapTupleHeader)
		((char *) heapTuple + HEAPTUPLESIZE);
562 563 564 565
	nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
	if (nread != (size_t) htup.t_len)
		elog(ERROR, "Read from hashjoin temp file failed");
	return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
566 567 568
}

/* ----------------------------------------------------------------
569
 *		ExecHashJoinNewBatch
570
 *
571
 *		switch to a new hashjoin batch
572 573
 * ----------------------------------------------------------------
 */
574
static int
575
ExecHashJoinNewBatch(HashJoinState *hjstate)
576
{
577 578 579 580 581
	HashJoinTable hashtable = hjstate->hj_HashTable;
	int			nbatch = hashtable->nbatch;
	int			newbatch = hashtable->curbatch + 1;
	long	   *innerBatchSize = hashtable->innerBatchSize;
	long	   *outerBatchSize = hashtable->outerBatchSize;
B
Bruce Momjian 已提交
582
	BufFile    *innerFile;
583
	TupleTableSlot *slot;
584
	ExprContext *econtext;
585
	Node	   *innerhashkey;
586 587 588

	if (newbatch > 1)
	{
B
Bruce Momjian 已提交
589

590
		/*
B
Bruce Momjian 已提交
591 592
		 * We no longer need the previous outer batch file; close it right
		 * away to free disk space.
593
		 */
594 595
		BufFileClose(hashtable->outerBatchFile[newbatch - 2]);
		hashtable->outerBatchFile[newbatch - 2] = NULL;
596 597 598
	}

	/* --------------
599 600
	 *	We can skip over any batches that are empty on either side.
	 *	Release associated temp files right away.
601
	 * --------------
602
	 */
603 604 605
	while (newbatch <= nbatch &&
		   (innerBatchSize[newbatch - 1] == 0L ||
			outerBatchSize[newbatch - 1] == 0L))
606
	{
607 608 609 610
		BufFileClose(hashtable->innerBatchFile[newbatch - 1]);
		hashtable->innerBatchFile[newbatch - 1] = NULL;
		BufFileClose(hashtable->outerBatchFile[newbatch - 1]);
		hashtable->outerBatchFile[newbatch - 1] = NULL;
611
		newbatch++;
612
	}
613

614
	if (newbatch > nbatch)
615
		return newbatch;		/* no more batches */
616

617
	/*
B
Bruce Momjian 已提交
618 619
	 * Rewind inner and outer batch files for this batch, so that we can
	 * start reading them.
620
	 */
621
	if (BufFileSeek(hashtable->outerBatchFile[newbatch - 1], 0, 0L, SEEK_SET))
622 623 624
		elog(ERROR, "Failed to rewind hash temp file");

	innerFile = hashtable->innerBatchFile[newbatch - 1];
625

626
	if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
627 628 629 630 631 632
		elog(ERROR, "Failed to rewind hash temp file");

	/*
	 * Reload the hash table with the new inner batch
	 */
	ExecHashTableReset(hashtable, innerBatchSize[newbatch - 1]);
633 634 635 636 637

	econtext = hjstate->jstate.cs_ExprContext;
	innerhashkey = hjstate->hj_InnerHashKey;

	while ((slot = ExecHashJoinGetSavedTuple(hjstate,
638 639
											 innerFile,
											 hjstate->hj_HashTupleSlot))
640 641 642
		   && !TupIsNull(slot))
	{
		econtext->ecxt_innertuple = slot;
643
		ExecHashTableInsert(hashtable, econtext, innerhashkey);
644 645 646
	}

	/*
B
Bruce Momjian 已提交
647 648
	 * after we build the hash table, the inner batch file is no longer
	 * needed
649
	 */
650 651
	BufFileClose(innerFile);
	hashtable->innerBatchFile[newbatch - 1] = NULL;
652

653
	hashtable->curbatch = newbatch;
654 655 656 657
	return newbatch;
}

/* ----------------------------------------------------------------
658
 *		ExecHashJoinGetBatch
659
 *
660 661 662 663
 *		determine the batch number for a bucketno
 *		+----------------+-------+-------+ ... +-------+
 *		0			  nbuckets						 totalbuckets
 * batch		 0			 1		 2	   ...
664 665
 * ----------------------------------------------------------------
 */
666
static int
667
ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
668
{
669
	int			b;
670

671
	if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
672 673
		return 0;

674 675
	b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
		(hashtable->totalbuckets - hashtable->nbuckets);
676
	return b + 1;
677 678 679
}

/* ----------------------------------------------------------------
680
 *		ExecHashJoinSaveTuple
681
 *
682 683 684 685 686
 *		save a tuple to a tmp file.
 *
 * The data recorded in the file for each tuple is an image of its
 * HeapTupleData (with meaningless t_data pointer) followed by the
 * HeapTupleHeader and tuple data.
687 688 689
 * ----------------------------------------------------------------
 */

690
void
691
ExecHashJoinSaveTuple(HeapTuple heapTuple,
692
					  BufFile *file)
693
{
B
Bruce Momjian 已提交
694
	size_t		written;
695 696 697 698 699 700 701

	written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
	if (written != sizeof(HeapTupleData))
		elog(ERROR, "Write to hashjoin temp file failed");
	written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
	if (written != (size_t) heapTuple->t_len)
		elog(ERROR, "Write to hashjoin temp file failed");
702
}
V
Vadim B. Mikheev 已提交
703 704 705 706

/*
 * ExecReScanHashJoin
 *
 *		Prepare the hash join for being scanned again.  Nothing is done
 *		if the hash phase has not completed yet.  Otherwise the hash
 *		table is destroyed, the scan state is put back to its initial
 *		values, and each subplan whose chgParam is NULL is rescanned.
 */
void
ExecReScanHashJoin(HashJoin *node, ExprContext *exprCtxt, Plan *parent)
{
	HashJoinState *hjstate = node->hashjoinstate;
	Plan	   *self = (Plan *) node;

	/* hash phase not finished: there is nothing to reset */
	if (!hjstate->hj_hashdone)
		return;

	hjstate->hj_hashdone = false;

	/*
	 * Unfortunately, currently we have to destroy hashtable in all
	 * cases...
	 */
	if (hjstate->hj_HashTable != NULL)
	{
		ExecHashTableDestroy(hjstate->hj_HashTable);
		hjstate->hj_HashTable = NULL;
	}

	/* back to the values ExecInitHashJoin assigns */
	hjstate->hj_InnerHashKey = (Node *) NULL;
	hjstate->hj_CurBucketNo = 0;
	hjstate->hj_CurTuple = (HashJoinTuple) NULL;

	hjstate->jstate.cs_OuterTupleSlot = (TupleTableSlot *) NULL;
	hjstate->jstate.cs_TupFromTlist = false;
	hjstate->hj_NeedNewOuter = true;
	hjstate->hj_MatchedOuter = false;

	/*
	 * if chgParam of subnodes is not null then plans will be re-scanned
	 * by first ExecProcNode.
	 */
	if (self->lefttree->chgParam == NULL)
		ExecReScan(self->lefttree, exprCtxt, self);
	if (self->righttree->chgParam == NULL)
		ExecReScan(self->righttree, exprCtxt, self);
}