/*-------------------------------------------------------------------------
 *
 * nodeHashjoin.c
 *	  Routines to handle hash join nodes
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.54 2003/08/04 00:43:17 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
17 18

#include "executor/executor.h"
19 20
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
B
Bruce Momjian 已提交
21
#include "optimizer/clauses.h"
22 23
#include "utils/memutils.h"

24

B
Bruce Momjian 已提交
25 26
static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState * node,
						  HashJoinState *hjstate);
27
static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
28
						  BufFile *file,
B
Bruce Momjian 已提交
29
						  TupleTableSlot *tupleSlot);
30
static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
31 32


33
/* ----------------------------------------------------------------
34
 *		ExecHashJoin
35
 *
36 37 38 39
 *		This function implements the Hybrid Hashjoin algorithm.
 *		recursive partitioning remains to be added.
 *		Note: the relation we build hash table on is the inner
 *			  the other one is outer.
40 41
 * ----------------------------------------------------------------
 */
42
TupleTableSlot *				/* return: a tuple or NULL */
43
ExecHashJoin(HashJoinState *node)
44
{
45
	EState	   *estate;
46 47
	PlanState  *outerNode;
	HashState  *hashNode;
48
	List	   *hjclauses;
49
	List	   *outerkeys;
50 51
	List	   *joinqual;
	List	   *otherqual;
52
	ScanDirection dir;
53
	TupleTableSlot *inntuple;
54
	ExprContext *econtext;
55
	ExprDoneCond isDone;
56 57
	HashJoinTable hashtable;
	HeapTuple	curtuple;
58
	TupleTableSlot *outerTupleSlot;
59
	int			i;
60

61 62
	/*
	 * get information from HashJoin node
63 64
	 */
	hjclauses = node->hashclauses;
65 66 67 68 69
	estate = node->js.ps.state;
	joinqual = node->js.joinqual;
	otherqual = node->js.ps.qual;
	hashNode = (HashState *) innerPlanState(node);
	outerNode = outerPlanState(node);
70 71
	dir = estate->es_direction;

72
	/*
73
	 * get information from HashJoin state
74
	 */
75 76 77
	hashtable = node->hj_HashTable;
	outerkeys = node->hj_OuterHashKeys;
	econtext = node->js.ps.ps_ExprContext;
78

79 80 81 82
	/*
	 * Check to see if we're still projecting out tuples from a previous
	 * join tuple (because there is a function-returning-set in the
	 * projection expressions).  If so, try to project another one.
83
	 */
84
	if (node->js.ps.ps_TupFromTlist)
85 86 87
	{
		TupleTableSlot *result;

88
		result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
89
		if (isDone == ExprMultipleResult)
90
			return result;
91
		/* Done with that source tuple... */
92
		node->js.ps.ps_TupFromTlist = false;
93
	}
94

95 96
	/*
	 * If we're doing an IN join, we want to return at most one row per
B
Bruce Momjian 已提交
97 98
	 * outer tuple; so we can stop scanning the inner scan if we matched
	 * on the previous try.
99
	 */
B
Bruce Momjian 已提交
100
	if (node->js.jointype == JOIN_IN &&
101 102 103
		node->hj_MatchedOuter)
		node->hj_NeedNewOuter = true;

104 105 106 107
	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous tuple cycle.  Note this can't
	 * happen until we're done projecting out tuples from a join tuple.
108 109 110
	 */
	ResetExprContext(econtext);

111 112
	/*
	 * if this is the first call, build the hash table for inner relation
113
	 */
114 115 116 117 118 119
	if (!node->hj_hashdone)
	{
		/*
		 * create the hash table
		 */
		Assert(hashtable == NULL);
120 121
		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
										node->hj_HashOperators);
122
		node->hj_HashTable = hashtable;
123

124 125 126 127 128
		/*
		 * execute the Hash node, to build the hash table
		 */
		hashNode->hashtable = hashtable;
		(void) ExecProcNode((PlanState *) hashNode);
129 130 131 132

		/*
		 * Open temp files for outer batches, if needed. Note that file
		 * buffers are palloc'd in regular executor context.
133
		 */
134
		for (i = 0; i < hashtable->nbatch; i++)
135
			hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
136 137

		node->hj_hashdone = true;
138
	}
139

140 141
	/*
	 * Now get an outer tuple and probe into the hash table for matches
142
	 */
143
	outerTupleSlot = node->js.ps.ps_OuterTupleSlot;
144

145
	for (;;)
146
	{
147
		/*
148
		 * If we don't have an outer tuple, get the next one
149
		 */
150
		if (node->hj_NeedNewOuter)
151
		{
152
			outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
153
													   node);
154
			if (TupIsNull(outerTupleSlot))
155
			{
156
				/* end of join */
157 158 159
				return NULL;
			}

160
			node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
161
			econtext->ecxt_outertuple = outerTupleSlot;
162 163
			node->hj_NeedNewOuter = false;
			node->hj_MatchedOuter = false;
164

165
			/*
B
Bruce Momjian 已提交
166 167
			 * now we have an outer tuple, find the corresponding bucket
			 * for this tuple from the hash table
168
			 */
169
			node->hj_CurBucketNo = ExecHashGetBucket(hashtable, econtext,
170
													 outerkeys);
171
			node->hj_CurTuple = NULL;
172

173 174 175 176
			/*
			 * Now we've got an outer tuple and the corresponding hash
			 * bucket, but this tuple may not belong to the current batch.
			 * This need only be checked in the first pass.
177 178 179
			 */
			if (hashtable->curbatch == 0)
			{
180 181
				int			batchno = ExecHashGetBatch(node->hj_CurBucketNo,
													   hashtable);
B
Bruce Momjian 已提交
182

183
				if (batchno >= 0)
184 185
				{
					/*
186 187
					 * Need to postpone this outer tuple to a later batch.
					 * Save it in the corresponding outer-batch file.
188
					 */
189 190
					hashtable->outerBatchSize[batchno]++;
					ExecHashJoinSaveTuple(outerTupleSlot->val,
B
Bruce Momjian 已提交
191
									 hashtable->outerBatchFile[batchno]);
192
					node->hj_NeedNewOuter = true;
B
Bruce Momjian 已提交
193
					continue;	/* loop around for a new outer tuple */
194 195 196 197
				}
			}
		}

198 199
		/*
		 * OK, scan the selected hash bucket for matches
200
		 */
201
		for (;;)
202
		{
203
			curtuple = ExecScanHashBucket(node,
204 205 206 207
										  hjclauses,
										  econtext);
			if (curtuple == NULL)
				break;			/* out of matches */
B
Bruce Momjian 已提交
208

209
			/*
210
			 * we've got a match, but still need to test non-hashed quals
211
			 */
212
			inntuple = ExecStoreTuple(curtuple,
213
									  node->hj_HashTupleSlot,
214 215 216
									  InvalidBuffer,
									  false);	/* don't pfree this tuple */
			econtext->ecxt_innertuple = inntuple;
217

218
			/* reset temp memory each time to avoid leaks from qual expr */
219 220
			ResetExprContext(econtext);

221 222 223 224
			/*
			 * if we pass the qual, then save state for next call and have
			 * ExecProject form the projection, store it in the tuple
			 * table, and return the slot.
225
			 *
226 227
			 * Only the joinquals determine MatchedOuter status, but all
			 * quals must pass to actually return the tuple.
228
			 */
229
			if (ExecQual(joinqual, econtext, false))
230
			{
231
				node->hj_MatchedOuter = true;
232

233
				if (otherqual == NIL || ExecQual(otherqual, econtext, false))
234
				{
235 236
					TupleTableSlot *result;

237
					result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
238 239 240

					if (isDone != ExprEndResult)
					{
241
						node->js.ps.ps_TupFromTlist =
242 243 244
							(isDone == ExprMultipleResult);
						return result;
					}
245
				}
246

B
Bruce Momjian 已提交
247 248 249 250
				/*
				 * If we didn't return a tuple, may need to set
				 * NeedNewOuter
				 */
251 252 253 254 255
				if (node->js.jointype == JOIN_IN)
				{
					node->hj_NeedNewOuter = true;
					break;		/* out of loop over hash bucket */
				}
256 257 258
			}
		}

259 260 261 262
		/*
		 * Now the current outer tuple has run out of matches, so check
		 * whether to emit a dummy outer-join tuple. If not, loop around
		 * to get a new outer tuple.
263
		 */
264
		node->hj_NeedNewOuter = true;
265

266 267
		if (!node->hj_MatchedOuter &&
			node->js.jointype == JOIN_LEFT)
268 269 270 271
		{
			/*
			 * We are doing an outer join and there were no join matches
			 * for this outer tuple.  Generate a fake join tuple with
B
Bruce Momjian 已提交
272 273
			 * nulls for the inner tuple, and return it if it passes the
			 * non-join quals.
274
			 */
275
			econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
276 277 278

			if (ExecQual(otherqual, econtext, false))
			{
279 280 281 282
				/*
				 * qualification was satisfied so we project and return
				 * the slot containing the result tuple using
				 * ExecProject().
283 284 285
				 */
				TupleTableSlot *result;

286
				result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
287 288 289

				if (isDone != ExprEndResult)
				{
290
					node->js.ps.ps_TupFromTlist =
291 292 293 294 295
						(isDone == ExprMultipleResult);
					return result;
				}
			}
		}
296
	}
297 298 299
}

/* ----------------------------------------------------------------
300
 *		ExecInitHashJoin
301
 *
302
 *		Init routine for HashJoin node.
303 304
 * ----------------------------------------------------------------
 */
305 306
HashJoinState *
ExecInitHashJoin(HashJoin *node, EState *estate)
307
{
308 309 310
	HashJoinState *hjstate;
	Plan	   *outerNode;
	Hash	   *hashNode;
311
	List	   *hclauses;
312
	List	   *hoperators;
313
	List	   *hcl;
314

315
	/*
316 317 318
	 * create state structure
	 */
	hjstate = makeNode(HashJoinState);
319 320
	hjstate->js.ps.plan = (Plan *) node;
	hjstate->js.ps.state = estate;
321

322 323
	/*
	 * Miscellaneous initialization
324
	 *
325
	 * create expression context for node
326
	 */
327 328 329 330 331 332
	ExecAssignExprContext(estate, &hjstate->js.ps);

	/*
	 * initialize child expressions
	 */
	hjstate->js.ps.targetlist = (List *)
333
		ExecInitExpr((Expr *) node->join.plan.targetlist,
334 335
					 (PlanState *) hjstate);
	hjstate->js.ps.qual = (List *)
336
		ExecInitExpr((Expr *) node->join.plan.qual,
337 338 339
					 (PlanState *) hjstate);
	hjstate->js.jointype = node->join.jointype;
	hjstate->js.joinqual = (List *)
340
		ExecInitExpr((Expr *) node->join.joinqual,
341 342
					 (PlanState *) hjstate);
	hjstate->hashclauses = (List *)
343
		ExecInitExpr((Expr *) node->hashclauses,
344
					 (PlanState *) hjstate);
345

346
	/*
347
	 * initialize child nodes
348
	 */
349 350
	outerNode = outerPlan(node);
	hashNode = (Hash *) innerPlan(node);
351

352 353
	outerPlanState(hjstate) = ExecInitNode(outerNode, estate);
	innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate);
354

355
#define HASHJOIN_NSLOTS 3
356 357 358

	/*
	 * tuple table initialization
359
	 */
360
	ExecInitResultTupleSlot(estate, &hjstate->js.ps);
361 362 363 364 365
	hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);

	switch (node->join.jointype)
	{
		case JOIN_INNER:
366
		case JOIN_IN:
367 368 369 370
			break;
		case JOIN_LEFT:
			hjstate->hj_NullInnerTupleSlot =
				ExecInitNullTupleSlot(estate,
B
Bruce Momjian 已提交
371
							 ExecGetResultType(innerPlanState(hjstate)));
372 373
			break;
		default:
374
			elog(ERROR, "unrecognized join type: %d",
375 376 377
				 (int) node->join.jointype);
	}

378 379 380 381 382 383
	/*
	 * now for some voodoo.  our temporary tuple slot is actually the
	 * result tuple slot of the Hash node (which is our inner plan).  we
	 * do this because Hash nodes don't return tuples via ExecProcNode()
	 * -- instead the hash join node uses ExecScanHashBucket() to get at
	 * the contents of the hash table.	-cim 6/9/91
384 385
	 */
	{
386 387
		HashState  *hashstate = (HashState *) innerPlanState(hjstate);
		TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
388 389 390 391

		hjstate->hj_HashTupleSlot = slot;
	}

392 393
	/*
	 * initialize tuple type and projection info
394
	 */
395 396
	ExecAssignResultTypeFromTL(&hjstate->js.ps);
	ExecAssignProjectionInfo(&hjstate->js.ps);
397

398
	ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
399
						  ExecGetResultType(outerPlanState(hjstate)),
400
						  false);
401

402 403
	/*
	 * initialize hash-specific info
404 405
	 */

406
	hjstate->hj_hashdone = false;
407
	hjstate->hj_HashTable = (HashJoinTable) NULL;
408

409 410
	hjstate->hj_CurBucketNo = 0;
	hjstate->hj_CurTuple = (HashJoinTuple) NULL;
411 412

	/*
B
Bruce Momjian 已提交
413 414 415 416
	 * The planner already made a list of the inner hashkeys for us, but
	 * we also need a list of the outer hashkeys, as well as a list of the
	 * hash operator OIDs.	Both lists of exprs must then be prepared for
	 * execution.
417
	 */
418 419 420 421 422 423 424
	hjstate->hj_InnerHashKeys = (List *)
		ExecInitExpr((Expr *) hashNode->hashkeys,
					 innerPlanState(hjstate));
	((HashState *) innerPlanState(hjstate))->hashkeys =
		hjstate->hj_InnerHashKeys;

	hclauses = NIL;
425
	hoperators = NIL;
426 427
	foreach(hcl, node->hashclauses)
	{
428 429 430 431 432
		OpExpr	   *hclause = (OpExpr *) lfirst(hcl);

		Assert(IsA(hclause, OpExpr));
		hclauses = lappend(hclauses, get_leftop((Expr *) hclause));
		hoperators = lappendo(hoperators, hclause->opno);
433
	}
434 435 436
	hjstate->hj_OuterHashKeys = (List *)
		ExecInitExpr((Expr *) hclauses,
					 (PlanState *) hjstate);
437
	hjstate->hj_HashOperators = hoperators;
438

439 440
	hjstate->js.ps.ps_OuterTupleSlot = NULL;
	hjstate->js.ps.ps_TupFromTlist = false;
441 442
	hjstate->hj_NeedNewOuter = true;
	hjstate->hj_MatchedOuter = false;
443

444
	return hjstate;
445 446 447
}

int
448
ExecCountSlotsHashJoin(HashJoin *node)
449
{
450
	return ExecCountSlotsNode(outerPlan(node)) +
451 452
		ExecCountSlotsNode(innerPlan(node)) +
		HASHJOIN_NSLOTS;
453 454 455
}

/* ----------------------------------------------------------------
456
 *		ExecEndHashJoin
457
 *
458
 *		clean up routine for HashJoin node
459 460 461
 * ----------------------------------------------------------------
 */
void
462
ExecEndHashJoin(HashJoinState *node)
463
{
464
	/*
465
	 * Free hash table
466
	 */
467
	if (node->hj_HashTable)
468
	{
469 470
		ExecHashTableDestroy(node->hj_HashTable);
		node->hj_HashTable = NULL;
471 472
	}

473
	/*
474
	 * Free the exprcontext
475
	 */
476
	ExecFreeExprContext(&node->js.ps);
477

478 479
	/*
	 * clean out the tuple table
480
	 */
481 482 483
	ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->hj_OuterTupleSlot);
	ExecClearTuple(node->hj_HashTupleSlot);
484

485 486 487 488 489
	/*
	 * clean up subtrees
	 */
	ExecEndNode(outerPlanState(node));
	ExecEndNode(innerPlanState(node));
490 491 492
}

/* ----------------------------------------------------------------
493
 *		ExecHashJoinOuterGetTuple
494
 *
495 496 497
 *		get the next outer tuple for hashjoin: either by
 *		executing a plan node as in the first pass, or from
 *		the tmp files for the hashjoin batches.
498 499 500 501
 * ----------------------------------------------------------------
 */

static TupleTableSlot *
B
Bruce Momjian 已提交
502
ExecHashJoinOuterGetTuple(PlanState * node, HashJoinState *hjstate)
503
{
B
Bruce Momjian 已提交
504 505
	HashJoinTable hashtable = hjstate->hj_HashTable;
	int			curbatch = hashtable->curbatch;
506 507 508 509
	TupleTableSlot *slot;

	if (curbatch == 0)
	{							/* if it is the first pass */
510
		slot = ExecProcNode(node);
B
Bruce Momjian 已提交
511
		if (!TupIsNull(slot))
512
			return slot;
B
Bruce Momjian 已提交
513

514
		/*
B
Bruce Momjian 已提交
515 516
		 * We have just reached the end of the first pass. Try to switch
		 * to a saved batch.
517 518
		 */
		curbatch = ExecHashJoinNewBatch(hjstate);
519 520 521
	}

	/*
B
Bruce Momjian 已提交
522 523
	 * Try to read from a temp file. Loop allows us to advance to new
	 * batch as needed.
524
	 */
525 526 527
	while (curbatch <= hashtable->nbatch)
	{
		slot = ExecHashJoinGetSavedTuple(hjstate,
B
Bruce Momjian 已提交
528
								 hashtable->outerBatchFile[curbatch - 1],
529
										 hjstate->hj_OuterTupleSlot);
B
Bruce Momjian 已提交
530
		if (!TupIsNull(slot))
531 532 533 534 535 536
			return slot;
		curbatch = ExecHashJoinNewBatch(hjstate);
	}

	/* Out of batches... */
	return NULL;
537 538 539
}

/* ----------------------------------------------------------------
540
 *		ExecHashJoinGetSavedTuple
541
 *
542
 *		read the next tuple from a tmp file
543 544 545 546
 * ----------------------------------------------------------------
 */

static TupleTableSlot *
547
ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
548
						  BufFile *file,
549
						  TupleTableSlot *tupleSlot)
550
{
B
Bruce Momjian 已提交
551 552 553
	HeapTupleData htup;
	size_t		nread;
	HeapTuple	heapTuple;
554 555 556 557 558

	nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
	if (nread == 0)
		return NULL;			/* end of file */
	if (nread != sizeof(HeapTupleData))
559 560 561
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("read from hashjoin temp file failed: %m")));
562 563
	heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
	memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
564
	heapTuple->t_datamcxt = CurrentMemoryContext;
B
Bruce Momjian 已提交
565 566
	heapTuple->t_data = (HeapTupleHeader)
		((char *) heapTuple + HEAPTUPLESIZE);
567 568
	nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
	if (nread != (size_t) htup.t_len)
569 570 571
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("read from hashjoin temp file failed: %m")));
572
	return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
573 574 575
}

/* ----------------------------------------------------------------
576
 *		ExecHashJoinNewBatch
577
 *
578
 *		switch to a new hashjoin batch
579 580
 * ----------------------------------------------------------------
 */
581
static int
582
ExecHashJoinNewBatch(HashJoinState *hjstate)
583
{
584 585 586 587 588
	HashJoinTable hashtable = hjstate->hj_HashTable;
	int			nbatch = hashtable->nbatch;
	int			newbatch = hashtable->curbatch + 1;
	long	   *innerBatchSize = hashtable->innerBatchSize;
	long	   *outerBatchSize = hashtable->outerBatchSize;
B
Bruce Momjian 已提交
589
	BufFile    *innerFile;
590
	TupleTableSlot *slot;
591
	ExprContext *econtext;
592
	List	   *innerhashkeys;
593 594 595 596

	if (newbatch > 1)
	{
		/*
B
Bruce Momjian 已提交
597 598
		 * We no longer need the previous outer batch file; close it right
		 * away to free disk space.
599
		 */
600 601
		BufFileClose(hashtable->outerBatchFile[newbatch - 2]);
		hashtable->outerBatchFile[newbatch - 2] = NULL;
602 603
	}

604 605 606
	/*
	 * We can skip over any batches that are empty on either side. Release
	 * associated temp files right away.
607
	 */
608 609 610
	while (newbatch <= nbatch &&
		   (innerBatchSize[newbatch - 1] == 0L ||
			outerBatchSize[newbatch - 1] == 0L))
611
	{
612 613 614 615
		BufFileClose(hashtable->innerBatchFile[newbatch - 1]);
		hashtable->innerBatchFile[newbatch - 1] = NULL;
		BufFileClose(hashtable->outerBatchFile[newbatch - 1]);
		hashtable->outerBatchFile[newbatch - 1] = NULL;
616
		newbatch++;
617
	}
618

619
	if (newbatch > nbatch)
620
		return newbatch;		/* no more batches */
621

622
	/*
B
Bruce Momjian 已提交
623 624
	 * Rewind inner and outer batch files for this batch, so that we can
	 * start reading them.
625
	 */
626
	if (BufFileSeek(hashtable->outerBatchFile[newbatch - 1], 0, 0L, SEEK_SET))
627 628 629
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("failed to rewind hashjoin temp file: %m")));
630 631

	innerFile = hashtable->innerBatchFile[newbatch - 1];
632

633
	if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
634 635 636
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("failed to rewind hashjoin temp file: %m")));
637 638 639 640 641

	/*
	 * Reload the hash table with the new inner batch
	 */
	ExecHashTableReset(hashtable, innerBatchSize[newbatch - 1]);
642

643
	econtext = hjstate->js.ps.ps_ExprContext;
644
	innerhashkeys = hjstate->hj_InnerHashKeys;
645 646

	while ((slot = ExecHashJoinGetSavedTuple(hjstate,
647 648
											 innerFile,
											 hjstate->hj_HashTupleSlot))
649 650 651
		   && !TupIsNull(slot))
	{
		econtext->ecxt_innertuple = slot;
652
		ExecHashTableInsert(hashtable, econtext, innerhashkeys);
653 654 655
	}

	/*
B
Bruce Momjian 已提交
656 657
	 * after we build the hash table, the inner batch file is no longer
	 * needed
658
	 */
659 660
	BufFileClose(innerFile);
	hashtable->innerBatchFile[newbatch - 1] = NULL;
661

662
	hashtable->curbatch = newbatch;
663 664 665 666
	return newbatch;
}

/* ----------------------------------------------------------------
667
 *		ExecHashJoinSaveTuple
668
 *
669 670 671 672 673
 *		save a tuple to a tmp file.
 *
 * The data recorded in the file for each tuple is an image of its
 * HeapTupleData (with meaningless t_data pointer) followed by the
 * HeapTupleHeader and tuple data.
674 675 676
 * ----------------------------------------------------------------
 */

677
void
678
ExecHashJoinSaveTuple(HeapTuple heapTuple,
679
					  BufFile *file)
680
{
B
Bruce Momjian 已提交
681
	size_t		written;
682 683 684

	written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
	if (written != sizeof(HeapTupleData))
685 686 687
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("write to hashjoin temp file failed: %m")));
688 689
	written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
	if (written != (size_t) heapTuple->t_len)
690 691 692
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("write to hashjoin temp file failed: %m")));
693
}
V
Vadim B. Mikheev 已提交
694 695

void
696
ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
V
Vadim B. Mikheev 已提交
697
{
698 699 700 701
	/*
	 * If we haven't yet built the hash table then we can just return;
	 * nothing done yet, so nothing to undo.
	 */
702
	if (!node->hj_hashdone)
V
Vadim B. Mikheev 已提交
703
		return;
704
	Assert(node->hj_HashTable != NULL);
705 706

	/*
B
Bruce Momjian 已提交
707 708 709 710 711
	 * In a multi-batch join, we currently have to do rescans the hard
	 * way, primarily because batch temp files may have already been
	 * released. But if it's a single-batch join, and there is no
	 * parameter change for the inner subnode, then we can just re-use the
	 * existing hash table without rebuilding it.
V
Vadim B. Mikheev 已提交
712
	 */
713 714 715 716 717 718
	if (node->hj_HashTable->nbatch == 0 &&
		((PlanState *) node)->righttree->chgParam == NULL)
	{
		/* okay to reuse the hash table; needn't rescan inner, either */
	}
	else
V
Vadim B. Mikheev 已提交
719
	{
720 721
		/* must destroy and rebuild hash table */
		node->hj_hashdone = false;
722 723
		ExecHashTableDestroy(node->hj_HashTable);
		node->hj_HashTable = NULL;
B
Bruce Momjian 已提交
724

725 726 727 728 729 730
		/*
		 * if chgParam of subnode is not null then plan will be re-scanned
		 * by first ExecProcNode.
		 */
		if (((PlanState *) node)->righttree->chgParam == NULL)
			ExecReScan(((PlanState *) node)->righttree, exprCtxt);
V
Vadim B. Mikheev 已提交
731
	}
732

733
	/* Always reset intra-tuple state */
734 735
	node->hj_CurBucketNo = 0;
	node->hj_CurTuple = (HashJoinTuple) NULL;
V
Vadim B. Mikheev 已提交
736

737 738 739 740
	node->js.ps.ps_OuterTupleSlot = (TupleTableSlot *) NULL;
	node->js.ps.ps_TupFromTlist = false;
	node->hj_NeedNewOuter = true;
	node->hj_MatchedOuter = false;
741 742

	/*
B
Bruce Momjian 已提交
743 744
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
V
Vadim B. Mikheev 已提交
745
	 */
746 747
	if (((PlanState *) node)->lefttree->chgParam == NULL)
		ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
V
Vadim B. Mikheev 已提交
748
}