CCostModelGPDB.cpp 64.8 KB
Newer Older
E
Entong Shen 已提交
1 2 3 4 5 6 7 8 9 10 11
//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright (C) 2014 Pivotal Inc.
//
//	@filename:
//		CCostModelGPDB.cpp
//
//	@doc:
//		Implementation of GPDB cost model
//---------------------------------------------------------------------------

12 13
#include <limits>

14
#include "gpopt/base/CColRefSetIter.h"
E
Entong Shen 已提交
15 16 17 18 19 20 21
#include "gpopt/base/COrderSpec.h"
#include "gpopt/base/CWindowFrame.h"
#include "gpopt/metadata/CTableDescriptor.h"
#include "gpopt/metadata/CIndexDescriptor.h"
#include "gpopt/operators/CExpressionHandle.h"
#include "gpopt/operators/CPhysicalSequenceProject.h"
#include "gpopt/operators/CPhysicalIndexScan.h"
22
#include "gpopt/operators/CPhysicalIndexOnlyScan.h"
E
Entong Shen 已提交
23 24
#include "gpopt/operators/CPhysicalDynamicIndexScan.h"
#include "gpopt/operators/CPhysicalHashAgg.h"
25
#include "gpopt/operators/CPhysicalUnionAll.h"
26
#include "gpopt/operators/CPhysicalMotion.h"
H
Hans Zeller 已提交
27
#include "gpopt/operators/CPhysicalPartitionSelector.h"
E
Entong Shen 已提交
28
#include "gpopt/operators/CPredicateUtils.h"
29
#include "gpopt/operators/CScalarBitmapIndexProbe.h"
E
Entong Shen 已提交
30 31 32
#include "naucrates/statistics/CStatisticsUtils.h"
#include "gpopt/operators/CExpression.h"
#include "gpdbcost/CCostModelGPDB.h"
33 34
#include "gpopt/optimizer/COptimizerConfig.h"
#include "gpopt/engine/CHint.h"
E
Entong Shen 已提交
35 36 37 38 39 40

using namespace gpos;
using namespace gpdbcost;


// initialization of cost functions
J
Jesse Zhang 已提交
41
const CCostModelGPDB::SCostMapping CCostModelGPDB::m_rgcm[] = {
E
Entong Shen 已提交
42 43 44 45 46 47
	{COperator::EopPhysicalTableScan, CostScan},
	{COperator::EopPhysicalDynamicTableScan, CostScan},
	{COperator::EopPhysicalExternalScan, CostScan},

	{COperator::EopPhysicalFilter, CostFilter},

48
	{COperator::EopPhysicalIndexOnlyScan, CostIndexOnlyScan},
E
Entong Shen 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
	{COperator::EopPhysicalIndexScan, CostIndexScan},
	{COperator::EopPhysicalDynamicIndexScan, CostIndexScan},
	{COperator::EopPhysicalBitmapTableScan, CostBitmapTableScan},
	{COperator::EopPhysicalDynamicBitmapTableScan, CostBitmapTableScan},

	{COperator::EopPhysicalSequenceProject, CostSequenceProject},

	{COperator::EopPhysicalCTEProducer, CostCTEProducer},
	{COperator::EopPhysicalCTEConsumer, CostCTEConsumer},
	{COperator::EopPhysicalConstTableGet, CostConstTableGet},
	{COperator::EopPhysicalDML, CostDML},

	{COperator::EopPhysicalHashAgg, CostHashAgg},
	{COperator::EopPhysicalHashAggDeduplicate, CostHashAgg},
	{COperator::EopPhysicalScalarAgg, CostScalarAgg},
	{COperator::EopPhysicalStreamAgg, CostStreamAgg},
	{COperator::EopPhysicalStreamAggDeduplicate, CostStreamAgg},

	{COperator::EopPhysicalSequence, CostSequence},

	{COperator::EopPhysicalSort, CostSort},

	{COperator::EopPhysicalTVF, CostTVF},

73
	{COperator::EopPhysicalSerialUnionAll, CostUnionAll},
74
	{COperator::EopPhysicalParallelUnionAll, CostUnionAll},
E
Entong Shen 已提交
75 76 77 78 79 80 81

	{COperator::EopPhysicalInnerHashJoin, CostHashJoin},
	{COperator::EopPhysicalLeftSemiHashJoin, CostHashJoin},
	{COperator::EopPhysicalLeftAntiSemiHashJoin, CostHashJoin},
	{COperator::EopPhysicalLeftAntiSemiHashJoinNotIn, CostHashJoin},
	{COperator::EopPhysicalLeftOuterHashJoin, CostHashJoin},

82 83
	{COperator::EopPhysicalInnerIndexNLJoin, CostIndexNLJoin},
	{COperator::EopPhysicalLeftOuterIndexNLJoin, CostIndexNLJoin},
E
Entong Shen 已提交
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100

	{COperator::EopPhysicalMotionGather, CostMotion},
	{COperator::EopPhysicalMotionBroadcast, CostMotion},
	{COperator::EopPhysicalMotionHashDistribute, CostMotion},
	{COperator::EopPhysicalMotionRandom, CostMotion},
	{COperator::EopPhysicalMotionRoutedDistribute, CostMotion},

	{COperator::EopPhysicalInnerNLJoin, CostNLJoin},
	{COperator::EopPhysicalLeftSemiNLJoin, CostNLJoin},
	{COperator::EopPhysicalLeftAntiSemiNLJoin, CostNLJoin},
	{COperator::EopPhysicalLeftAntiSemiNLJoinNotIn, CostNLJoin},
	{COperator::EopPhysicalLeftOuterNLJoin, CostNLJoin},
	{COperator::EopPhysicalCorrelatedInnerNLJoin, CostNLJoin},
	{COperator::EopPhysicalCorrelatedLeftOuterNLJoin, CostNLJoin},
	{COperator::EopPhysicalCorrelatedLeftSemiNLJoin, CostNLJoin},
	{COperator::EopPhysicalCorrelatedInLeftSemiNLJoin, CostNLJoin},
	{COperator::EopPhysicalCorrelatedLeftAntiSemiNLJoin, CostNLJoin},
101 102 103
	{COperator::EopPhysicalCorrelatedNotInLeftAntiSemiNLJoin, CostNLJoin},

	{COperator::EopPhysicalFullMergeJoin, CostMergeJoin},
E
Entong Shen 已提交
104 105 106 107 108 109 110 111 112 113
};

//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CCostModelGPDB
//
//	@doc:
//		Ctor
//
//---------------------------------------------------------------------------
J
Jesse Zhang 已提交
114 115 116
CCostModelGPDB::CCostModelGPDB(CMemoryPool *mp, ULONG ulSegments,
							   CCostModelParamsGPDB *pcp)
	: m_mp(mp), m_num_of_segments(ulSegments)
E
Entong Shen 已提交
117 118 119
{
	GPOS_ASSERT(0 < ulSegments);

120 121
	if (NULL == pcp)
	{
122
		m_cost_model_params = GPOS_NEW(mp) CCostModelParamsGPDB(mp);
123 124 125 126 127
	}
	else
	{
		GPOS_ASSERT(NULL != pcp);

128
		m_cost_model_params = pcp;
129
	}
E
Entong Shen 已提交
130 131 132 133 134 135 136 137 138 139 140 141
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::DRowsPerHost
//
//	@doc:
//		Return number of rows per host
//
//---------------------------------------------------------------------------
CDouble
J
Jesse Zhang 已提交
142
CCostModelGPDB::DRowsPerHost(CDouble dRowsTotal) const
E
Entong Shen 已提交
143
{
144
	return dRowsTotal / m_num_of_segments;
E
Entong Shen 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::~CCostModelGPDB
//
//	@doc:
//		Dtor
//
//---------------------------------------------------------------------------
CCostModelGPDB::~CCostModelGPDB()
{
158
	m_cost_model_params->Release();
E
Entong Shen 已提交
159 160 161 162 163 164 165 166 167 168 169 170
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostTupleProcessing
//
//	@doc:
//		Cost of tuple processing
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
171 172
CCostModelGPDB::CostTupleProcessing(DOUBLE rows, DOUBLE width,
									ICostModelParams *pcp)
E
Entong Shen 已提交
173 174 175
{
	GPOS_ASSERT(NULL != pcp);

J
Jesse Zhang 已提交
176 177
	const CDouble dTupDefaultProcCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)->Get();
E
Entong Shen 已提交
178 179
	GPOS_ASSERT(0 < dTupDefaultProcCostUnit);

180
	return CCost(rows * width * dTupDefaultProcCostUnit);
E
Entong Shen 已提交
181 182 183 184 185 186 187 188 189 190 191 192 193 194
}


//
//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostScanOutput
//
//	@doc:
//		Helper function to return cost of producing output tuples from
//		Scan operator
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
195 196 197
CCostModelGPDB::CostScanOutput(CMemoryPool *,  // mp
							   DOUBLE rows, DOUBLE width, DOUBLE num_rebinds,
							   ICostModelParams *pcp)
E
Entong Shen 已提交
198 199 200
{
	GPOS_ASSERT(NULL != pcp);

J
Jesse Zhang 已提交
201 202
	const CDouble dOutputTupCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)->Get();
E
Entong Shen 已提交
203 204
	GPOS_ASSERT(0 < dOutputTupCostUnit);

205
	return CCost(num_rebinds * (rows * width * dOutputTupCostUnit));
E
Entong Shen 已提交
206 207 208 209 210 211 212 213 214 215 216 217
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostUnary
//
//	@doc:
//		Helper function to return cost of a plan rooted by unary operator
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
218 219
CCostModelGPDB::CostUnary(CMemoryPool *mp, CExpressionHandle &exprhdl,
						  const SCostingInfo *pci, ICostModelParams *pcp)
E
Entong Shen 已提交
220 221 222 223
{
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(NULL != pcp);

224 225 226
	DOUBLE rows = pci->Rows();
	DOUBLE width = pci->Width();
	DOUBLE num_rebinds = pci->NumRebinds();
E
Entong Shen 已提交
227

J
Jesse Zhang 已提交
228 229
	CCost costLocal =
		CCost(num_rebinds * CostTupleProcessing(rows, width, pcp).Get());
230
	CCost costChild = CostChildren(mp, exprhdl, pci, pcp);
E
Entong Shen 已提交
231 232 233 234 235 236 237 238 239 240 241 242 243 244

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostSpooling
//
//	@doc:
//		Helper function to compute spooling cost
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
245 246
CCostModelGPDB::CostSpooling(CMemoryPool *mp, CExpressionHandle &exprhdl,
							 const SCostingInfo *pci, ICostModelParams *pcp)
E
Entong Shen 已提交
247 248 249 250
{
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(NULL != pcp);

J
Jesse Zhang 已提交
251 252
	const CDouble dMaterializeCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpMaterializeCostUnit)->Get();
E
Entong Shen 已提交
253 254
	GPOS_ASSERT(0 < dMaterializeCostUnit);

255 256 257
	DOUBLE rows = pci->Rows();
	DOUBLE width = pci->Width();
	DOUBLE num_rebinds = pci->NumRebinds();
E
Entong Shen 已提交
258 259

	// materialization cost is correlated with the number of rows and width of returning tuples.
J
Jesse Zhang 已提交
260 261
	CCost costLocal =
		CCost(num_rebinds * (width * rows * dMaterializeCostUnit));
262
	CCost costChild = CostChildren(mp, exprhdl, pci, pcp);
E
Entong Shen 已提交
263 264 265 266 267 268 269 270 271 272 273 274 275

	return costLocal + costChild;
}

//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::FUnary
//
//	@doc:
//		Check if given operator is unary
//
//---------------------------------------------------------------------------
BOOL
J
Jesse Zhang 已提交
276
CCostModelGPDB::FUnary(COperator::EOperatorId op_id)
E
Entong Shen 已提交
277
{
278
	return COperator::EopPhysicalAssert == op_id ||
J
Jesse Zhang 已提交
279 280 281 282 283 284 285
		   COperator::EopPhysicalComputeScalar == op_id ||
		   COperator::EopPhysicalLimit == op_id ||
		   COperator::EopPhysicalPartitionSelector == op_id ||
		   COperator::EopPhysicalPartitionSelectorDML == op_id ||
		   COperator::EopPhysicalRowTrigger == op_id ||
		   COperator::EopPhysicalSplit == op_id ||
		   COperator::EopPhysicalSpool == op_id;
E
Entong Shen 已提交
286 287 288 289 290 291 292 293 294 295 296 297
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostChildren
//
//	@doc:
//		Add up children costs
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
298 299
CCostModelGPDB::CostChildren(CMemoryPool *mp, CExpressionHandle &exprhdl,
							 const SCostingInfo *pci, ICostModelParams *pcp)
E
Entong Shen 已提交
300 301 302 303 304
{
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(NULL != pcp);

	DOUBLE *pdCost = pci->PdCost();
305
	const ULONG size = pci->ChildCount();
J
Jesse Zhang 已提交
306 307
	BOOL fFilterParent =
		(COperator::EopPhysicalFilter == exprhdl.Pop()->Eopid());
E
Entong Shen 已提交
308

309 310
	DOUBLE res = 0.0;
	for (ULONG ul = 0; ul < size; ul++)
E
Entong Shen 已提交
311 312 313
	{
		DOUBLE dCostChild = pdCost[ul];
		COperator *popChild = exprhdl.Pop(ul);
H
Hans Zeller 已提交
314 315 316
		if (NULL != popChild &&
			(CUtils::FPhysicalScan(popChild) ||
			 COperator::EopPhysicalPartitionSelector == popChild->Eopid()))
E
Entong Shen 已提交
317 318 319
		{
			// by default, compute scan output cost based on full Scan
			DOUBLE dScanRows = pci->PdRows()[ul];
H
Hans Zeller 已提交
320
			COperator *scanOp = popChild;
E
Entong Shen 已提交
321 322 323

			if (fFilterParent)
			{
J
Jesse Zhang 已提交
324 325
				CPhysicalPartitionSelector *ps =
					dynamic_cast<CPhysicalPartitionSelector *>(popChild);
H
Hans Zeller 已提交
326 327 328 329 330

				if (ps)
				{
					CCostContext *grandchildContext = NULL;

J
Jesse Zhang 已提交
331 332 333
					scanOp = exprhdl.PopGrandchild(ul, 0, &grandchildContext);
					CPhysicalDynamicScan *scan =
						dynamic_cast<CPhysicalDynamicScan *>(scanOp);
H
Hans Zeller 已提交
334

J
Jesse Zhang 已提交
335 336
					if (scan && scan->ScanId() == ps->ScanId() &&
						grandchildContext)
H
Hans Zeller 已提交
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
					{
						// We have a filter on top of a partition selector on top of a scan.
						// Base the scan output cost on the combination (filter + part sel + scan)
						// on the rows that are produced by the scan, since the runtime execution
						// plan with be sequence(part_sel, scan+filter). Note that the cost of
						// the partition selector is ignored here. It may be higher than that of
						// the complete tree (filter + part sel + scan).
						// See method CTranslatorExprToDXL::PdxlnPartitionSelectorWithInlinedCondition()
						// for how we inline the predicate into the dynamic table scan.
						dCostChild = grandchildContext->Cost().Get();
						dScanRows = pci->Rows();
					}
				}
				else
				{
					// if parent is filter, compute scan output cost based on rows produced by Filter operator
					dScanRows = pci->Rows();
				}
E
Entong Shen 已提交
355 356
			}

H
Hans Zeller 已提交
357 358 359
			if (CUtils::FPhysicalScan(scanOp))
			{
				// Note: We assume that width and rebinds are the same for scan, partition selector and filter
J
Jesse Zhang 已提交
360 361 362 363
				dCostChild = dCostChild +
							 CostScanOutput(mp, dScanRows, pci->GetWidth()[ul],
											pci->PdRebinds()[ul], pcp)
								 .Get();
H
Hans Zeller 已提交
364
			}
E
Entong Shen 已提交
365 366
		}

367
		res = res + dCostChild;
E
Entong Shen 已提交
368 369
	}

370
	return CCost(res);
E
Entong Shen 已提交
371 372
}

373 374 375 376 377 378 379 380 381 382
//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostMaxChild
//
//	@doc:
//		Returns cost of highest costed child
//
//---------------------------------------------------------------------------

CCost
J
Jesse Zhang 已提交
383 384
CCostModelGPDB::CostMaxChild(CMemoryPool *, CExpressionHandle &,
							 const SCostingInfo *pci, ICostModelParams *)
385 386 387 388
{
	GPOS_ASSERT(NULL != pci);

	DOUBLE *pdCost = pci->PdCost();
389
	const ULONG size = pci->ChildCount();
390

391 392
	DOUBLE res = 0.0;
	for (ULONG ul = 0; ul < size; ul++)
393
	{
394
		if (pdCost[ul] > res)
395
		{
396
			res = pdCost[ul];
397 398 399
		}
	}

400
	return CCost(res);
401
}
E
Entong Shen 已提交
402 403 404 405 406 407 408 409 410 411

//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostCTEProducer
//
//	@doc:
//		Cost of CTE producer
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
412 413 414
CCostModelGPDB::CostCTEProducer(CMemoryPool *mp, CExpressionHandle &exprhdl,
								const CCostModelGPDB *pcmgpdb,
								const SCostingInfo *pci)
E
Entong Shen 已提交
415 416 417 418 419
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalCTEProducer == exprhdl.Pop()->Eopid());

420
	CCost cost = CostUnary(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
421 422 423 424 425

	// In GPDB, the child of a ShareInputScan representing the producer can
	// only be a materialize or sort. Here, we check if a materialize node
	// needs to be added during DXL->PlStmt translation

426
	COperator *popChild = exprhdl.Pop(0 /*child_index*/);
E
Entong Shen 已提交
427 428 429 430 431 432
	if (NULL == popChild)
	{
		// child operator is not known, this could happen when computing cost bound
		return cost;
	}

433
	COperator::EOperatorId op_id = popChild->Eopid();
J
Jesse Zhang 已提交
434 435
	if (COperator::EopPhysicalSpool != op_id &&
		COperator::EopPhysicalSort != op_id)
E
Entong Shen 已提交
436 437 438 439 440 441 442
	{
		// no materialize needed
		return cost;
	}

	// a materialize (spool) node is added during DXL->PlStmt translation,
	// we need to add the cost of writing the tuples to disk
J
Jesse Zhang 已提交
443 444 445 446
	const CDouble dMaterializeCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpMaterializeCostUnit)
			->Get();
E
Entong Shen 已提交
447 448
	GPOS_ASSERT(0 < dMaterializeCostUnit);

J
Jesse Zhang 已提交
449 450
	CCost costSpooling = CCost(pci->NumRebinds() * (pci->Rows() * pci->Width() *
													dMaterializeCostUnit));
E
Entong Shen 已提交
451 452 453 454 455 456 457 458 459 460 461 462 463 464

	return cost + costSpooling;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostCTEConsumer
//
//	@doc:
//		Cost of CTE consumer
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
465 466
CCostModelGPDB::CostCTEConsumer(CMemoryPool *,	// mp
								CExpressionHandle &
E
Entong Shen 已提交
467
#ifdef GPOS_DEBUG
J
Jesse Zhang 已提交
468 469 470 471 472
									exprhdl
#endif	// GPOS_DEBUG
								,
								const CCostModelGPDB *pcmgpdb,
								const SCostingInfo *pci)
E
Entong Shen 已提交
473 474 475 476 477
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalCTEConsumer == exprhdl.Pop()->Eopid());

J
Jesse Zhang 已提交
478 479 480 481 482 483 484 485 486 487 488 489
	const CDouble dInitScan =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
			->Get();
	const CDouble dTableScanCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
			->Get();
	const CDouble dOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)
			->Get();
E
Entong Shen 已提交
490 491 492
	GPOS_ASSERT(0 < dOutputTupCostUnit);
	GPOS_ASSERT(0 < dTableScanCostUnit);

J
Jesse Zhang 已提交
493 494 495
	return CCost(pci->NumRebinds() *
				 (dInitScan + pci->Rows() * pci->Width() *
								  (dTableScanCostUnit + dOutputTupCostUnit)));
E
Entong Shen 已提交
496 497 498 499 500 501 502 503 504 505 506 507
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostConstTableGet
//
//	@doc:
//		Cost of const table get
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
508 509
CCostModelGPDB::CostConstTableGet(CMemoryPool *,  // mp
								  CExpressionHandle &
E
Entong Shen 已提交
510
#ifdef GPOS_DEBUG
J
Jesse Zhang 已提交
511 512 513 514 515
									  exprhdl
#endif	// GPOS_DEBUG
								  ,
								  const CCostModelGPDB *pcmgpdb,
								  const SCostingInfo *pci)
E
Entong Shen 已提交
516 517 518 519 520
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalConstTableGet == exprhdl.Pop()->Eopid());

J
Jesse Zhang 已提交
521 522 523 524
	return CCost(pci->NumRebinds() *
				 CostTupleProcessing(pci->Rows(), pci->Width(),
									 pcmgpdb->GetCostModelParams())
					 .Get());
E
Entong Shen 已提交
525 526 527 528 529 530 531 532 533 534 535 536
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostDML
//
//	@doc:
//		Cost of DML
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
537 538
CCostModelGPDB::CostDML(CMemoryPool *mp, CExpressionHandle &exprhdl,
						const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
E
Entong Shen 已提交
539 540 541 542 543
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalDML == exprhdl.Pop()->Eopid());

J
Jesse Zhang 已提交
544 545 546 547
	const CDouble dTupUpdateBandwidth =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpTupUpdateBandwith)
			->Get();
E
Entong Shen 已提交
548 549
	GPOS_ASSERT(0 < dTupUpdateBandwidth);

550 551
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
552

J
Jesse Zhang 已提交
553 554 555 556
	CCost costLocal = CCost(pci->NumRebinds() * (num_rows_outer * dWidthOuter) /
							dTupUpdateBandwidth);
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
557 558 559 560 561 562 563 564 565 566 567 568 569 570

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostScalarAgg
//
//	@doc:
//		Cost of scalar agg
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
571 572 573
CCostModelGPDB::CostScalarAgg(CMemoryPool *mp, CExpressionHandle &exprhdl,
							  const CCostModelGPDB *pcmgpdb,
							  const SCostingInfo *pci)
E
Entong Shen 已提交
574 575 576 577 578
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalScalarAgg == exprhdl.Pop()->Eopid());

579 580
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
581 582

	// get the number of aggregate columns
583
	const ULONG ulAggCols = exprhdl.DeriveUsedColumns(1)->Size();
E
Entong Shen 已提交
584
	// get the number of aggregate functions
585
	const ULONG ulAggFunctions = exprhdl.PexprScalarRepChild(1)->Arity();
E
Entong Shen 已提交
586

J
Jesse Zhang 已提交
587 588 589 590
	const CDouble dHashAggInputTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHashAggInputTupWidthCostUnit)
			->Get();
E
Entong Shen 已提交
591 592 593 594 595
	GPOS_ASSERT(0 < dHashAggInputTupWidthCostUnit);

	// scalarAgg cost is correlated with rows and width of the input tuples and the number of columns used in aggregate
	// It also depends on the complexity of the aggregate algorithm, which is hard to model yet shared by all the aggregate
	// operators, thus we ignore this part of cost for all.
J
Jesse Zhang 已提交
596 597 598
	CCost costLocal = CCost(pci->NumRebinds() *
							(num_rows_outer * dWidthOuter * ulAggCols *
							 ulAggFunctions * dHashAggInputTupWidthCostUnit));
E
Entong Shen 已提交
599

J
Jesse Zhang 已提交
600 601
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
602 603 604 605 606 607 608 609 610 611 612 613 614
	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostStreamAgg
//
//	@doc:
//		Cost of stream agg
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
615 616 617
CCostModelGPDB::CostStreamAgg(CMemoryPool *mp, CExpressionHandle &exprhdl,
							  const CCostModelGPDB *pcmgpdb,
							  const SCostingInfo *pci)
E
Entong Shen 已提交
618 619 620 621 622
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);

#ifdef GPOS_DEBUG
623 624
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
	GPOS_ASSERT(COperator::EopPhysicalStreamAgg == op_id ||
J
Jesse Zhang 已提交
625 626 627 628 629 630 631 632 633 634 635
				COperator::EopPhysicalStreamAggDeduplicate == op_id);
#endif	// GPOS_DEBUG

	const CDouble dHashAggOutputTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHashAggOutputTupWidthCostUnit)
			->Get();
	const CDouble dTupDefaultProcCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)
			->Get();
E
Entong Shen 已提交
636 637 638
	GPOS_ASSERT(0 < dHashAggOutputTupWidthCostUnit);
	GPOS_ASSERT(0 < dTupDefaultProcCostUnit);

639 640
	DOUBLE num_rows_outer = pci->PdRows()[0];
	DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
641 642

	// streamAgg cost is correlated with rows and width of input tuples and rows and width of output tuples
J
Jesse Zhang 已提交
643 644 645 646 647 648
	CCost costLocal =
		CCost(pci->NumRebinds() *
			  (num_rows_outer * dWidthOuter * dTupDefaultProcCostUnit +
			   pci->Rows() * pci->Width() * dHashAggOutputTupWidthCostUnit));
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
649 650 651 652 653 654 655 656 657 658 659 660 661
	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostSequence
//
//	@doc:
//		Cost of sequence
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
662 663 664
CCostModelGPDB::CostSequence(CMemoryPool *mp, CExpressionHandle &exprhdl,
							 const CCostModelGPDB *pcmgpdb,
							 const SCostingInfo *pci)
E
Entong Shen 已提交
665 666 667 668 669
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalSequence == exprhdl.Pop()->Eopid());

J
Jesse Zhang 已提交
670 671 672 673 674 675
	CCost costLocal = CCost(pci->NumRebinds() *
							CostTupleProcessing(pci->Rows(), pci->Width(),
												pcmgpdb->GetCostModelParams())
								.Get());
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
676 677 678 679 680 681 682 683 684 685 686 687 688 689

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostSort
//
//	@doc:
//		Cost of sort
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
690 691
CCostModelGPDB::CostSort(CMemoryPool *mp, CExpressionHandle &exprhdl,
						 const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
E
Entong Shen 已提交
692 693 694 695 696 697
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalSort == exprhdl.Pop()->Eopid());

	// log operation below
698 699 700
	const CDouble rows = CDouble(std::max(1.0, pci->Rows()));
	const CDouble num_rebinds = CDouble(pci->NumRebinds());
	const CDouble width = CDouble(pci->Width());
E
Entong Shen 已提交
701

J
Jesse Zhang 已提交
702 703 704 705
	const CDouble dSortTupWidthCost =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpSortTupWidthCostUnit)
			->Get();
E
Entong Shen 已提交
706 707 708
	GPOS_ASSERT(0 < dSortTupWidthCost);

	// sort cost is correlated with the number of rows and width of input tuples. We use n*log(n) for sorting complexity.
J
Jesse Zhang 已提交
709 710 711 712
	CCost costLocal =
		CCost(num_rebinds * (rows * rows.Log2() * width * dSortTupWidthCost));
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
713 714 715 716 717 718 719 720 721 722 723 724 725 726

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostTVF
//
//	@doc:
//		Cost of table valued function
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
727 728
CCostModelGPDB::CostTVF(CMemoryPool *,	// mp
						CExpressionHandle &
E
Entong Shen 已提交
729
#ifdef GPOS_DEBUG
J
Jesse Zhang 已提交
730 731 732 733
							exprhdl
#endif	// GPOS_DEBUG
						,
						const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
E
Entong Shen 已提交
734 735 736 737 738
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalTVF == exprhdl.Pop()->Eopid());

J
Jesse Zhang 已提交
739 740 741 742
	return CCost(pci->NumRebinds() *
				 CostTupleProcessing(pci->Rows(), pci->Width(),
									 pcmgpdb->GetCostModelParams())
					 .Get());
E
Entong Shen 已提交
743 744 745 746 747 748 749 750 751 752 753 754
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostUnionAll
//
//	@doc:
//		Cost of UnionAll
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
755 756 757
CCostModelGPDB::CostUnionAll(CMemoryPool *mp, CExpressionHandle &exprhdl,
							 const CCostModelGPDB *pcmgpdb,
							 const SCostingInfo *pci)
E
Entong Shen 已提交
758 759 760
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
761
	GPOS_ASSERT(NULL != CPhysicalUnionAll::PopConvert(exprhdl.Pop()));
E
Entong Shen 已提交
762

J
Jesse Zhang 已提交
763
	if (COperator::EopPhysicalParallelUnionAll == exprhdl.Pop()->Eopid())
764
	{
765
		return CostMaxChild(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
766 767
	}

J
Jesse Zhang 已提交
768 769 770 771 772 773
	CCost costLocal = CCost(pci->NumRebinds() *
							CostTupleProcessing(pci->Rows(), pci->Width(),
												pcmgpdb->GetCostModelParams())
								.Get());
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
774 775 776 777 778 779 780 781 782 783 784 785 786 787

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostHashAgg
//
//	@doc:
//		Cost of hash agg
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
788 789 790
CCostModelGPDB::CostHashAgg(CMemoryPool *mp, CExpressionHandle &exprhdl,
							const CCostModelGPDB *pcmgpdb,
							const SCostingInfo *pci)
E
Entong Shen 已提交
791 792 793 794 795
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);

#ifdef GPOS_DEBUG
796 797
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
	GPOS_ASSERT(COperator::EopPhysicalHashAgg == op_id ||
J
Jesse Zhang 已提交
798 799
				COperator::EopPhysicalHashAggDeduplicate == op_id);
#endif	// GPOS_DEBUG
E
Entong Shen 已提交
800

801
	DOUBLE num_rows_outer = pci->PdRows()[0];
E
Entong Shen 已提交
802 803 804 805 806 807 808 809 810 811 812

	// A local hash agg may stream partial aggregates to global agg when it's hash table is full to avoid spilling.
	// This is dertermined by the order of tuples received by local agg. In the worst case, the local hash agg may
	// see a tuple from each different group until its hash table fills up all available memory, and hence it produces
	// tuples as many as its input size. On the other hand, in the best case, the local agg may receive tuples sorted
	// by grouping columns, which allows it to complete all local aggregation in memory and produce exactly tuples as
	// the number of groups.
	//
	// Since we do not know the order of tuples received by local hash agg, we assume the number of output rows from local
	// agg is the average between input and output rows

813 814
	// the cardinality out as (rows + num_rows_outer)/2 to increase the local hash agg cost
	DOUBLE rows = pci->Rows();
E
Entong Shen 已提交
815
	CPhysicalHashAgg *popAgg = CPhysicalHashAgg::PopConvert(exprhdl.Pop());
J
Jesse Zhang 已提交
816 817
	if ((COperator::EgbaggtypeLocal == popAgg->Egbaggtype()) &&
		popAgg->FGeneratesDuplicates())
E
Entong Shen 已提交
818
	{
819
		rows = (rows + num_rows_outer) / 2.0;
E
Entong Shen 已提交
820 821 822
	}

	// get the number of grouping columns
J
Jesse Zhang 已提交
823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
	const ULONG ulGrpCols = CPhysicalHashAgg::PopConvert(exprhdl.Pop())
								->PdrgpcrGroupingCols()
								->Size();

	const CDouble dHashAggInputTupColumnCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHashAggInputTupColumnCostUnit)
			->Get();
	const CDouble dHashAggInputTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHashAggInputTupWidthCostUnit)
			->Get();
	const CDouble dHashAggOutputTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHashAggOutputTupWidthCostUnit)
			->Get();
E
Entong Shen 已提交
839 840 841 842 843 844 845 846
	GPOS_ASSERT(0 < dHashAggInputTupColumnCostUnit);
	GPOS_ASSERT(0 < dHashAggInputTupWidthCostUnit);
	GPOS_ASSERT(0 < dHashAggOutputTupWidthCostUnit);

	// hashAgg cost contains three parts: build hash table, aggregate tuples, and output tuples.
	// 1. build hash table is correlated with the number of rows and width of input tuples and the number of columns used.
	// 2. cost of aggregate tuples depends on the complexity of aggregation algorithm and thus is ignored.
	// 3. cost of output tuples is correlated with rows and width of returning tuples.
J
Jesse Zhang 已提交
847 848 849 850 851 852 853 854
	CCost costLocal =
		CCost(pci->NumRebinds() *
			  (num_rows_outer * ulGrpCols * dHashAggInputTupColumnCostUnit +
			   num_rows_outer * ulGrpCols * pci->Width() *
				   dHashAggInputTupWidthCostUnit +
			   rows * pci->Width() * dHashAggOutputTupWidthCostUnit));
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
855 856 857 858 859 860 861 862 863 864 865 866 867 868

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostHashJoin
//
//	@doc:
//		Cost of hash join
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
869 870 871
CCostModelGPDB::CostHashJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
							 const CCostModelGPDB *pcmgpdb,
							 const SCostingInfo *pci)
E
Entong Shen 已提交
872 873 874 875
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
#ifdef GPOS_DEBUG
876
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
J
Jesse Zhang 已提交
877 878 879 880 881 882
	GPOS_ASSERT(COperator::EopPhysicalInnerHashJoin == op_id ||
				COperator::EopPhysicalLeftSemiHashJoin == op_id ||
				COperator::EopPhysicalLeftAntiSemiHashJoin == op_id ||
				COperator::EopPhysicalLeftAntiSemiHashJoinNotIn == op_id ||
				COperator::EopPhysicalLeftOuterHashJoin == op_id);
#endif	// GPOS_DEBUG
E
Entong Shen 已提交
883

884 885
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
886
	const DOUBLE dRowsInner = pci->PdRows()[1];
887 888
	const DOUBLE dWidthInner = pci->GetWidth()[1];

J
Jesse Zhang 已提交
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939
	const CDouble dHJHashTableInitCostFactor =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableInitCostFactor)
			->Get();
	const CDouble dHJHashTableColumnCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableColumnCostUnit)
			->Get();
	const CDouble dHJHashTableWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableWidthCostUnit)
			->Get();
	const CDouble dJoinFeedingTupColumnCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
			->Get();
	const CDouble dJoinFeedingTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
			->Get();
	const CDouble dHJHashingTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHJHashingTupWidthCostUnit)
			->Get();
	const CDouble dJoinOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
			->Get();
	const CDouble dHJSpillingMemThreshold =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpHJSpillingMemThreshold)
			->Get();
	const CDouble dHJFeedingTupColumnSpillingCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(
				CCostModelParamsGPDB::EcpHJFeedingTupColumnSpillingCostUnit)
			->Get();
	const CDouble dHJFeedingTupWidthSpillingCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(
				CCostModelParamsGPDB::EcpHJFeedingTupWidthSpillingCostUnit)
			->Get();
	const CDouble dHJHashingTupWidthSpillingCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(
				CCostModelParamsGPDB::EcpHJHashingTupWidthSpillingCostUnit)
			->Get();
	const CDouble dPenalizeHJSkewUpperLimit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpPenalizeHJSkewUpperLimit)
			->Get();
E
Entong Shen 已提交
940 941 942 943 944 945 946 947 948 949 950
	GPOS_ASSERT(0 < dHJHashTableInitCostFactor);
	GPOS_ASSERT(0 < dHJHashTableColumnCostUnit);
	GPOS_ASSERT(0 < dHJHashTableWidthCostUnit);
	GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
	GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
	GPOS_ASSERT(0 < dHJHashingTupWidthCostUnit);
	GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
	GPOS_ASSERT(0 < dHJSpillingMemThreshold);
	GPOS_ASSERT(0 < dHJFeedingTupColumnSpillingCostUnit);
	GPOS_ASSERT(0 < dHJFeedingTupWidthSpillingCostUnit);
	GPOS_ASSERT(0 < dHJHashingTupWidthSpillingCostUnit);
951
	GPOS_ASSERT(0 < dPenalizeHJSkewUpperLimit);
E
Entong Shen 已提交
952 953

	// get the number of columns used in join condition
J
Jesse Zhang 已提交
954
	CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
955
	CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
956
	const ULONG ulColsUsed = pcrsUsed->Size();
E
Entong Shen 已提交
957 958 959 960 961

	// TODO 2014-03-14
	// currently, we hard coded a spilling memory threshold for judging whether hash join spills or not
	// In the future, we should calculate it based on the number of memory-intensive operators and statement memory available
	CCost costLocal(0);
962

J
Jesse Zhang 已提交
963
	// inner tuples fit in memory
E
Entong Shen 已提交
964
	if (dRowsInner * dWidthInner <= dHJSpillingMemThreshold)
965
	{
E
Entong Shen 已提交
966 967 968 969 970 971
		// hash join cost contains four parts:
		// 1. build hash table with inner tuples. This part is correlated with rows and width of
		// inner tuples and the number of columns used in join condition.
		// 2. feeding outer tuples. This part is correlated with rows and width of outer tuples
		// and the number of columns used.
		// 3. matching inner tuples. This part is correlated with rows and width of inner tuples.
972
		// 4. output tuples. This part is correlated with outer rows and width of the join result.
J
Jesse Zhang 已提交
973 974 975 976 977 978 979 980 981 982 983 984 985
		costLocal = CCost(
			pci->NumRebinds() *
			(
				// cost of building hash table
				dRowsInner * (ulColsUsed * dHJHashTableColumnCostUnit +
							  dWidthInner * dHJHashTableWidthCostUnit) +
				// cost of feeding outer tuples
				ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
				dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
				// cost of matching inner tuples
				dWidthInner * dRowsInner * dHJHashingTupWidthCostUnit +
				// cost of output tuples
				pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
E
Entong Shen 已提交
986 987 988
	}
	else
	{
J
Jesse Zhang 已提交
989
		// inner tuples spill
E
Entong Shen 已提交
990 991

		// hash join cost if spilling is the same as the non-spilling case, except that
992
		// parameter values are different.
J
Jesse Zhang 已提交
993 994 995 996 997 998 999 1000 1001
		costLocal = CCost(
			pci->NumRebinds() *
			(dHJHashTableInitCostFactor +
			 dRowsInner * (ulColsUsed * dHJHashTableColumnCostUnit +
						   dWidthInner * dHJHashTableWidthCostUnit) +
			 ulColsUsed * num_rows_outer * dHJFeedingTupColumnSpillingCostUnit +
			 dWidthOuter * num_rows_outer * dHJFeedingTupWidthSpillingCostUnit +
			 dWidthInner * dRowsInner * dHJHashingTupWidthSpillingCostUnit +
			 pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
E
Entong Shen 已提交
1002
	}
J
Jesse Zhang 已提交
1003 1004
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
1005

1006 1007
	CDouble skew_ratio = 1;
	ULONG arity = exprhdl.Arity();
J
Jesse Zhang 已提交
1008

1009 1010
	// Hashjoin with skewed HashRedistribute below them are expensive
	// find out if there is a skewed redistribute child of this HashJoin.
J
Jesse Zhang 已提交
1011
	if (!GPOS_FTRACE(EopttracePenalizeSkewedHashJoin))
1012 1013 1014 1015
	{
		for (ULONG ul = 0; ul < arity - 1; ++ul)
		{
			COperator *popChild = exprhdl.Pop(ul);
J
Jesse Zhang 已提交
1016 1017
			if (NULL == popChild ||
				COperator::EopPhysicalMotionHashDistribute != popChild->Eopid())
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
			{
				continue;
			}

			CPhysicalMotion *motion = CPhysicalMotion::PopConvert(popChild);
			CColRefSet *columns = motion->Pds()->PcrsUsed(mp);

			// we decide if there is a skew by calculating the NDVs of the HashRedistribute
			CDouble ndv = 1.0;
			CColRefSetIter iter(*columns);
			while (iter.Advance())
			{
				CColRef *colref = iter.Pcr();
				ndv = ndv * pci->Pcstats(ul)->GetNDVs(colref);
			}

			// if the NDVs are less than number of segments then there is definitely
			// a skew. NDV < 1 implies no stats exist for the columns involved. So we don't
			// want to take any decision.
			// In case of a skew, penalize the local cost of HashJoin with a
			// skew ratio = (num of segments)/ndv
			if (ndv < pcmgpdb->UlHosts() && (ndv >= 1))
			{
				CDouble sk = pcmgpdb->UlHosts() / ndv;
J
Jesse Zhang 已提交
1042
				skew_ratio = CDouble(std::max(sk.Get(), skew_ratio.Get()));
1043 1044 1045 1046 1047 1048 1049 1050
			}

			columns->Release();
		}
	}

	// if we end up penalizing redistribute too much, we will start getting gather motions
	// which are not necessarily a good idea. So we maintain a upper limit of skew.
J
Jesse Zhang 已提交
1051 1052
	skew_ratio =
		CDouble(std::min(dPenalizeHJSkewUpperLimit.Get(), skew_ratio.Get()));
1053
	return costChild + CCost(costLocal.Get() * skew_ratio);
E
Entong Shen 已提交
1054 1055
}

1056 1057 1058 1059 1060 1061 1062 1063 1064
//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostMergeJoin
//
//	@doc:
//		Cost of merge join
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1065 1066 1067
CCostModelGPDB::CostMergeJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
							  const CCostModelGPDB *pcmgpdb,
							  const SCostingInfo *pci)
1068 1069 1070 1071 1072 1073
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
#ifdef GPOS_DEBUG
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
	GPOS_ASSERT(COperator::EopPhysicalFullMergeJoin == op_id);
J
Jesse Zhang 已提交
1074
#endif	// GPOS_DEBUG
1075 1076 1077 1078 1079 1080

	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
	const DOUBLE dRowsInner = pci->PdRows()[1];
	const DOUBLE dWidthInner = pci->GetWidth()[1];

J
Jesse Zhang 已提交
1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100
	const CDouble dJoinFeedingTupColumnCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
			->Get();
	const CDouble dJoinFeedingTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
			->Get();
	const CDouble dJoinOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
			->Get();
	const CDouble dFilterColCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpFilterColCostUnit)
			->Get();
	const CDouble dOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)
			->Get();
1101 1102 1103 1104 1105 1106 1107
	GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
	GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
	GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
	GPOS_ASSERT(0 < dFilterColCostUnit);
	GPOS_ASSERT(0 < dOutputTupCostUnit);

	// get the number of columns used in join condition
J
Jesse Zhang 已提交
1108
	CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
1109
	CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
1110 1111 1112 1113
	const ULONG ulColsUsed = pcrsUsed->Size();

	// We assume for costing, that the outer tuples are unique. This means that
	// we will never have to rescan a portion of the inner side.
J
Jesse Zhang 已提交
1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128
	CCost costLocal = CCost(
		pci->NumRebinds() *
		(
			// feeding cost of outer
			ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
			dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
			// cost of matching
			(dRowsInner + num_rows_outer) * ulColsUsed * dFilterColCostUnit +
			// cost of extracting matched inner side
			pci->Rows() * dWidthInner * dOutputTupCostUnit +
			// cost of output tuples
			pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));

	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
1129 1130 1131 1132

	return costChild + costLocal;
}

E
Entong Shen 已提交
1133 1134 1135

//---------------------------------------------------------------------------
//	@function:
1136
//		CCostModelGPDB::CostIndexNLJoin
E
Entong Shen 已提交
1137 1138
//
//	@doc:
1139
//		Cost of inner or outer index-nljoin
E
Entong Shen 已提交
1140 1141 1142
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1143 1144 1145
CCostModelGPDB::CostIndexNLJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
								const CCostModelGPDB *pcmgpdb,
								const SCostingInfo *pci)
E
Entong Shen 已提交
1146 1147 1148
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
J
Jesse Zhang 已提交
1149 1150 1151
	GPOS_ASSERT(
		COperator::EopPhysicalInnerIndexNLJoin == exprhdl.Pop()->Eopid() ||
		COperator::EopPhysicalLeftOuterIndexNLJoin == exprhdl.Pop()->Eopid());
E
Entong Shen 已提交
1152

1153 1154
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
1155

J
Jesse Zhang 已提交
1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
	const CDouble dJoinFeedingTupColumnCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
			->Get();
	const CDouble dJoinFeedingTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
			->Get();
	const CDouble dJoinOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
			->Get();
E
Entong Shen 已提交
1168 1169 1170 1171 1172
	GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
	GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
	GPOS_ASSERT(0 < dJoinOutputTupCostUnit);

	// get the number of columns used in join condition
J
Jesse Zhang 已提交
1173
	CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
1174
	CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
1175
	const ULONG ulColsUsed = pcrsUsed->Size();
E
Entong Shen 已提交
1176 1177 1178 1179 1180 1181 1182

	// cost of Index apply contains three parts:
	// 1. feeding outer tuples. This part is correlated with rows and width of outer tuples
	// and the number of columns used.
	// 2. repetitive index scan of inner side for each feeding tuple. This part of cost is
	// calculated and counted in its index scan child node.
	// 3. output tuples. This part is correlated with outer rows and width of the join result.
J
Jesse Zhang 已提交
1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193
	CCost costLocal =
		CCost(pci->NumRebinds() *
			  (
				  // cost of feeding outer tuples
				  ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
				  dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
				  // cost of output tuples
				  pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));

	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
1194

1195
	ULONG risk = pci->Pcstats()->StatsEstimationRisk();
E
Entong Shen 已提交
1196 1197
	ULONG ulPenalizationFactor = 1;
	const CDouble dIndexJoinAllowedRiskThreshold =
J
Jesse Zhang 已提交
1198 1199 1200 1201 1202
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpIndexJoinAllowedRiskThreshold)
			->Get();
	BOOL fInnerJoin =
		COperator::EopPhysicalInnerIndexNLJoin == exprhdl.Pop()->Eopid();
1203 1204 1205 1206

	// Only apply penalize factor for inner index nestloop join, because we are more confident
	// on the cardinality estimation of outer join than inner join. So don't penalize outer join
	// cost, otherwise Orca generate bad plan.
1207
	if (fInnerJoin && dIndexJoinAllowedRiskThreshold < risk)
E
Entong Shen 已提交
1208
	{
1209
		ulPenalizationFactor = risk;
E
Entong Shen 已提交
1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
	}

	return CCost(ulPenalizationFactor * (costLocal + costChild));
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostNLJoin
//
//	@doc:
//		Cost of nljoin
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1225 1226 1227
CCostModelGPDB::CostNLJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
						   const CCostModelGPDB *pcmgpdb,
						   const SCostingInfo *pci)
E
Entong Shen 已提交
1228 1229 1230 1231 1232
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(CUtils::FNLJoin(exprhdl.Pop()));

1233 1234
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
1235
	const DOUBLE dRowsInner = pci->PdRows()[1];
1236
	const DOUBLE dWidthInner = pci->GetWidth()[1];
E
Entong Shen 已提交
1237

J
Jesse Zhang 已提交
1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
	const CDouble dJoinFeedingTupColumnCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
			->Get();
	const CDouble dJoinFeedingTupWidthCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
			->Get();
	const CDouble dJoinOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
			->Get();
	const CDouble dInitScan =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
			->Get();
	const CDouble dTableScanCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
			->Get();
	const CDouble dFilterColCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpFilterColCostUnit)
			->Get();
	const CDouble dOutputTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)
			->Get();
	const CDouble dNLJFactor =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpNLJFactor)
			->Get();
E
Entong Shen 已提交
1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280

	GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
	GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
	GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
	GPOS_ASSERT(0 < dInitScan);
	GPOS_ASSERT(0 < dTableScanCostUnit);
	GPOS_ASSERT(0 < dFilterColCostUnit);
	GPOS_ASSERT(0 < dOutputTupCostUnit);
	GPOS_ASSERT(0 < dNLJFactor);

	// get the number of columns used in join condition
J
Jesse Zhang 已提交
1281
	CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
1282
	CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
1283
	const ULONG ulColsUsed = pcrsUsed->Size();
E
Entong Shen 已提交
1284 1285 1286 1287 1288 1289 1290 1291 1292 1293

	// cost of nested loop join contains three parts:
	// 1. feeding outer tuples. This part is correlated with rows and width of outer tuples
	// and the number of columns used.
	// 2. repetitive scan of inner side for each feeding tuple. This part of cost consists of
	// the following:
	// a. repetitive scan and filter of the materialized inner side
	// b. extract matched inner side tuples
	// with the cardinality of outer tuples, rows and width of the materialized inner side.
	// 3. output tuples. This part is correlated with outer rows and width of the join result.
J
Jesse Zhang 已提交
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306
	CCost costLocal = CCost(
		pci->NumRebinds() *
		(
			// cost of feeding outer tuples
			ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
			dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
			// cost of repetitive table scan of inner side
			dInitScan +
			num_rows_outer * (
								 // cost of scan of inner side
								 dRowsInner * dWidthInner * dTableScanCostUnit +
								 // cost of filter of inner side
								 dRowsInner * ulColsUsed * dFilterColCostUnit)
E
Entong Shen 已提交
1307
			// cost of extracting matched inner side
J
Jesse Zhang 已提交
1308 1309 1310
			+ pci->Rows() * dWidthInner * dOutputTupCostUnit +
			// cost of output tuples
			pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
E
Entong Shen 已提交
1311

J
Jesse Zhang 已提交
1312 1313
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
1314 1315 1316 1317 1318 1319 1320 1321

	CCost costTotal = CCost(costLocal + costChild);

	// amplify NLJ cost based on NLJ factor and stats estimation risk
	// we don't want to penalize index join compared to nested loop join, so we make sure
	// that every time index join is penalized, we penalize nested loop join by at least the
	// same amount
	CDouble dPenalization = dNLJFactor;
1322
	const CDouble dRisk(pci->Pcstats()->StatsEstimationRisk());
E
Entong Shen 已提交
1323 1324 1325
	if (dRisk > dPenalization)
	{
		const CDouble dIndexJoinAllowedRiskThreshold =
J
Jesse Zhang 已提交
1326 1327 1328 1329
			pcmgpdb->GetCostModelParams()
				->PcpLookup(
					CCostModelParamsGPDB::EcpIndexJoinAllowedRiskThreshold)
				->Get();
E
Entong Shen 已提交
1330 1331 1332 1333 1334 1335
		if (dIndexJoinAllowedRiskThreshold < dRisk)
		{
			dPenalization = dRisk;
		}
	}

1336
	return CCost(costTotal * dPenalization);
E
Entong Shen 已提交
1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostMotion
//
//	@doc:
//		Cost of motion
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1349 1350 1351
CCostModelGPDB::CostMotion(CMemoryPool *mp, CExpressionHandle &exprhdl,
						   const CCostModelGPDB *pcmgpdb,
						   const SCostingInfo *pci)
E
Entong Shen 已提交
1352 1353 1354 1355
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);

1356 1357
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
	GPOS_ASSERT(COperator::EopPhysicalMotionGather == op_id ||
J
Jesse Zhang 已提交
1358 1359 1360 1361
				COperator::EopPhysicalMotionBroadcast == op_id ||
				COperator::EopPhysicalMotionHashDistribute == op_id ||
				COperator::EopPhysicalMotionRandom == op_id ||
				COperator::EopPhysicalMotionRoutedDistribute == op_id);
E
Entong Shen 已提交
1362

1363 1364
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
1365

E
Entong Shen 已提交
1366 1367
	// motion cost contains three parts: sending cost, interconnect cost, and receiving cost.
	// TODO 2014-03-18
1368 1369
	// in current cost model, interconnect cost is tied with receiving cost. Because we
	// only have one set calibration results in the dimension of the number of segments.
E
Entong Shen 已提交
1370 1371 1372
	// Once we calibrate the cost model with different number of segments, I will update
	// the function.

1373 1374 1375 1376
	CDouble dSendCostUnit(0);
	CDouble dRecvCostUnit(0);
	CDouble recvCost(0);

E
Entong Shen 已提交
1377
	CCost costLocal(0);
1378
	if (COperator::EopPhysicalMotionBroadcast == op_id)
E
Entong Shen 已提交
1379 1380
	{
		// broadcast cost is amplified by the number of segments
J
Jesse Zhang 已提交
1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391
		dSendCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpBroadcastSendCostUnit)
				->Get();
		dRecvCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpBroadcastRecvCostUnit)
				->Get();

		recvCost =
			num_rows_outer * dWidthOuter * pcmgpdb->UlHosts() * dRecvCostUnit;
E
Entong Shen 已提交
1392
	}
1393
	else if (COperator::EopPhysicalMotionHashDistribute == op_id ||
J
Jesse Zhang 已提交
1394 1395
			 COperator::EopPhysicalMotionRandom == op_id ||
			 COperator::EopPhysicalMotionRoutedDistribute == op_id)
E
Entong Shen 已提交
1396
	{
J
Jesse Zhang 已提交
1397 1398 1399 1400 1401 1402 1403 1404
		dSendCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpRedistributeSendCostUnit)
				->Get();
		dRecvCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpRedistributeRecvCostUnit)
				->Get();
E
Entong Shen 已提交
1405

1406 1407 1408 1409 1410 1411
		// Adjust the cost of no-op hashed distribution to correctly reflect that no tuple movement is needed
		CPhysicalMotion *pMotion = CPhysicalMotion::PopConvert(exprhdl.Pop());
		CDistributionSpec *pds = pMotion->Pds();
		if (CDistributionSpec::EdtHashedNoOp == pds->Edt())
		{
			// promote the plan with redistribution on same distributed columns of base table for parallel append
J
Jesse Zhang 已提交
1412 1413 1414 1415 1416 1417 1418 1419
			dSendCostUnit =
				pcmgpdb->GetCostModelParams()
					->PcpLookup(CCostModelParamsGPDB::EcpNoOpCostUnit)
					->Get();
			dRecvCostUnit =
				pcmgpdb->GetCostModelParams()
					->PcpLookup(CCostModelParamsGPDB::EcpNoOpCostUnit)
					->Get();
1420 1421
		}

1422
		recvCost = pci->Rows() * pci->Width() * dRecvCostUnit;
E
Entong Shen 已提交
1423
	}
1424
	else if (COperator::EopPhysicalMotionGather == op_id)
E
Entong Shen 已提交
1425
	{
J
Jesse Zhang 已提交
1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436
		dSendCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpGatherSendCostUnit)
				->Get();
		dRecvCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpGatherRecvCostUnit)
				->Get();

		recvCost =
			num_rows_outer * dWidthOuter * pcmgpdb->UlHosts() * dRecvCostUnit;
E
Entong Shen 已提交
1437
	}
1438 1439 1440 1441

	GPOS_ASSERT(0 <= dSendCostUnit);
	GPOS_ASSERT(0 <= dRecvCostUnit);

J
Jesse Zhang 已提交
1442 1443 1444
	costLocal =
		CCost(pci->NumRebinds() *
			  (num_rows_outer * dWidthOuter * dSendCostUnit + recvCost));
1445

1446

J
Jesse Zhang 已提交
1447
	if (COperator::EopPhysicalMotionBroadcast == op_id)
1448
	{
J
Jesse Zhang 已提交
1449 1450 1451 1452
		COptimizerConfig *optimizer_config =
			COptCtxt::PoctxtFromTLS()->GetOptimizerConfig();
		ULONG broadcast_threshold =
			optimizer_config->GetHint()->UlBroadcastThreshold();
1453

J
Jesse Zhang 已提交
1454
		if (num_rows_outer > broadcast_threshold)
1455
		{
1456 1457
			DOUBLE ulPenalizationFactor = 100000000000000.0;
			costLocal = CCost(ulPenalizationFactor);
1458 1459 1460 1461
		}
	}


J
Jesse Zhang 已提交
1462 1463
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostSequenceProject
//
//	@doc:
//		Cost of sequence project
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1478 1479 1480
CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
									const CCostModelGPDB *pcmgpdb,
									const SCostingInfo *pci)
E
Entong Shen 已提交
1481 1482 1483
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
J
Jesse Zhang 已提交
1484 1485
	GPOS_ASSERT(COperator::EopPhysicalSequenceProject ==
				exprhdl.Pop()->Eopid());
E
Entong Shen 已提交
1486

1487 1488
	const DOUBLE num_rows_outer = pci->PdRows()[0];
	const DOUBLE dWidthOuter = pci->GetWidth()[0];
E
Entong Shen 已提交
1489 1490

	ULONG ulSortCols = 0;
J
Jesse Zhang 已提交
1491 1492
	COrderSpecArray *pdrgpos =
		CPhysicalSequenceProject::PopConvert(exprhdl.Pop())->Pdrgpos();
1493
	const ULONG ulOrderSpecs = pdrgpos->Size();
E
Entong Shen 已提交
1494 1495 1496 1497 1498 1499
	for (ULONG ul = 0; ul < ulOrderSpecs; ul++)
	{
		COrderSpec *pos = (*pdrgpos)[ul];
		ulSortCols += pos->UlSortColumns();
	}

J
Jesse Zhang 已提交
1500 1501 1502 1503
	const CDouble dTupDefaultProcCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)
			->Get();
E
Entong Shen 已提交
1504 1505 1506
	GPOS_ASSERT(0 < dTupDefaultProcCostUnit);

	// we process (sorted window of) input tuples to compute window function values
J
Jesse Zhang 已提交
1507 1508 1509 1510 1511
	CCost costLocal =
		CCost(pci->NumRebinds() * (ulSortCols * num_rows_outer * dWidthOuter *
								   dTupDefaultProcCostUnit));
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostIndexScan
//
//	@doc:
//		Cost of index scan
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1526 1527 1528 1529
CCostModelGPDB::CostIndexScan(CMemoryPool *,  // mp
							  CExpressionHandle &exprhdl,
							  const CCostModelGPDB *pcmgpdb,
							  const SCostingInfo *pci)
E
Entong Shen 已提交
1530 1531 1532 1533 1534
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);

	COperator *pop = exprhdl.Pop();
1535 1536
	COperator::EOperatorId op_id = pop->Eopid();
	GPOS_ASSERT(COperator::EopPhysicalIndexScan == op_id ||
J
Jesse Zhang 已提交
1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553
				COperator::EopPhysicalDynamicIndexScan == op_id);

	const CDouble dTableWidth =
		CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();

	const CDouble dIndexFilterCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
			->Get();
	const CDouble dIndexScanTupCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpIndexScanTupCostUnit)
			->Get();
	const CDouble dIndexScanTupRandomFactor =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpIndexScanTupRandomFactor)
			->Get();
E
Entong Shen 已提交
1554 1555 1556 1557
	GPOS_ASSERT(0 < dIndexFilterCostUnit);
	GPOS_ASSERT(0 < dIndexScanTupCostUnit);
	GPOS_ASSERT(0 < dIndexScanTupRandomFactor);

1558
	CDouble dRowsIndex = pci->Rows();
E
Entong Shen 已提交
1559 1560

	ULONG ulIndexKeys = 1;
1561
	if (COperator::EopPhysicalIndexScan == op_id)
E
Entong Shen 已提交
1562
	{
1563
		ulIndexKeys = CPhysicalIndexScan::PopConvert(pop)->Pindexdesc()->Keys();
E
Entong Shen 已提交
1564 1565 1566
	}
	else
	{
J
Jesse Zhang 已提交
1567 1568
		ulIndexKeys =
			CPhysicalDynamicIndexScan::PopConvert(pop)->Pindexdesc()->Keys();
E
Entong Shen 已提交
1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579
	}

	// TODO: 2014-02-01
	// Add logic to judge if the index column used in the filter is the first key of a multi-key index or not.
	// and separate the cost functions for the two cases.

	// index scan cost contains two parts: index-column lookup and output tuple cost.
	// 1. index-column lookup: correlated with index lookup rows, the number of index columns used in lookup,
	// table width and a randomIOFactor.
	// 2. output tuple cost: this is handled by the Filter on top of IndexScan, if no Filter exists, we add output cost
	// when we sum-up children cost
S
Sambitesh Dash 已提交
1580

J
Jesse Zhang 已提交
1581 1582 1583 1584
	CDouble dCostPerIndexRow = ulIndexKeys * dIndexFilterCostUnit +
							   dTableWidth * dIndexScanTupCostUnit;
	return CCost(pci->NumRebinds() *
				 (dRowsIndex * dCostPerIndexRow + dIndexScanTupRandomFactor));
E
Entong Shen 已提交
1585 1586 1587
}


1588
CCost
J
Jesse Zhang 已提交
1589 1590 1591 1592 1593
CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *,		   // mp
								  CExpressionHandle &,	   //exprhdl
								  const CCostModelGPDB *,  // pcmgpdb
								  const SCostingInfo *pci  //pci
)
1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604
{
	// FIXME: Gather relation's visibility map statistics and use that info to
	// create a cost model. When a block is visible then the scan can rely
	// solely on the values stored in the index and does not have to open the
	// corresponding heap page of the relation. If the blocks are not visible
	// then there is no benefit over an index scan and you pay overhead of
	// looking at visibility map.
	pci->NumRebinds();
	return CCost(std::numeric_limits<double>::max());
}

E
Entong Shen 已提交
1605
CCost
J
Jesse Zhang 已提交
1606 1607 1608
CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
									const CCostModelGPDB *pcmgpdb,
									const SCostingInfo *pci)
E
Entong Shen 已提交
1609 1610 1611
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
J
Jesse Zhang 已提交
1612 1613 1614
	GPOS_ASSERT(
		COperator::EopPhysicalBitmapTableScan == exprhdl.Pop()->Eopid() ||
		COperator::EopPhysicalDynamicBitmapTableScan == exprhdl.Pop()->Eopid());
E
Entong Shen 已提交
1615

1616
	CCost result(0.0);
J
Jesse Zhang 已提交
1617 1618
	CExpression *pexprIndexCond =
		exprhdl.PexprScalarRepChild(1 /*child_index*/);
1619
	CColRefSet *pcrsUsed = pexprIndexCond->DeriveUsedColumns();
1620
	CColRefSet *outerRefs = exprhdl.DeriveOuterReferences();
1621
	CColRefSet *pcrsLocalUsed = GPOS_NEW(mp) CColRefSet(mp, *pcrsUsed);
1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
	IMDIndex::EmdindexType indexType = IMDIndex::EmdindSentinel;

	if (COperator::EopScalarBitmapIndexProbe == pexprIndexCond->Pop()->Eopid())
	{
		indexType = CScalarBitmapIndexProbe::PopConvert(pexprIndexCond->Pop())
						->Pindexdesc()
						->IndexType();
	}

	BOOL isInPredOnBtreeIndex =
		(IMDIndex::EmdindBtree == indexType &&
		 COperator::EopScalarArrayCmp == (*pexprIndexCond)[0]->Pop()->Eopid());
E
Entong Shen 已提交
1634

1635 1636 1637 1638
	// subtract outer references from the used colrefs, so we can see
	// how many colrefs are used for this table
	pcrsLocalUsed->Exclude(outerRefs);

1639 1640
	const DOUBLE rows = pci->Rows();
	const DOUBLE width = pci->Width();
J
Jesse Zhang 已提交
1641 1642 1643 1644 1645 1646 1647
	CDouble dInitRebind =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpBitmapScanRebindCost)
			->Get();

	if (COperator::EopScalarBitmapIndexProbe !=
			pexprIndexCond->Pop()->Eopid() ||
1648 1649 1650
		1 < pcrsLocalUsed->Size() ||
		(isInPredOnBtreeIndex && rows > 2.0 &&
		 !GPOS_FTRACE(EopttraceCalibratedBitmapIndexCostModel)))
E
Entong Shen 已提交
1651
	{
1652 1653 1654 1655 1656 1657 1658
		// Child is Bitmap AND/OR, or we use Multi column index or this is an IN predicate
		// that's used with the "calibrated" cost model.
		// Handling the IN predicate in this code path is to avoid plan regressions from
		// earlier versions of the code that treated IN predicates like ORs and therefore
		// also handled them in this code path. This is especially noticeable for btree
		// indexes that often have a high NDV, because the small/large NDV cost model
		// produces very high cost for cases with a higher NDV.
J
Jesse Zhang 已提交
1659 1660 1661 1662
		const CDouble dIndexFilterCostUnit =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
				->Get();
1663 1664

		GPOS_ASSERT(0 < dIndexFilterCostUnit);
1665 1666

		// check whether the user specified overriding values in gucs
J
Jesse Zhang 已提交
1667 1668 1669 1670
		CDouble dInitScan =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
				->Get();
1671

1672
		GPOS_ASSERT(dInitScan >= 0 && dInitRebind >= 0);
1673 1674 1675

		// For now we are trying to cost Bitmap Scan similar to Index Scan. dIndexFilterCostUnit is
		// the dominant factor in costing Index Scan so we are using it in our model. Also we are giving
1676 1677 1678
		// Bitmap Scan a start up cost similar to Sequential Scan. Note that in this code path we add the
		// relatively high dInitScan cost, while in the other code paths below (CostBitmapLargeNDV and
		// CostBitmapSmallNDV) we don't. That's something we should look into.
E
Entong Shen 已提交
1679

1680 1681
		// Conceptually the cost of evaluating index qual is also linear in the
		// number of index columns, but we're only accounting for the dominant cost
E
Entong Shen 已提交
1682

J
Jesse Zhang 已提交
1683 1684 1685 1686 1687 1688
		result =
			CCost(	// cost for each byte returned by the index scan plus cost for incremental rebinds
				pci->NumRebinds() *
					(rows * width * dIndexFilterCostUnit + dInitRebind) +
				// init cost
				dInitScan);
1689
	}
1690
	else
E
Entong Shen 已提交
1691
	{
1692 1693 1694
		// if the expression is const table get, the pcrsUsed is empty
		// so we use minimum value MinDistinct for dNDV in that case.
		CDouble dNDV = CHistogram::MinDistinct;
1695 1696 1697 1698 1699
		CDouble dNDVThreshold =
			pcmgpdb->GetCostModelParams()
				->PcpLookup(CCostModelParamsGPDB::EcpBitmapNDVThreshold)
				->Get();

1700 1701 1702 1703 1704
		if (rows < 1.0)
		{
			// if we aren't accessing a row every rebind, then don't charge a cost for those cases where we don't have a row
			dNDV = rows;
		}
J
Jesse Zhang 已提交
1705
		else if (1 == pcrsLocalUsed->Size())  // if you only have one local pred
1706
		{
J
Jesse Zhang 已提交
1707
			CColRef *pcrIndexCond = pcrsLocalUsed->PcrFirst();
1708
			GPOS_ASSERT(NULL != pcrIndexCond);
1709
			// get the num distinct for the rows returned by the predicate
1710
			dNDV = pci->Pcstats()->GetNDVs(pcrIndexCond);
1711
			// if there's an outerref
J
Jesse Zhang 已提交
1712 1713
			if (1 < pcrsUsed->Size() && COperator::EopScalarBitmapIndexProbe ==
											pexprIndexCond->Pop()->Eopid())
1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724
			{
				CExpression *pexprScalarCmp = (*pexprIndexCond)[0];
				// The index condition contains an outer reference. Adjust
				// the NDV to 1, if we compare the colref with a single value
				if (CPredicateUtils::IsEqualityOp(pexprScalarCmp))
				{
					dNDV = 1.0;
				}
			}
		}

J
Jesse Zhang 已提交
1725
		if (!GPOS_FTRACE(EopttraceCalibratedBitmapIndexCostModel))
1726
		{
1727
			// optimizer_cost_model = 'calibrated'
1728 1729 1730 1731 1732 1733 1734 1735
			if (dNDVThreshold <= dNDV)
			{
				result = CostBitmapLargeNDV(pcmgpdb, pci, dNDV);
			}
			else
			{
				result = CostBitmapSmallNDV(pcmgpdb, pci, dNDV);
			}
1736 1737 1738
		}
		else
		{
1739
			// optimizer_cost_model = 'experimental'
J
Jesse Zhang 已提交
1740 1741 1742 1743
			CDouble dBitmapIO =
				pcmgpdb->GetCostModelParams()
					->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostSmallNDV)
					->Get();
1744
			CDouble c5_dInitScan =
J
Jesse Zhang 已提交
1745 1746 1747
				pcmgpdb->GetCostModelParams()
					->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
					->Get();
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760
			CDouble c3_dBitmapPageCost =
				pcmgpdb->GetCostModelParams()
					->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCost)
					->Get();
			BOOL isAOTable = CPhysicalScan::PopConvert(exprhdl.Pop())
								 ->Ptabdesc()
								 ->IsAORowOrColTable();

			// some cost constants determined with the cal_bitmap_test.py script
			CDouble c1_cost_per_row(0.03);
			CDouble c2_cost_per_byte(0.0001);
			CDouble bitmap_union_cost_per_distinct_value(0.000027);
			CDouble init_cost_advantage_for_bitmap_scan(0.9);
J
Jesse Zhang 已提交
1761

1762
			if (IMDIndex::EmdindBtree == indexType)
1763
			{
1764 1765
				// btree indexes are not sensitive to the NDV, since they don't have any bitmaps
				c3_dBitmapPageCost = 0.0;
1766
			}
1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781

			// Give the index scan a small initial advantage over the table scan, so we use indexes
			// for small tables - this should avoid having table scan and index scan costs being
			// very close together for many small queries.
			c5_dInitScan = c5_dInitScan * init_cost_advantage_for_bitmap_scan;

			// The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script
			// The following dSizeCost is in the form C1 * rows + C2 * rows * width. This is because the width should have
			// significantly less weight than rows as the execution time does not grow as fast in regards to width
			CDouble dSizeCost = dBitmapIO * (rows * c1_cost_per_row +
											 rows * width * c2_cost_per_byte);

			CDouble bitmapUnionCost = 0;

			if (!isAOTable && indexType == IMDIndex::EmdindBitmap && dNDV > 1.0)
1782
			{
1783 1784 1785 1786 1787 1788 1789 1790 1791 1792
				CDouble baseTableRows = CPhysicalScan::PopConvert(exprhdl.Pop())
											->PstatsBaseTable()
											->Rows();

				// for bitmap index scans on heap tables, we found that there is an additional cost
				// associated with unioning them that is proportional to the number of bitmaps involved
				// (dNDV-1) times the width of the bitmap (proportional to the number of rows in the table)
				bitmapUnionCost = std::max(0.0, dNDV.Get() - 1.0) *
								  baseTableRows *
								  bitmap_union_cost_per_distinct_value;
1793
			}
1794 1795 1796 1797 1798

			result = CCost(pci->NumRebinds() *
							   (dSizeCost + dNDV * c3_dBitmapPageCost +
								dInitRebind + bitmapUnionCost) +
						   c5_dInitScan);
1799
		}
E
Entong Shen 已提交
1800 1801
	}

1802 1803 1804
	pcrsLocalUsed->Release();

	return result;
E
Entong Shen 已提交
1805 1806 1807 1808
}


CCost
J
Jesse Zhang 已提交
1809 1810
CCostModelGPDB::CostBitmapSmallNDV(const CCostModelGPDB *pcmgpdb,
								   const SCostingInfo *pci, CDouble dNDV)
E
Entong Shen 已提交
1811
{
1812 1813
	const DOUBLE rows = pci->Rows();
	const DOUBLE width = pci->Width();
E
Entong Shen 已提交
1814

1815
	CDouble dSize = (rows * width) * 0.001;
E
Entong Shen 已提交
1816

J
Jesse Zhang 已提交
1817 1818 1819 1820 1821 1822 1823 1824
	CDouble dBitmapIO =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostSmallNDV)
			->Get();
	CDouble dBitmapPageCost =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCostSmallNDV)
			->Get();
1825 1826 1827 1828 1829 1830 1831
	CDouble effectiveNDV = dNDV;

	if (rows < 1.0)
	{
		// if we aren't accessing a row every rebind, then don't charge a cost for those cases where we don't have a row
		effectiveNDV = rows;
	}
E
Entong Shen 已提交
1832

J
Jesse Zhang 已提交
1833 1834
	return CCost(pci->NumRebinds() *
				 (dBitmapIO * dSize + dBitmapPageCost * effectiveNDV));
E
Entong Shen 已提交
1835 1836 1837 1838
}


CCost
J
Jesse Zhang 已提交
1839 1840
CCostModelGPDB::CostBitmapLargeNDV(const CCostModelGPDB *pcmgpdb,
								   const SCostingInfo *pci, CDouble dNDV)
E
Entong Shen 已提交
1841
{
1842 1843
	const DOUBLE rows = pci->Rows();
	const DOUBLE width = pci->Width();
E
Entong Shen 已提交
1844

1845
	CDouble dSize = (rows * width * dNDV) * 0.001;
J
Jesse Zhang 已提交
1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856
	CDouble dBitmapIO =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostLargeNDV)
			->Get();
	CDouble dBitmapPageCost =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCostLargeNDV)
			->Get();

	return CCost(pci->NumRebinds() *
				 (dBitmapIO * dSize + dBitmapPageCost * dNDV));
E
Entong Shen 已提交
1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867
}

//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostScan
//
//	@doc:
//		Cost of scan
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1868 1869 1870
CCostModelGPDB::CostScan(CMemoryPool *,	 // mp
						 CExpressionHandle &exprhdl,
						 const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
E
Entong Shen 已提交
1871 1872 1873 1874 1875
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);

	COperator *pop = exprhdl.Pop();
1876 1877 1878 1879
	COperator::EOperatorId op_id = pop->Eopid();
	GPOS_ASSERT(COperator::EopPhysicalTableScan == op_id ||
				COperator::EopPhysicalDynamicTableScan == op_id ||
				COperator::EopPhysicalExternalScan == op_id);
E
Entong Shen 已提交
1880

J
Jesse Zhang 已提交
1881 1882 1883 1884 1885 1886
	const CDouble dInitScan =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
			->Get();
	const CDouble dTableWidth =
		CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();
1887

E
Entong Shen 已提交
1888
	// Get total rows for each host to scan
J
Jesse Zhang 已提交
1889 1890 1891 1892
	const CDouble dTableScanCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
			->Get();
E
Entong Shen 已提交
1893 1894
	GPOS_ASSERT(0 < dTableScanCostUnit);

1895
	switch (op_id)
E
Entong Shen 已提交
1896 1897 1898 1899 1900 1901 1902 1903
	{
		case COperator::EopPhysicalTableScan:
		case COperator::EopPhysicalDynamicTableScan:
		case COperator::EopPhysicalExternalScan:
			// table scan cost considers only retrieving tuple cost,
			// since we scan the entire table here, the cost is correlated with table rows and table width,
			// since Scan's parent operator may be a filter that will be pushed into Scan node in GPDB plan,
			// we add Scan output tuple cost in the parent operator and not here
J
Jesse Zhang 已提交
1904 1905 1906
			return CCost(
				pci->NumRebinds() *
				(dInitScan + pci->Rows() * dTableWidth * dTableScanCostUnit));
E
Entong Shen 已提交
1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922
		default:
			GPOS_ASSERT(!"invalid index scan");
			return CCost(0);
	}
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostFilter
//
//	@doc:
//		Cost of filter
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1923 1924 1925
CCostModelGPDB::CostFilter(CMemoryPool *mp, CExpressionHandle &exprhdl,
						   const CCostModelGPDB *pcmgpdb,
						   const SCostingInfo *pci)
E
Entong Shen 已提交
1926 1927 1928 1929 1930 1931
{
	GPOS_ASSERT(NULL != pcmgpdb);
	GPOS_ASSERT(NULL != pci);
	GPOS_ASSERT(COperator::EopPhysicalFilter == exprhdl.Pop()->Eopid());

	const DOUBLE dInput = pci->PdRows()[0];
1932
	const ULONG ulFilterCols = exprhdl.DeriveUsedColumns(1)->Size();
E
Entong Shen 已提交
1933

J
Jesse Zhang 已提交
1934 1935 1936 1937
	const CDouble dFilterColCostUnit =
		pcmgpdb->GetCostModelParams()
			->PcpLookup(CCostModelParamsGPDB::EcpFilterColCostUnit)
			->Get();
E
Entong Shen 已提交
1938 1939 1940 1941 1942
	GPOS_ASSERT(0 < dFilterColCostUnit);

	// filter cost is correlated with the input rows and the number of filter columns.
	CCost costLocal = CCost(dInput * ulFilterCols * dFilterColCostUnit);

1943
	costLocal = CCost(costLocal.Get() * pci->NumRebinds());
J
Jesse Zhang 已提交
1944 1945
	CCost costChild =
		CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
E
Entong Shen 已提交
1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959

	return costLocal + costChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::Cost
//
//	@doc:
//		Main driver
//
//---------------------------------------------------------------------------
CCost
J
Jesse Zhang 已提交
1960 1961 1962
CCostModelGPDB::Cost(
	CExpressionHandle &exprhdl,	 // handle gives access to expression properties
	const SCostingInfo *pci) const
E
Entong Shen 已提交
1963 1964 1965
{
	GPOS_ASSERT(NULL != pci);

1966 1967
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
	if (FUnary(op_id))
E
Entong Shen 已提交
1968
	{
1969
		return CostUnary(m_mp, exprhdl, pci, m_cost_model_params);
E
Entong Shen 已提交
1970 1971 1972
	}

	FnCost *pfnc = NULL;
1973
	const ULONG size = GPOS_ARRAY_SIZE(m_rgcm);
E
Entong Shen 已提交
1974 1975

	// find the cost function corresponding to the given operator
1976
	for (ULONG ul = 0; pfnc == NULL && ul < size; ul++)
E
Entong Shen 已提交
1977
	{
1978
		if (op_id == m_rgcm[ul].m_eopid)
E
Entong Shen 已提交
1979 1980 1981 1982 1983 1984
		{
			pfnc = m_rgcm[ul].m_pfnc;
		}
	}
	GPOS_ASSERT(NULL != pfnc);

1985
	return pfnc(m_mp, exprhdl, this, pci);
E
Entong Shen 已提交
1986 1987 1988
}

// EOF