提交 819107b7 编写于 作者: B Bhuvnesh Chaudhary 提交者: Bhuvnesh

[#147774653] Implemented ValuesScan Operator in ORCA

This commit introduces a new operator for ValuesScan. Previously, we
generated `UNION ALL` for cases where the VALUES lists passed are all
constants; now a new operator, CLogicalConstTable, with an array of
const tuples is generated instead.

Once the plan is generated by ORCA, it will be translated to valuesscan
node in GPDB.

This enhancement helps significantly in improving the total run time for the queries
involving values scan in ORCA with const values.
Signed-off-by: NEkta Khanna <ekhanna@pivotal.io>
上级 1cb80806
......@@ -151,6 +151,7 @@ CTranslatorDXLToPlStmt::InitTranslators()
{EdxlopPhysicalDynamicBitmapTableScan, &gpopt::CTranslatorDXLToPlStmt::PplanBitmapTableScan},
{EdxlopPhysicalCTAS, &gpopt::CTranslatorDXLToPlStmt::PplanCTAS},
{EdxlopPhysicalPartitionSelector, &gpopt::CTranslatorDXLToPlStmt::PplanPartitionSelector},
{EdxlopPhysicalValuesScan, &gpopt::CTranslatorDXLToPlStmt::PplanValueScan},
};
const ULONG ulTranslators = GPOS_ARRAY_SIZE(rgTranslators);
......@@ -2001,6 +2002,77 @@ CTranslatorDXLToPlStmt::PrteFromDXLTVF
return prte;
}
//---------------------------------------------------------------------------
//	@function:
//		CTranslatorDXLToPlStmt::PrteFromDXLValueScan
//
//	@doc:
//		Create a range table entry of kind RTE_VALUES from a
//		CDXLPhysicalValuesScan node.
//
//		The alias name is taken from the operator name and the column names
//		from the project list child (EdxltsIndexProjList). For every project
//		element a (column id -> 1-based attribute number) mapping is inserted
//		into pdxltrctxbt. Every child starting at EdxlValIndexConstStart is
//		treated as one value list; each of its children is translated into a
//		scalar expression and the resulting lists are stored in
//		prte->values_lists.
//
//---------------------------------------------------------------------------
RangeTblEntry *
CTranslatorDXLToPlStmt::PrteFromDXLValueScan
	(
	const CDXLNode *pdxlnValueScan,
	CDXLTranslateContext *pdxltrctxOut,
	CDXLTranslateContextBaseTable *pdxltrctxbt,
	Plan *pplanParent
	)
{
	CDXLPhysicalValuesScan *pdxlop = CDXLPhysicalValuesScan::PdxlopConvert(pdxlnValueScan->Pdxlop());

	RangeTblEntry *prte = MakeNode(RangeTblEntry);

	prte->relid = InvalidOid;
	prte->subquery = NULL;
	prte->rtekind = RTE_VALUES;
	prte->inh = false;			/* never true for values RTEs */
	prte->inFromCl = true;
	prte->requiredPerms = 0;
	prte->checkAsUser = InvalidOid;

	Alias *palias = MakeNode(Alias);
	palias->colnames = NIL;

	// get value alias
	palias->aliasname = CTranslatorUtils::SzFromWsz(pdxlop->PstrOpName()->Wsz());

	// project list
	CDXLNode *pdxlnPrL = (*pdxlnValueScan)[EdxltsIndexProjList];

	// get column names
	const ULONG ulCols = pdxlnPrL->UlArity();
	for (ULONG ul = 0; ul < ulCols; ul++)
	{
		CDXLNode *pdxlnPrElem = (*pdxlnPrL)[ul];
		CDXLScalarProjElem *pdxlopPrEl = CDXLScalarProjElem::PdxlopConvert(pdxlnPrElem->Pdxlop());

		CHAR *szColName = CTranslatorUtils::SzFromWsz(pdxlopPrEl->PmdnameAlias()->Pstr()->Wsz());

		Value *pvalColName = gpdb::PvalMakeString(szColName);
		palias->colnames = gpdb::PlAppendElement(palias->colnames, pvalColName);

		// save mapping col id -> index in translate context;
		// attribute numbers are 1-based, hence ul + 1
		(void) pdxltrctxbt->FInsertMapping(pdxlopPrEl->UlId(), ul + 1 /*iAttno*/);
	}

	// column id -> Var mapping used while translating the scalar children
	CMappingColIdVarPlStmt mapcidvarplstmt(m_pmp, pdxltrctxbt, NULL, pdxltrctxOut, m_pctxdxltoplstmt, pplanParent);

	// every child from EdxlValIndexConstStart onwards is one list of values
	const ULONG ulChildren = pdxlnValueScan->UlArity();
	List *plValuesLists = NIL;

	for (ULONG ulValue = EdxlValIndexConstStart; ulValue < ulChildren; ulValue++)
	{
		CDXLNode *pdxlnValueList = (*pdxlnValueScan)[ulValue];

		// distinct name: must not shadow the project list's ulCols above
		const ULONG ulValueListCols = pdxlnValueList->UlArity();
		List *plValueList = NIL;

		for (ULONG ulCol = 0; ulCol < ulValueListCols; ulCol++)
		{
			Expr *pexprValue = m_pdxlsctranslator->PexprFromDXLNodeScalar((*pdxlnValueList)[ulCol], &mapcidvarplstmt);
			plValueList = gpdb::PlAppendElement(plValueList, pexprValue);
		}

		plValuesLists = gpdb::PlAppendElement(plValuesLists, plValueList);
	}

	prte->values_lists = plValuesLists;
	prte->eref = palias;

	return prte;
}
//---------------------------------------------------------------------------
// @function:
// CTranslatorDXLToPlStmt::PnljFromDXLNLJ
......@@ -6115,4 +6187,69 @@ CTranslatorDXLToPlStmt::PplanBitmapIndexProbe
return pplan;
}
//---------------------------------------------------------------------------
//	@function:
//		CTranslatorDXLToPlStmt::PplanValueScan
//
//	@doc:
//		Translates a DXL Value Scan node into a GPDB ValuesScan plan node.
//
//		A new RTE_VALUES range table entry is built from the DXL node and
//		appended to the range table; the scan refers to it through scanrelid.
//		The plan node receives a copy of the entry's values lists, the
//		translated operator costs and the target list translated from the
//		project list child.
//
//		pdrgpdxltrctxPrevSiblings is not referenced by this translator.
//
//---------------------------------------------------------------------------
Plan *
CTranslatorDXLToPlStmt::PplanValueScan
	(
	const CDXLNode *pdxlnValueScan,
	CDXLTranslateContext *pdxltrctxOut,
	Plan *pplanParent,
	DrgPdxltrctx *pdrgpdxltrctxPrevSiblings
	)
{
	// a values scan node must have at least 2 children: projection list and
	// at least 1 value list; verify this before any child is accessed
	GPOS_ASSERT(2 <= pdxlnValueScan->UlArity());

	// translation context for column mappings
	CDXLTranslateContextBaseTable dxltrctxbt(m_pmp);

	// we will add the new range table entry as the last element of the range table
	Index iRel = gpdb::UlListLength(m_pctxdxltoplstmt->PlPrte()) + 1;

	dxltrctxbt.SetIdx(iRel);

	// create value scan node
	ValuesScan *pvaluescan = MakeNode(ValuesScan);
	pvaluescan->scan.scanrelid = iRel;
	Plan *pplan = &(pvaluescan->scan.plan);

	RangeTblEntry *prte = PrteFromDXLValueScan(pdxlnValueScan, pdxltrctxOut, &dxltrctxbt, pplan);
	GPOS_ASSERT(NULL != prte);

	// the plan node keeps its own copy of the values lists held by the RTE
	pvaluescan->values_lists = (List *) gpdb::PvCopyObject(prte->values_lists);

	m_pctxdxltoplstmt->AddRTE(prte);

	pplan->plan_node_id = m_pctxdxltoplstmt->UlNextPlanId();
	pplan->plan_parent_node_id = IPlanId(pplanParent);
	pplan->nMotionNodes = 0;

	// translate operator costs
	TranslatePlanCosts
		(
		CDXLPhysicalProperties::PdxlpropConvert(pdxlnValueScan->Pdxlprop())->Pdxlopcost(),
		&(pplan->startup_cost),
		&(pplan->total_cost),
		&(pplan->plan_rows),
		&(pplan->plan_width)
		);

	CDXLNode *pdxlnPrL = (*pdxlnValueScan)[EdxltsIndexProjList];

	// translate proj list
	List *plTargetList = PlTargetListFromProjList
						(
						pdxlnPrL,
						&dxltrctxbt,
						NULL,
						pdxltrctxOut,
						pplan
						);

	pplan->targetlist = plTargetList;

	return (Plan *) pvaluescan;
}
// EOF
......@@ -3012,23 +3012,29 @@ CTranslatorQueryToDXL::PdxlnFromValues
{
List *plTuples = prte->values_lists;
GPOS_ASSERT(NULL != plTuples);
const ULONG ulValues = gpdb::UlListLength(plTuples);
GPOS_ASSERT(0 < ulValues);
// children of the UNION ALL
DrgPdxln *pdrgpdxln = GPOS_NEW(m_pmp) DrgPdxln(m_pmp);
// array of datum arrays for Values
DrgPdrgPdxldatum *pdrgpdrgpdxldatumValues = GPOS_NEW(m_pmp) DrgPdrgPdxldatum(m_pmp);
// array of input colid arrays
DrgPdrgPul *pdrgpdrgulInputColIds = GPOS_NEW(m_pmp) DrgPdrgPul(m_pmp);
// array of column descriptor for the UNION ALL operator
DrgPdxlcd *pdrgpdxlcd = GPOS_NEW(m_pmp) DrgPdxlcd(m_pmp);
// translate the tuples in the value scan
ULONG ulTuplePos = 0;
ListCell *plcTuple = NULL;
GPOS_ASSERT(NULL != prte->eref);
// flag for checking value list has only constants. For all constants --> VALUESCAN operator else retain UnionAll
BOOL fAllConstant = true;
ForEach (plcTuple, plTuples)
{
List *plTuple = (List *) lfirst(plcTuple);
......@@ -3039,24 +3045,24 @@ CTranslatorQueryToDXL::PdxlnFromValues
// array of project elements (for expression elements)
DrgPdxln *pdrgpdxlnPrEl = GPOS_NEW(m_pmp) DrgPdxln(m_pmp);
// array of datum (for datum constant values)
DrgPdxldatum *pdrgpdxldatum = GPOS_NEW(m_pmp) DrgPdxldatum(m_pmp);
// array of column descriptors for the CTG containing the datum array
DrgPdxlcd *pdrgpdxlcdCTG = GPOS_NEW(m_pmp) DrgPdxlcd(m_pmp);
List *plColnames = prte->eref->colnames;
GPOS_ASSERT(NULL != plColnames);
GPOS_ASSERT(gpdb::UlListLength(plTuple) == gpdb::UlListLength(plColnames));
// translate the columns
ULONG ulColPos = 0;
ListCell *plcColumn = NULL;
ForEach (plcColumn, plTuple)
{
Expr *pexpr = (Expr *) lfirst(plcColumn);
CHAR *szColName = (CHAR *) strVal(gpdb::PvListNth(plColnames, ulColPos));
ULONG ulColId = ULONG_MAX;
if (IsA(pexpr, Const))
......@@ -3065,13 +3071,13 @@ CTranslatorQueryToDXL::PdxlnFromValues
Const *pconst = (Const *) pexpr;
CDXLDatum *pdxldatum = m_psctranslator->Pdxldatum(pconst);
pdrgpdxldatum->Append(pdxldatum);
ulColId = m_pidgtorCol->UlNextId();
CWStringDynamic *pstrAlias = CDXLUtils::PstrFromSz(m_pmp, szColName);
CMDName *pmdname = GPOS_NEW(m_pmp) CMDName(m_pmp, pstrAlias);
GPOS_DELETE(pstrAlias);
CDXLColDescr *pdxlcd = GPOS_NEW(m_pmp) CDXLColDescr
(
m_pmp,
......@@ -3091,17 +3097,18 @@ CTranslatorQueryToDXL::PdxlnFromValues
}
else
{
fAllConstant = false;
// translate the scalar expression into a project element
CDXLNode *pdxlnPrE = PdxlnPrEFromGPDBExpr(pexpr, szColName, true /* fInsistNewColIds */ );
pdrgpdxlnPrEl->Append(pdxlnPrE);
ulColId = CDXLScalarProjElem::PdxlopConvert(pdxlnPrE->Pdxlop())->UlId();
if (0 == ulTuplePos)
{
CWStringDynamic *pstrAlias = CDXLUtils::PstrFromSz(m_pmp, szColName);
CMDName *pmdname = GPOS_NEW(m_pmp) CMDName(m_pmp, pstrAlias);
GPOS_DELETE(pstrAlias);
CDXLColDescr *pdxlcd = GPOS_NEW(m_pmp) CDXLColDescr
(
m_pmp,
......@@ -3113,52 +3120,74 @@ CTranslatorQueryToDXL::PdxlnFromValues
);
pdrgpdxlcd->Append(pdxlcd);
}
}
}
GPOS_ASSERT(ULONG_MAX != ulColId);
pdrgpulColIds->Append(GPOS_NEW(m_pmp) ULONG(ulColId));
ulColPos++;
}
pdrgpdxln->Append(PdxlnFromColumnValues(pdrgpdxldatum, pdrgpdxlcdCTG, pdrgpdxlnPrEl));
if (fAllConstant)
{
pdrgpdxldatum->AddRef();
pdrgpdrgpdxldatumValues->Append(pdrgpdxldatum);
}
pdrgpdrgulInputColIds->Append(pdrgpulColIds);
ulTuplePos++;
// cleanup
pdrgpdxldatum->Release();
pdrgpdxlnPrEl->Release();
pdrgpdxlcdCTG->Release();
}
GPOS_ASSERT(NULL != pdrgpdxlcd);
if (1 < ulValues)
if (fAllConstant)
{
// create Const Table DXL Node
CDXLLogicalConstTable *pdxlop = GPOS_NEW(m_pmp) CDXLLogicalConstTable(m_pmp, pdrgpdxlcd, pdrgpdrgpdxldatumValues);
CDXLNode *pdxln = GPOS_NEW(m_pmp) CDXLNode(m_pmp, pdxlop);
// make note of new columns from Value Scan
m_pmapvarcolid->LoadColumns(m_ulQueryLevel, ulRTIndex, pdxlop->Pdrgpdxlcd());
// cleanup
pdrgpdxln->Release();
pdrgpdrgulInputColIds->Release();
return pdxln;
}
else if (1 < ulValues)
{
// create a UNION ALL operator
CDXLLogicalSetOp *pdxlop = GPOS_NEW(m_pmp) CDXLLogicalSetOp(m_pmp, EdxlsetopUnionAll, pdrgpdxlcd, pdrgpdrgulInputColIds, false);
CDXLNode *pdxln = GPOS_NEW(m_pmp) CDXLNode(m_pmp, pdxlop, pdrgpdxln);
// make note of new columns from UNION ALL
m_pmapvarcolid->LoadColumns(m_ulQueryLevel, ulRTIndex, pdxlop->Pdrgpdxlcd());
pdrgpdrgpdxldatumValues->Release();
return pdxln;
}
GPOS_ASSERT(1 == pdrgpdxln->UlLength());
CDXLNode *pdxln = (*pdrgpdxln)[0];
pdxln->AddRef();
// make note of new columns
m_pmapvarcolid->LoadColumns(m_ulQueryLevel, ulRTIndex, pdrgpdxlcd);
//cleanup
pdrgpdrgpdxldatumValues->Release();
pdrgpdxln->Release();
pdrgpdrgulInputColIds->Release();
pdrgpdxlcd->Release();
return pdxln;
}
......
......@@ -581,6 +581,15 @@ namespace gpdxl
DrgPdxltrctx *pdrgpdxltrctxPrevSiblings // translation contexts of previous siblings
);
// translate a DXL Value Scan node into a GPDB ValuesScan plan node;
//   pdxlnValueScan            - DXL node: project list child followed by one child per value list
//   pdxltrctxOut              - output translation context, forwarded to the range table entry
//                               and target list translation
//   pplanParent               - parent plan node, used to set plan_parent_node_id
//   pdrgpdxltrctxPrevSiblings - translation contexts of previous siblings (not referenced by
//                               the implementation)
Plan *PplanValueScan
(
const CDXLNode *pdxlnValueScan,
CDXLTranslateContext *pdxltrctxOut,
Plan *pplanParent,
DrgPdxltrctx *pdrgpdxltrctxPrevSiblings
);
// translate DXL filter list into GPDB filter list
List *PlFilterList
(
......@@ -600,6 +609,15 @@ namespace gpdxl
Plan *pplanParent
);
// create a range table entry (rtekind RTE_VALUES) from a CDXLPhysicalValuesScan node;
// column names come from the node's project list, and a column id -> attribute number
// mapping for each project element is inserted into pdxltrctxbt;
// the remaining children are translated into the entry's values lists
RangeTblEntry *PrteFromDXLValueScan
(
const CDXLNode *pdxlnValueScan,
CDXLTranslateContext *pdxltrctxOut,
CDXLTranslateContextBaseTable *pdxltrctxbt,
Plan *pplanParent
);
// create range table entry from a table descriptor
RangeTblEntry *PrteFromTblDescr
(
......@@ -736,7 +754,7 @@ namespace gpdxl
// check if the given operator is a DML operator on a distributed table
BOOL FTargetTableDistributed(CDXLOperator *pdxlop);
// add a target entry for the given colid to the given target list
ULONG UlAddTargetEntryForColId
(
......
......@@ -27,17 +27,13 @@ explain select * from bfv_tab1, (values(147, 'RFAAAA'), (931, 'VJAAAA')) as v (i
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.00 rows=1 width=256)
-> Nested Loop (cost=0.00..2.00 rows=1 width=256)
Join Filter: true
-> Append (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Index Scan using bfv_tab1_idx1 on bfv_tab1 (cost=0.00..2.00 rows=1 width=244)
Index Cond: bfv_tab1.unique1 = "outer".column1
Filter: bfv_tab1.stringu1::text = "outer".column2
Settings: optimizer=on
Optimizer status: PQO version 1.621
(13 rows)
-> Result (cost=0.00..0.00 rows=2 width=12)
-> Values Scan on "Values" (cost=0.00..0.00 rows=2 width=12)
-> Index Scan using bfv_tab1_idx1 on bfv_tab1 (cost=0.00..6.00 rows=1 width=244)
Index Cond: bfv_tab1.unique1 = "Values".column1
Filter: bfv_tab1.stringu1::text = "Values".column2
Optimizer status: PQO version 2.35.1
(9 rows)
set gp_enable_relsize_collection=on;
explain select * from bfv_tab1, (values(147, 'RFAAAA'), (931, 'VJAAAA')) as v (i, j)
......@@ -47,17 +43,14 @@ explain select * from bfv_tab1, (values(147, 'RFAAAA'), (931, 'VJAAAA')) as v (i
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.00 rows=1 width=256)
-> Nested Loop (cost=0.00..2.00 rows=1 width=256)
Join Filter: true
-> Append (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Index Scan using bfv_tab1_idx1 on bfv_tab1 (cost=0.00..2.00 rows=1 width=244)
Index Cond: bfv_tab1.unique1 = (147)
Filter: bfv_tab1.stringu1::text = ('RFAAAA'::text)
Settings: gp_enable_relsize_collection=on; optimizer=on
Optimizer status: PQO version 1.667
(13 rows)
-> Result (cost=0.00..0.00 rows=2 width=12)
-> Values Scan on "Values" (cost=0.00..0.00 rows=2 width=12)
-> Index Scan using bfv_tab1_idx1 on bfv_tab1 (cost=0.00..6.00 rows=1 width=244)
Index Cond: bfv_tab1.unique1 = "Values".column1
Filter: bfv_tab1.stringu1::text = "Values".column2
Settings: gp_enable_relsize_collection=on
Optimizer status: PQO version 2.35.1
(10 rows)
reset gp_enable_relsize_collection;
--start_ignore
......
......@@ -94,17 +94,13 @@ WHERE mpp22263.unique1 = v.i and mpp22263.stringu1 = v.j;
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..2.00 rows=1 width=256)
-> Nested Loop (cost=0.00..2.00 rows=1 width=256)
Join Filter: true
-> Append (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Result (cost=0.00..0.00 rows=1 width=12)
-> Values Scan on "Values" (cost=0.00..0.00 rows=1 width=12)
-> Index Scan using mpp22263_idx1 on mpp22263 (cost=0.00..2.00 rows=1 width=244)
Index Cond: mpp22263.unique1 = "outer".column1
Filter: mpp22263.stringu1::text = "outer".column2
Settings: optimizer=on
Optimizer status: PQO version 2.7.0
(13 rows)
Index Cond: mpp22263.unique1 = "Values".column1
Filter: mpp22263.stringu1::text = "Values".column2
Optimizer status: PQO version 2.35.1
(9 rows)
-- atmsort.pm masks out differences in the Filter line, so just memorizing
-- the output of the above EXPLAIN isn't enough to catch a faulty Filter line.
......@@ -115,10 +111,10 @@ select * from mpp22263, (values(147, 'RFAAAA'), (931, 'VJAAAA')) as v (i, j)
WHERE mpp22263.unique1 = v.i and mpp22263.stringu1 = v.j;
$$) as et
WHERE et like '%Filter: %';
et
-----------------------------------------------------------------
et
------------------------------------------------------------------
Join Filter: true
Filter: mpp22263.stringu1::text = "outer".column2
Filter: mpp22263.stringu1::text = "Values".column2
(2 rows)
--
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册