提交 aa6754d1 编写于 作者: B Bhuvnesh Chaudhary 提交者: Bhuvnesh

Preprocess query to ensure Scalar Ident is on LHS

CScalarCmp and CScalarIsDistinctFrom operator must have the CScalarIdent operator
on the LHS and CScalarConst operator on the RHS. If it's
not the case the predicate is assigned a default selectivity
due to which the Cardinality estimate is impacted.

This patch fixes the issue by reordering the children of
CScalarCmp and CScalarIsDistinctFrom operator if CScalarIdent operator is on
the RHS on CScalarConst is on LHS. Also the Comparision operator is changed
due to the reordering of the arguments, if supported. If the
corresponding comparision operator does not exist or supported, the
children are not reordered.
Only cases of the type CONST = VAR are handled by this patch.
Signed-off-by: NOmer Arap <oarap@pivotal.io>
上级 350ca788
......@@ -301,9 +301,9 @@ SELECT * FROM my_tq_agg_small tq WHERE 110 >= tq.ets;
</dxl:OutputColumns>
<dxl:CTEList/>
<dxl:LogicalSelect>
<dxl:Comparison ComparisonOperator="&gt;=" OperatorMdid="0.525.1.0">
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="false" IsByValue="true" Value="110"/>
<dxl:Comparison ComparisonOperator="&lt;=" OperatorMdid="0.523.1.0">
<dxl:Ident ColId="1" ColName="ets" TypeMdid="0.23.1.0"/>
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="false" IsByValue="true" Value="110"/>
</dxl:Comparison>
<dxl:LogicalGet>
<dxl:TableDescriptor Mdid="0.31763855.1.1" TableName="my_tq_agg_small">
......@@ -372,9 +372,9 @@ SELECT * FROM my_tq_agg_small tq WHERE 110 >= tq.ets;
</dxl:ProjList>
<dxl:Filter/>
<dxl:RecheckCond>
<dxl:Comparison ComparisonOperator="&gt;=" OperatorMdid="0.525.1.0">
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="false" IsByValue="true" Value="110"/>
<dxl:Comparison ComparisonOperator="&lt;=" OperatorMdid="0.523.1.0">
<dxl:Ident ColId="0" ColName="ets" TypeMdid="0.23.1.0"/>
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="false" IsByValue="true" Value="110"/>
</dxl:Comparison>
</dxl:RecheckCond>
<dxl:BitmapIndexProbe>
......
......@@ -267,7 +267,7 @@
<dxl:Plan Id="0" SpaceSize="1">
<dxl:GatherMotion InputSegments="0,1" OutputSegments="-1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="2.429688" Rows="40.000000" Width="4"/>
<dxl:Cost StartupCost="0" TotalCost="2.371094" Rows="30.000000" Width="4"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="a">
......@@ -278,7 +278,7 @@
<dxl:SortingColumnList/>
<dxl:TableScan>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="1.351562" Rows="40.000000" Width="4"/>
<dxl:Cost StartupCost="0" TotalCost="1.312500" Rows="30.000000" Width="4"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="a">
......@@ -288,8 +288,8 @@
<dxl:Filter>
<dxl:Not>
<dxl:IsDistinctFrom OperatorMdid="0.96.1.0">
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="true" IsByValue="true"/>
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="true" IsByValue="true"/>
</dxl:IsDistinctFrom>
</dxl:Not>
</dxl:Filter>
......
......@@ -40,7 +40,6 @@ add_library(gpopt
include/gpopt/operators/CPhysicalCorrelatedLeftSemiNLJoin.h
include/gpopt/operators/CPhysicalCorrelatedNotInLeftAntiSemiNLJoin.h
include/gpopt/operators/CPhysicalLeftAntiSemiNLJoinNotIn.h
include/gpopt/operators/CScalarIsDistinctFrom.h
include/gpopt/operators/CScalarSubqueryExists.h
include/gpopt/operators/CScalarSubqueryNotExists.h
include/gpopt/operators/ops.h
......@@ -897,6 +896,8 @@ add_library(gpopt
src/operators/CStrictHashedDistributions.cpp
include/gpopt/operators/CPhysicalParallelUnionAll.h
src/operators/CPhysicalParallelUnionAll.cpp
include/gpopt/operators/CScalarIsDistinctFrom.h
src/operators/CScalarIsDistinctFrom.cpp
include/gpopt/base/CDistributionSpecStrictHashed.h
src/base/CDistributionSpecStrictHashed.cpp
include/gpopt/base/CDatumSortedSet.h
......
......@@ -194,6 +194,10 @@ namespace gpopt
static BOOL
FConvert2InIsConvertable(CExpression *pexpr, CScalarBoolOp::EBoolOperator eboolop);
// reorder the scalar cmp children to ensure that left child is Scalar Ident and right Child is Scalar Const
static CExpression *
PexprReorderScalarCmpChildren(IMemoryPool *pmp, CExpression *pexpr);
// private ctor
CExpressionPreprocessor();
......
......@@ -149,6 +149,20 @@ namespace gpopt
return dynamic_cast<CScalarCmp*>(pop);
}
// get commuted scalar comparision operator
virtual
CScalarCmp *PopCommutedOp(IMemoryPool *pmp, COperator *pop);
// get the string representation of a metadata object
static
CWStringConst *Pstr(IMemoryPool *pmp, CMDAccessor *pmda, IMDId *pmdid);
// get metadata id of the commuted operator
static
IMDId *PmdidCommuteOp(CMDAccessor *pmda, COperator *pop);
}; // class CScalarCmp
......
......@@ -71,35 +71,15 @@ namespace gpopt
}
virtual
BOOL FMatch
(
COperator *pop
)
const
{
if (pop->Eopid() == Eopid())
{
CScalarIsDistinctFrom *popIDF = CScalarIsDistinctFrom::PopConvert(pop);
// match if operator mdids are identical
return PmdidOp()->FEquals(popIDF->PmdidOp());
}
return false;
}
BOOL FMatch(COperator *pop) const;
// conversion function
static
CScalarIsDistinctFrom *PopConvert
(
COperator *pop
)
{
GPOS_ASSERT(NULL != pop);
GPOS_ASSERT(EopScalarIsDistinctFrom == pop->Eopid());
CScalarIsDistinctFrom *PopConvert(COperator *pop);
return reinterpret_cast<CScalarIsDistinctFrom*>(pop);
}
// get commuted scalar IDF operator
virtual
CScalarIsDistinctFrom *PopCommutedOp(IMemoryPool *pmp, COperator *pop);
}; // class CScalarIsDistinctFrom
......
......@@ -2206,6 +2206,51 @@ CExpressionPreprocessor::PexprPruneProjListProjectOrGbAgg
return pexprResult;
}
// reorder the child for scalar comparision to ensure that left child is a scalar ident and right child is a scalar const if not
CExpression *
CExpressionPreprocessor::PexprReorderScalarCmpChildren
(
IMemoryPool *pmp,
CExpression *pexpr
)
{
GPOS_ASSERT(NULL != pexpr);
COperator *pop = pexpr->Pop();
if (CUtils::FScalarCmp(pexpr) || COperator::EopScalarIsDistinctFrom == pexpr->Pop()->Eopid())
{
GPOS_ASSERT(2 == pexpr->UlArity());
CExpression *pexprLeft = (*pexpr)[0];
CExpression *pexprRight = (*pexpr)[1];
if (CUtils::FScalarConst(pexprLeft) && CUtils::FScalarIdent(pexprRight))
{
CScalarCmp *popScalarCmpCommuted = (dynamic_cast<CScalarCmp *>(pop))->PopCommutedOp(pmp, pop);
if (popScalarCmpCommuted)
{
pexprLeft->AddRef();
pexprRight->AddRef();
return GPOS_NEW(pmp) CExpression(pmp, popScalarCmpCommuted, pexprRight, pexprLeft);
}
}
}
// process children
DrgPexpr *pdrgpexpr = GPOS_NEW(pmp) DrgPexpr(pmp);
const ULONG ulChildren = pexpr->UlArity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = PexprReorderScalarCmpChildren(pmp, (*pexpr)[ul]);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(pmp) CExpression(pmp, pop, pdrgpexpr);
}
//---------------------------------------------------------------------------
// @function:
// CExpressionPreprocessor::PexprPreprocess
......@@ -2353,7 +2398,12 @@ CExpressionPreprocessor::PexprPreprocess
GPOS_CHECK_ABORT;
pexprCollapsedProjects->Release();
return pexprSubquery;
// (24) reorder the children of scalar cmp operator to ensure that left child is scalar ident and right child is scalar const
CExpression *pexrReorderedScalarCmpChildren = PexprReorderScalarCmpChildren(pmp, pexprSubquery);
GPOS_CHECK_ABORT;
pexprSubquery->Release();
return pexrReorderedScalarCmpChildren;
}
// EOF
......@@ -2167,20 +2167,15 @@ CPredicateUtils::PexprIndexLookupKeyOnRight
if (CUtils::FScalarCmp(pexprScalar))
{
CScalarCmp *popScCmp = CScalarCmp::PopConvert(pexprScalar->Pop());
const IMDScalarOp *pmdscalarop = pmda->Pmdscop(popScCmp->PmdidOp());
IMDId *pmdidscalaropCommute = pmdscalarop->PmdidOpCommute();
CScalarCmp *popScCmpCommute = popScCmp->PopCommutedOp(pmp, pexprScalar->Pop());
if (pmdidscalaropCommute->FValid())
if (popScCmpCommute)
{
// build new comparison after switching arguments and using commutative comparison operator
pexprRight->AddRef();
pexprLeft->AddRef();
pmdidscalaropCommute->AddRef();
const CWStringConst *pstr = pmda->Pmdscop(pmdidscalaropCommute)->Mdname().Pstr();
CExpression *pexprCommuted = CUtils::PexprScalarCmp(pmp, pexprRight, pexprLeft, *pstr, pmdidscalaropCommute);
CExpression *pexprCommuted = GPOS_NEW(pmp) CExpression(pmp, popScCmpCommute, pexprRight, pexprLeft);
CExpression *pexprIndexCond = PexprIndexLookupKeyOnLeft(pmp, pmda, pexprCommuted, pmdindex, pdrgpcrIndex, pcrsOuterRefs);
pexprCommuted->Release();
......
......@@ -21,6 +21,7 @@
#include "gpopt/operators/CExpressionHandle.h"
#include "naucrates/md/IMDTypeBool.h"
#include "naucrates/md/IMDScalarOp.h"
using namespace gpopt;
using namespace gpmd;
......@@ -179,6 +180,51 @@ CScalarCmp::Eber
return EberUnknown;
}
// get metadata id of the commuted operator
IMDId *
CScalarCmp::PmdidCommuteOp
(
CMDAccessor *pmda,
COperator *pop
)
{
CScalarCmp *popScalarCmp = dynamic_cast<CScalarCmp *>(pop);
const IMDScalarOp *pmdScalarCmpOp = pmda->Pmdscop(popScalarCmp->PmdidOp());
IMDId *pmdidScalarCmpCommute = pmdScalarCmpOp->PmdidOpCommute();
return pmdidScalarCmpCommute;
}
// get the string representation of a metadata object
CWStringConst *
CScalarCmp::Pstr
(
IMemoryPool *pmp,
CMDAccessor *pmda,
IMDId *pmdid
)
{
pmdid->AddRef();
return GPOS_NEW(pmp) CWStringConst(pmp, (pmda->Pmdscop(pmdid)->Mdname().Pstr())->Wsz());
}
// get commuted scalar comparision operator
CScalarCmp *
CScalarCmp::PopCommutedOp
(
IMemoryPool *pmp,
COperator *pop
)
{
CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda();
IMDId *pmdid = PmdidCommuteOp(pmda, pop);
if (NULL != pmdid && pmdid->FValid())
{
return GPOS_NEW(pmp) CScalarCmp(pmp, pmdid, Pstr(pmp, pmda, pmdid), CUtils::Ecmpt(pmdid));
}
return NULL;
}
//---------------------------------------------------------------------------
// @function:
......
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright (C) 2009 Greenplum, Inc.
//
// @filename:
// CScalarIsDistinctFrom.cpp
//
// @doc:
// Implementation of scalar IDF comparison operator
//---------------------------------------------------------------------------
#include "gpopt/base/COptCtxt.h"
#include "gpopt/mdcache/CMDAccessorUtils.h"
#include "gpopt/operators/CScalarIsDistinctFrom.h"
using namespace gpopt;
using namespace gpmd;
// conversion function
CScalarIsDistinctFrom *
CScalarIsDistinctFrom::PopConvert
(
COperator *pop
)
{
GPOS_ASSERT(NULL != pop);
GPOS_ASSERT(EopScalarIsDistinctFrom == pop->Eopid());
return reinterpret_cast<CScalarIsDistinctFrom*>(pop);
}
BOOL
CScalarIsDistinctFrom::FMatch
(
COperator *pop
)
const
{
if (pop->Eopid() == Eopid())
{
CScalarIsDistinctFrom *popIDF = CScalarIsDistinctFrom::PopConvert(pop);
// match if operator mdids are identical
return PmdidOp()->FEquals(popIDF->PmdidOp());
}
return false;
}
// get commuted scalar IDF operator
CScalarIsDistinctFrom *
CScalarIsDistinctFrom::PopCommutedOp
(
IMemoryPool *pmp,
COperator *pop
)
{
CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda();
IMDId *pmdid = PmdidCommuteOp(pmda, pop);
if (NULL != pmdid && pmdid->FValid())
{
return GPOS_NEW(pmp) CScalarIsDistinctFrom(pmp, pmdid, Pstr(pmp, pmda, pmdid));
}
return NULL;
}
// EOF
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册