From aa6754d1ba31e16e985270cb00f02ed0731c513c Mon Sep 17 00:00:00 2001 From: Bhuvnesh Chaudhary Date: Thu, 29 Jun 2017 18:48:38 -0700 Subject: [PATCH] Preprocess query to ensure Scalar Ident is on LHS CScalarCmp and CScalarIsDistinctFrom operator must have the CScalarIdent operator on the LHS and CScalarConst operator on the RHS. If it's not the case the predicate is assigned a default selectivity due to which the Cardinality estimate is impacted. This patch fixes the issue by reordering the children of CScalarCmp and CScalarIsDistinctFrom operator if CScalarIdent operator is on the RHS on CScalarConst is on LHS. Also the Comparision operator is changed due to the reordering of the arguments, if supported. If the corresponding comparision operator does not exist or supported, the children are not reordered. Only cases of the type CONST = VAR are handled by this patch. Signed-off-by: Omer Arap --- .../BitmapTableScan-ColumnOnRightSide.mdp | 8 +-- data/dxl/minidump/NullConstant-INDF-Col.mdp | 6 +- libgpopt/CMakeLists.txt | 3 +- .../gpopt/operators/CExpressionPreprocessor.h | 4 ++ libgpopt/include/gpopt/operators/CScalarCmp.h | 14 ++++ .../gpopt/operators/CScalarIsDistinctFrom.h | 30 ++------ .../src/operators/CExpressionPreprocessor.cpp | 52 +++++++++++++- libgpopt/src/operators/CPredicateUtils.cpp | 11 +-- libgpopt/src/operators/CScalarCmp.cpp | 46 ++++++++++++ .../src/operators/CScalarIsDistinctFrom.cpp | 72 +++++++++++++++++++ 10 files changed, 204 insertions(+), 42 deletions(-) create mode 100644 libgpopt/src/operators/CScalarIsDistinctFrom.cpp diff --git a/data/dxl/minidump/BitmapTableScan-ColumnOnRightSide.mdp b/data/dxl/minidump/BitmapTableScan-ColumnOnRightSide.mdp index d0bc1f5fdd..93cee2b9bd 100644 --- a/data/dxl/minidump/BitmapTableScan-ColumnOnRightSide.mdp +++ b/data/dxl/minidump/BitmapTableScan-ColumnOnRightSide.mdp @@ -301,9 +301,9 @@ SELECT * FROM my_tq_agg_small tq WHERE 110 >= tq.ets; - - + + @@ -372,9 +372,9 @@ SELECT * FROM my_tq_agg_small tq WHERE 110 >= tq.ets; - - + + diff --git a/data/dxl/minidump/NullConstant-INDF-Col.mdp b/data/dxl/minidump/NullConstant-INDF-Col.mdp index 4e21c29a91..74ccda45fb 100644 --- a/data/dxl/minidump/NullConstant-INDF-Col.mdp +++ b/data/dxl/minidump/NullConstant-INDF-Col.mdp @@ -267,7 +267,7 @@ - + @@ -278,7 +278,7 @@ - + @@ -288,8 +288,8 @@ - + diff --git a/libgpopt/CMakeLists.txt b/libgpopt/CMakeLists.txt index e190881609..41d49038c3 100644 --- a/libgpopt/CMakeLists.txt +++ b/libgpopt/CMakeLists.txt @@ -40,7 +40,6 @@ add_library(gpopt include/gpopt/operators/CPhysicalCorrelatedLeftSemiNLJoin.h include/gpopt/operators/CPhysicalCorrelatedNotInLeftAntiSemiNLJoin.h include/gpopt/operators/CPhysicalLeftAntiSemiNLJoinNotIn.h - include/gpopt/operators/CScalarIsDistinctFrom.h include/gpopt/operators/CScalarSubqueryExists.h include/gpopt/operators/CScalarSubqueryNotExists.h include/gpopt/operators/ops.h @@ -897,6 +896,8 @@ add_library(gpopt src/operators/CStrictHashedDistributions.cpp include/gpopt/operators/CPhysicalParallelUnionAll.h src/operators/CPhysicalParallelUnionAll.cpp + include/gpopt/operators/CScalarIsDistinctFrom.h + src/operators/CScalarIsDistinctFrom.cpp include/gpopt/base/CDistributionSpecStrictHashed.h src/base/CDistributionSpecStrictHashed.cpp include/gpopt/base/CDatumSortedSet.h diff --git a/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h b/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h index 8dae559f95..7cc24a019a 100644 --- a/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h +++ b/libgpopt/include/gpopt/operators/CExpressionPreprocessor.h @@ -194,6 +194,10 @@ namespace gpopt static BOOL FConvert2InIsConvertable(CExpression *pexpr, CScalarBoolOp::EBoolOperator eboolop); + // reorder the scalar cmp children to ensure that left child is Scalar Ident and right Child is Scalar Const + static CExpression * + PexprReorderScalarCmpChildren(IMemoryPool *pmp, CExpression *pexpr); + // private ctor CExpressionPreprocessor(); diff --git a/libgpopt/include/gpopt/operators/CScalarCmp.h b/libgpopt/include/gpopt/operators/CScalarCmp.h index 2b5a7b8560..0b3298577b 100644 --- a/libgpopt/include/gpopt/operators/CScalarCmp.h +++ b/libgpopt/include/gpopt/operators/CScalarCmp.h @@ -149,6 +149,20 @@ namespace gpopt return dynamic_cast(pop); } + + // get commuted scalar comparision operator + virtual + CScalarCmp *PopCommutedOp(IMemoryPool *pmp, COperator *pop); + + // get the string representation of a metadata object + static + CWStringConst *Pstr(IMemoryPool *pmp, CMDAccessor *pmda, IMDId *pmdid); + + // get metadata id of the commuted operator + static + IMDId *PmdidCommuteOp(CMDAccessor *pmda, COperator *pop); + + }; // class CScalarCmp diff --git a/libgpopt/include/gpopt/operators/CScalarIsDistinctFrom.h b/libgpopt/include/gpopt/operators/CScalarIsDistinctFrom.h index 1c9ba22bd0..94f7c8e051 100644 --- a/libgpopt/include/gpopt/operators/CScalarIsDistinctFrom.h +++ b/libgpopt/include/gpopt/operators/CScalarIsDistinctFrom.h @@ -71,35 +71,15 @@ namespace gpopt } virtual - BOOL FMatch - ( - COperator *pop - ) - const - { - if (pop->Eopid() == Eopid()) - { - CScalarIsDistinctFrom *popIDF = CScalarIsDistinctFrom::PopConvert(pop); - - // match if operator mdids are identical - return PmdidOp()->FEquals(popIDF->PmdidOp()); - } - - return false; - } + BOOL FMatch(COperator *pop) const; // conversion function static - CScalarIsDistinctFrom *PopConvert - ( - COperator *pop - ) - { - GPOS_ASSERT(NULL != pop); - GPOS_ASSERT(EopScalarIsDistinctFrom == pop->Eopid()); + CScalarIsDistinctFrom *PopConvert(COperator *pop); - return reinterpret_cast(pop); - } + // get commuted scalar IDF operator + virtual + CScalarIsDistinctFrom *PopCommutedOp(IMemoryPool *pmp, COperator *pop); }; // class CScalarIsDistinctFrom diff --git a/libgpopt/src/operators/CExpressionPreprocessor.cpp b/libgpopt/src/operators/CExpressionPreprocessor.cpp index 98840ae437..4b64ef7dc5 100644 --- a/libgpopt/src/operators/CExpressionPreprocessor.cpp +++ b/libgpopt/src/operators/CExpressionPreprocessor.cpp @@ -2206,6 +2206,51 @@ CExpressionPreprocessor::PexprPruneProjListProjectOrGbAgg return pexprResult; } + +// reorder the child for scalar comparision to ensure that left child is a scalar ident and right child is a scalar const if not +CExpression * +CExpressionPreprocessor::PexprReorderScalarCmpChildren + ( + IMemoryPool *pmp, + CExpression *pexpr + ) +{ + GPOS_ASSERT(NULL != pexpr); + + COperator *pop = pexpr->Pop(); + if (CUtils::FScalarCmp(pexpr) || COperator::EopScalarIsDistinctFrom == pexpr->Pop()->Eopid()) + { + GPOS_ASSERT(2 == pexpr->UlArity()); + CExpression *pexprLeft = (*pexpr)[0]; + CExpression *pexprRight = (*pexpr)[1]; + + if (CUtils::FScalarConst(pexprLeft) && CUtils::FScalarIdent(pexprRight)) + { + CScalarCmp *popScalarCmpCommuted = (dynamic_cast(pop))->PopCommutedOp(pmp, pop); + if (popScalarCmpCommuted) + { + pexprLeft->AddRef(); + pexprRight->AddRef(); + return GPOS_NEW(pmp) CExpression(pmp, popScalarCmpCommuted, pexprRight, pexprLeft); + } + } + } + + // process children + DrgPexpr *pdrgpexpr = GPOS_NEW(pmp) DrgPexpr(pmp); + const ULONG ulChildren = pexpr->UlArity(); + + for (ULONG ul = 0; ul < ulChildren; ul++) + { + CExpression *pexprChild = PexprReorderScalarCmpChildren(pmp, (*pexpr)[ul]); + pdrgpexpr->Append(pexprChild); + } + + pop->AddRef(); + return GPOS_NEW(pmp) CExpression(pmp, pop, pdrgpexpr); +} + + //--------------------------------------------------------------------------- // @function: // CExpressionPreprocessor::PexprPreprocess @@ -2353,7 +2398,12 @@ CExpressionPreprocessor::PexprPreprocess GPOS_CHECK_ABORT; pexprCollapsedProjects->Release(); - return pexprSubquery; + // (24) reorder the children of scalar cmp operator to ensure that left child is scalar ident and right child is scalar const + CExpression *pexrReorderedScalarCmpChildren = PexprReorderScalarCmpChildren(pmp, pexprSubquery); + GPOS_CHECK_ABORT; + pexprSubquery->Release(); + + return pexrReorderedScalarCmpChildren; } // EOF diff --git a/libgpopt/src/operators/CPredicateUtils.cpp b/libgpopt/src/operators/CPredicateUtils.cpp index 68d9216756..ae70de765c 100644 --- a/libgpopt/src/operators/CPredicateUtils.cpp +++ b/libgpopt/src/operators/CPredicateUtils.cpp @@ -2167,20 +2167,15 @@ CPredicateUtils::PexprIndexLookupKeyOnRight if (CUtils::FScalarCmp(pexprScalar)) { CScalarCmp *popScCmp = CScalarCmp::PopConvert(pexprScalar->Pop()); - - const IMDScalarOp *pmdscalarop = pmda->Pmdscop(popScCmp->PmdidOp()); - IMDId *pmdidscalaropCommute = pmdscalarop->PmdidOpCommute(); + CScalarCmp *popScCmpCommute = popScCmp->PopCommutedOp(pmp, pexprScalar->Pop()); - if (pmdidscalaropCommute->FValid()) + if (popScCmpCommute) { // build new comparison after switching arguments and using commutative comparison operator pexprRight->AddRef(); pexprLeft->AddRef(); - pmdidscalaropCommute->AddRef(); - const CWStringConst *pstr = pmda->Pmdscop(pmdidscalaropCommute)->Mdname().Pstr(); - - CExpression *pexprCommuted = CUtils::PexprScalarCmp(pmp, pexprRight, pexprLeft, *pstr, pmdidscalaropCommute); + CExpression *pexprCommuted = GPOS_NEW(pmp) CExpression(pmp, popScCmpCommute, pexprRight, pexprLeft); CExpression *pexprIndexCond = PexprIndexLookupKeyOnLeft(pmp, pmda, pexprCommuted, pmdindex, pdrgpcrIndex, pcrsOuterRefs); pexprCommuted->Release(); diff --git a/libgpopt/src/operators/CScalarCmp.cpp b/libgpopt/src/operators/CScalarCmp.cpp index e90fbed321..3b9de599a1 100644 --- a/libgpopt/src/operators/CScalarCmp.cpp +++ b/libgpopt/src/operators/CScalarCmp.cpp @@ -21,6 +21,7 @@ #include "gpopt/operators/CExpressionHandle.h" #include "naucrates/md/IMDTypeBool.h" +#include "naucrates/md/IMDScalarOp.h" using namespace gpopt; using namespace gpmd; @@ -179,6 +180,51 @@ CScalarCmp::Eber return EberUnknown; } +// get metadata id of the commuted operator +IMDId * +CScalarCmp::PmdidCommuteOp + ( + CMDAccessor *pmda, + COperator *pop + ) +{ + CScalarCmp *popScalarCmp = dynamic_cast(pop); + const IMDScalarOp *pmdScalarCmpOp = pmda->Pmdscop(popScalarCmp->PmdidOp()); + + IMDId *pmdidScalarCmpCommute = pmdScalarCmpOp->PmdidOpCommute(); + return pmdidScalarCmpCommute; +} + +// get the string representation of a metadata object +CWStringConst * +CScalarCmp::Pstr + ( + IMemoryPool *pmp, + CMDAccessor *pmda, + IMDId *pmdid + ) +{ + pmdid->AddRef(); + return GPOS_NEW(pmp) CWStringConst(pmp, (pmda->Pmdscop(pmdid)->Mdname().Pstr())->Wsz()); +} + +// get commuted scalar comparision operator +CScalarCmp * +CScalarCmp::PopCommutedOp + ( + IMemoryPool *pmp, + COperator *pop + ) +{ + + CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); + IMDId *pmdid = PmdidCommuteOp(pmda, pop); + if (NULL != pmdid && pmdid->FValid()) + { + return GPOS_NEW(pmp) CScalarCmp(pmp, pmdid, Pstr(pmp, pmda, pmdid), CUtils::Ecmpt(pmdid)); + } + return NULL; +} //--------------------------------------------------------------------------- // @function: diff --git a/libgpopt/src/operators/CScalarIsDistinctFrom.cpp b/libgpopt/src/operators/CScalarIsDistinctFrom.cpp new file mode 100644 index 0000000000..b2a6d2f66e --- /dev/null +++ b/libgpopt/src/operators/CScalarIsDistinctFrom.cpp @@ -0,0 +1,72 @@ +//--------------------------------------------------------------------------- +// Greenplum Database +// Copyright (C) 2009 Greenplum, Inc. +// +// @filename: +// CScalarIsDistinctFrom.cpp +// +// @doc: +// Implementation of scalar IDF comparison operator +//--------------------------------------------------------------------------- + +#include "gpopt/base/COptCtxt.h" + +#include "gpopt/mdcache/CMDAccessorUtils.h" + +#include "gpopt/operators/CScalarIsDistinctFrom.h" + +using namespace gpopt; +using namespace gpmd; + + +// conversion function +CScalarIsDistinctFrom * +CScalarIsDistinctFrom::PopConvert +( + COperator *pop + ) +{ + GPOS_ASSERT(NULL != pop); + GPOS_ASSERT(EopScalarIsDistinctFrom == pop->Eopid()); + + return reinterpret_cast(pop); +} + +BOOL +CScalarIsDistinctFrom::FMatch +( + COperator *pop + ) +const +{ + if (pop->Eopid() == Eopid()) + { + CScalarIsDistinctFrom *popIDF = CScalarIsDistinctFrom::PopConvert(pop); + + // match if operator mdids are identical + return PmdidOp()->FEquals(popIDF->PmdidOp()); + } + + return false; +} + +// get commuted scalar IDF operator +CScalarIsDistinctFrom * +CScalarIsDistinctFrom::PopCommutedOp + ( + IMemoryPool *pmp, + COperator *pop + ) +{ + + CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda(); + IMDId *pmdid = PmdidCommuteOp(pmda, pop); + if (NULL != pmdid && pmdid->FValid()) + { + return GPOS_NEW(pmp) CScalarIsDistinctFrom(pmp, pmdid, Pstr(pmp, pmda, pmdid)); + } + return NULL; +} + +// EOF + -- GitLab