From f826d52624b737ac8ea2e880e577f4b5c180436b Mon Sep 17 00:00:00 2001 From: Omer Arap Date: Wed, 21 Jun 2017 17:31:59 -0700 Subject: [PATCH] [#146190079] Falls back due to CTE prod-cons inconsistency This commit introduces a check to detect if the CTE producer and matching consumer is executed on the right place. So if CTE producer is executed on master/segments/segment, then matching consumer also has to execute on master/segments/segment. In rare cases, orca generates plans that violate this assumption. This commit detects plans of that kind and falls back. Signed-off-by: Venkatesh Raghavan Signed-off-by: Ekta Khanna --- .../CTEMisAlignedProducerConsumer.mdp | 662 ++++++++++++++++++ libgpopt/include/gpopt/base/CUtils.h | 18 + libgpopt/include/gpopt/exception.h | 1 + libgpopt/include/gpopt/optimizer/COptimizer.h | 3 + libgpopt/src/base/CUtils.cpp | 107 +++ libgpopt/src/exception.cpp | 6 + libgpopt/src/optimizer/COptimizer.cpp | 25 + .../src/unittest/gpopt/minidump/CICGTest.cpp | 3 +- 8 files changed, 824 insertions(+), 1 deletion(-) create mode 100644 data/dxl/minidump/CTEMisAlignedProducerConsumer.mdp diff --git a/data/dxl/minidump/CTEMisAlignedProducerConsumer.mdp b/data/dxl/minidump/CTEMisAlignedProducerConsumer.mdp new file mode 100644 index 0000000000..ed4e2f4faf --- /dev/null +++ b/data/dxl/minidump/CTEMisAlignedProducerConsumer.mdp @@ -0,0 +1,662 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libgpopt/include/gpopt/base/CUtils.h b/libgpopt/include/gpopt/base/CUtils.h index c3a3f27cab..b39ad3525d 100644 --- a/libgpopt/include/gpopt/base/CUtils.h +++ b/libgpopt/include/gpopt/base/CUtils.h @@ -27,6 +27,7 @@ #include "gpopt/operators/CScalarSubquery.h" #include "gpopt/operators/CScalarAggFunc.h" #include "naucrates/md/CMDTypeInt4GPDB.h" +#include "naucrates/statistics/IStatistics.h" // fwd declarations namespace gpmd @@ -59,6 +60,7 @@ namespace gpopt { private: + // check if the expression is a scalar boolean const static BOOL FScalarConstBool(CExpression *pexpr, BOOL fVal); @@ -81,6 +83,13 @@ namespace gpopt public: + enum EExecLocalityType + { + EeltMaster, + EeltSegments, + EeltSingleton + }; + #ifdef GPOS_DEBUG // print given expression to debug trace @@ -99,6 +108,11 @@ namespace gpopt // Helpers for generating expressions //------------------------------------------------------------------- + // recursively check for a plan with CTE, if both CTEProducer and CTEConsumer are executed on the same locality. + // raises an exception if CTE Producer and CTE Consumer does not have the same locality + static + void ValidateCTEProducerConsumerLocality(IMemoryPool *pmp, CExpression *pexpr, EExecLocalityType edt, HMUlUl *phmulul); + // Check is a comparison between given types or a comparison after casting // one side to an another exists static @@ -1057,6 +1071,10 @@ namespace gpopt // check if the equivalance classes are same static BOOL FEquivalanceClassesEqual(IMemoryPool *pmp, DrgPcrs *pdrgpcrsFst, DrgPcrs *pdrgpcrsSnd); + + // get execution locality + static + EExecLocalityType ExecLocalityType(CDistributionSpec *pds); }; // class CUtils // hash set from expressions diff --git a/libgpopt/include/gpopt/exception.h b/libgpopt/include/gpopt/exception.h index aa901a045a..ad24b26e52 100644 --- a/libgpopt/include/gpopt/exception.h +++ b/libgpopt/include/gpopt/exception.h @@ -36,6 +36,7 @@ namespace gpopt ExmiUnsupportedNonDeterministicUpdate, ExmiUnsatisfiedRequiredProperties, ExmiEvalUnsupportedScalarExpr, + ExmiCTEProducerConsumerMisAligned, ExmiSentinel }; diff --git a/libgpopt/include/gpopt/optimizer/COptimizer.h b/libgpopt/include/gpopt/optimizer/COptimizer.h index de654d54bc..6d93885125 100644 --- a/libgpopt/include/gpopt/optimizer/COptimizer.h +++ b/libgpopt/include/gpopt/optimizer/COptimizer.h @@ -90,6 +90,9 @@ namespace gpopt static void PrintQueryOrPlan(IMemoryPool *pmp, CExpression *pexpr, CQueryContext *pqc = NULL); + // Check for a plan with CTE, if both CTEProducer and CTEConsumer are executed on the same locality. + static + void CheckCTEConsistency(IMemoryPool *pmp, CExpression *pexpr); public: // main optimizer function diff --git a/libgpopt/src/base/CUtils.cpp b/libgpopt/src/base/CUtils.cpp index a2e9ca9fcf..3feeec530f 100644 --- a/libgpopt/src/base/CUtils.cpp +++ b/libgpopt/src/base/CUtils.cpp @@ -6338,4 +6338,111 @@ CUtils::FEquivalanceClassesEqual phmcscrs->Release(); return true; } + +// This function provides a check for a plan with CTE, if both +// CTEProducer and CTEConsumer are executed on the same locality. +// If it is not the case, the plan is bogus and cannot be executed +// by the executor, therefore it throws an exception causing fallback +// to planner. +// +// The overall algorithm for detecting CTE producer and consumer +// inconsistency employs a HashMap while preorder traversing the tree. +// Preorder traversal will guarantee that we visit the producer before +// we visit the consumer. In this regard, when we see a CTE producer, +// we add its CTE id as a key and its execution locality as a value to +// the HashMap. +// And when we encounter the matching CTE consumer while we traverse the +// tree, we check if the locality matches by looking up the CTE id from +// the HashMap. If we see a non-matching locality, we report the +// anamoly. +// +// We change the locality and push it down the tree whenever we detect +// a motion and the motion type enforces a locality change. We pass the +// locality type by value instead of referance to avoid locality changes +// affect parent and sibling localities. +void +CUtils::ValidateCTEProducerConsumerLocality + ( + IMemoryPool *pmp, + CExpression *pexpr, + EExecLocalityType eelt, + HMUlUl *phmulul // Hash Map containing the CTE Producer id and its execution locality + ) +{ + COperator *pop = pexpr->Pop(); + if (COperator::EopPhysicalCTEProducer == pop->Eopid()) + { + // record the location (either master or segment or singleton) + // where the CTE producer is being executed + ULONG ulCTEID = CPhysicalCTEProducer::PopConvert(pop)->UlCTEId(); + phmulul->FInsert(GPOS_NEW(pmp) ULONG(ulCTEID), GPOS_NEW(pmp) ULONG(eelt)); + } + else if (COperator::EopPhysicalCTEConsumer == pop->Eopid()) + { + ULONG ulCTEID = CPhysicalCTEConsumer::PopConvert(pop)->UlCTEId(); + ULONG *pulLocProducer = phmulul->PtLookup(&ulCTEID); + + // check if the CTEConsumer is being executed in the same location + // as the CTE Producer + if (NULL == pulLocProducer || *pulLocProducer != (ULONG) eelt) + { + phmulul->Release(); + GPOS_RAISE(gpopt::ExmaGPOPT, gpopt::ExmiCTEProducerConsumerMisAligned, ulCTEID); + } + } + // In case of a Gather motion, the execution locality is set to segments + // since the child of Gather motion executes on segments + else if (COperator::EopPhysicalMotionGather == pop->Eopid()) + { + eelt = EeltSegments; + } + else if (COperator::EopPhysicalMotionHashDistribute == pop->Eopid() || COperator::EopPhysicalMotionRandom == pop->Eopid() || COperator::EopPhysicalMotionBroadcast == pop->Eopid()) + { + // For any of these physical motions, the outer child's execution needs to be + // tracked for depending upon the distribution spec + CDrvdPropPlan *pdpplanChild = CDrvdPropPlan::Pdpplan((*pexpr)[0]->PdpDerive()); + CDistributionSpec *pdsChild = pdpplanChild->Pds(); + + eelt = CUtils::ExecLocalityType(pdsChild); + } + + const ULONG ulLen = pexpr->UlArity(); + for (ULONG ul = 0; ul < ulLen; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + + if (!pexprChild->Pop()->FScalar()) + { + ValidateCTEProducerConsumerLocality(pmp, pexprChild, eelt, phmulul); + } + } +} + +// get execution locality type +CUtils::EExecLocalityType +CUtils::ExecLocalityType + ( + CDistributionSpec *pds + ) +{ + EExecLocalityType eelt; + if (CDistributionSpec::EdtSingleton == pds->Edt() || CDistributionSpec::EdtStrictSingleton == pds->Edt()) + { + CDistributionSpecSingleton *pdss = CDistributionSpecSingleton::PdssConvert(pds); + if (pdss->FOnMaster()) + { + eelt = EeltMaster; + } + else + { + eelt = EeltSingleton; + } + } + else + { + eelt = EeltSegments; + } + return eelt; +} + // EOF diff --git a/libgpopt/src/exception.cpp b/libgpopt/src/exception.cpp index 96bc22a55c..eb2dbf3b31 100644 --- a/libgpopt/src/exception.cpp +++ b/libgpopt/src/exception.cpp @@ -88,6 +88,12 @@ gpopt::EresExceptionInit GPOS_WSZ_WSZLEN("Expecting a scalar expression without a subquery, received a %s"), 1, GPOS_WSZ_WSZLEN("Non scalar expression or scalar expression with a subquery")), + + CMessage(CException(gpopt::ExmaGPOPT, gpopt::ExmiCTEProducerConsumerMisAligned), + CException::ExsevError, + GPOS_WSZ_WSZLEN("CTE Producer-Consumer execution locality mismatch for CTE id %lld"), + 1, + GPOS_WSZ_WSZLEN("CTE Producer-Consumer execution locality mismatch")), }; GPOS_RESULT eres = GPOS_FAILED; diff --git a/libgpopt/src/optimizer/COptimizer.cpp b/libgpopt/src/optimizer/COptimizer.cpp index 85256306cb..f958a885e5 100644 --- a/libgpopt/src/optimizer/COptimizer.cpp +++ b/libgpopt/src/optimizer/COptimizer.cpp @@ -22,6 +22,7 @@ #include "gpopt/engine/CEngine.h" #include "gpopt/engine/CEnumeratorConfig.h" #include "gpopt/engine/CStatisticsConfig.h" +#include "gpopt/exception.h" #include "gpopt/minidump/CMiniDumperDXL.h" #include "gpopt/minidump/CMinidumperUtils.h" #include "gpopt/minidump/CSerializableStackTrace.h" @@ -340,6 +341,28 @@ COptimizer::HandleExceptionAfterFinalizingMinidump GPOS_RETHROW(ex); } +// This function provides an entry point to check for a plan with CTE, +// if both CTEProducer and CTEConsumer are executed on the same locality. +// If it is not the case, the plan is bogus and cannot be executed +// by the executor and an exception is raised. +// +// To be able to enter the recursive logic, the execution locality of root +// is determined before the recursive call. +void +COptimizer::CheckCTEConsistency + ( + IMemoryPool *pmp, + CExpression *pexpr + ) +{ + HMUlUl *phmulul = GPOS_NEW(pmp) HMUlUl(pmp); + CDrvdPropPlan *pdpplanChild = CDrvdPropPlan::Pdpplan(pexpr->PdpDerive()); + CDistributionSpec *pdsChild = pdpplanChild->Pds(); + + CUtils::EExecLocalityType eelt = CUtils::ExecLocalityType(pdsChild); + CUtils::ValidateCTEProducerConsumerLocality(pmp, pexpr, eelt, phmulul); + phmulul->Release(); +} //--------------------------------------------------------------------------- // @function: @@ -366,6 +389,8 @@ COptimizer::PexprOptimize CExpression *pexprPlan = eng.PexprExtractPlan(); (void) pexprPlan->PrppCompute(pmp, pqc->Prpp()); + CheckCTEConsistency(pmp, pexprPlan); + GPOS_CHECK_ABORT; return pexprPlan; diff --git a/server/src/unittest/gpopt/minidump/CICGTest.cpp b/server/src/unittest/gpopt/minidump/CICGTest.cpp index d8379d1c85..6965161178 100644 --- a/server/src/unittest/gpopt/minidump/CICGTest.cpp +++ b/server/src/unittest/gpopt/minidump/CICGTest.cpp @@ -248,7 +248,8 @@ const struct UnSupportedTestCase unSupportedTestCases[] = { {"../data/dxl/minidump/OneSegmentGather.mdp", gpdxl::ExmaDXL, gpdxl::ExmiExpr2DXLUnsupportedFeature}, {"../data/dxl/minidump/CTEWithOuterReferences.mdp", gpopt::ExmaGPOPT, gpopt::ExmiUnsupportedOp}, - {"../data/dxl/minidump/BitmapIndexUnsupportedOperator.mdp", gpopt::ExmaGPOPT, gpopt::ExmiNoPlanFound} + {"../data/dxl/minidump/BitmapIndexUnsupportedOperator.mdp", gpopt::ExmaGPOPT, gpopt::ExmiNoPlanFound}, + {"../data/dxl/minidump/CTEMisAlignedProducerConsumer.mdp",gpopt::ExmaGPOPT, gpopt::ExmiCTEProducerConsumerMisAligned} }; // negative index apply tests -- GitLab