diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp
index 40525220d85caeb8f6e49e3a6fecd21a69a49487..c94dac639d268fc45aaaf26e24a953bdb777be2a 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -639,6 +639,28 @@ gpdb::FuncStrict
return false;
}
+bool
+gpdb::IsFuncNDVPreserving
+	(
+	Oid funcid
+	)
+{
+	// Allow-list of NDV-preserving functions, i.e. functions for which the
+	// estimated NDV of the output is similar to that of the input.
+	// For now, these are the only functions considered for this optimization.
+	static const Oid ndv_preserving_funcs[] =
+	{
+		LOWER_OID,
+		LTRIM_SPACE_OID,
+		BTRIM_SPACE_OID,
+		RTRIM_SPACE_OID,
+		UPPER_OID
+	};
+	const unsigned int num_funcs = sizeof(ndv_preserving_funcs) / sizeof(ndv_preserving_funcs[0]);
+
+	// report whether the given function oid is on the allow-list
+	for (unsigned int ul = 0; ul < num_funcs; ul++)
+	{
+		if (ndv_preserving_funcs[ul] == funcid)
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
char
gpdb::FuncStability
(
@@ -2128,6 +2150,24 @@ gpdb::IsOpStrict
return false;
}
+bool
+gpdb::IsOpNDVPreserving
+	(
+	Oid opno
+	)
+{
+	// Given an operator oid, return whether it is considered NDV-preserving.
+	// For now, only the text concatenation operator qualifies.
+	// Note that additional checks are done later on the operands, e.g.
+	// col || 'const' is NDV-preserving, while col1 || col2 is not.
+	return OIDTextConcatenateOperator == opno;
+}
+
void
gpdb::GetOpInputTypes
(
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index fde60dc9f977c9d3ae3de7564a67a156a010ded1..6b6e0433c171c839fdb60cbcda456551bc23a342 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -1750,6 +1750,7 @@ CTranslatorRelcacheToDXL::RetrieveScOp
}
BOOL returns_null_on_null_input = gpdb::IsOpStrict(op_oid);
+ BOOL is_ndv_preserving = gpdb::IsOpNDVPreserving(op_oid);
CMDIdGPDB *mdid_hash_opfamily = NULL;
OID distr_opfamily = gpdb::GetCompatibleHashOpFamily(op_oid);
@@ -1781,7 +1782,8 @@ CTranslatorRelcacheToDXL::RetrieveScOp
returns_null_on_null_input,
RetrieveScOpOpFamilies(mp, mdid),
mdid_hash_opfamily,
- mdid_legacy_hash_opfamily
+ mdid_legacy_hash_opfamily,
+ is_ndv_preserving
);
return md_scalar_op;
}
@@ -1802,12 +1804,14 @@ CTranslatorRelcacheToDXL::LookupFuncProps
IMDFunction::EFuncStbl *stability, // output: function stability
IMDFunction::EFuncDataAcc *access, // output: function datya access
BOOL *is_strict, // output: is function strict?
+ BOOL *is_ndv_preserving, // output: preserves NDVs of inputs
BOOL *returns_set // output: does function return set?
)
{
GPOS_ASSERT(NULL != stability);
GPOS_ASSERT(NULL != access);
GPOS_ASSERT(NULL != is_strict);
+ GPOS_ASSERT(NULL != is_ndv_preserving);
GPOS_ASSERT(NULL != returns_set);
*stability = GetFuncStability(gpdb::FuncStability(func_oid));
@@ -1818,6 +1822,7 @@ CTranslatorRelcacheToDXL::LookupFuncProps
*returns_set = gpdb::GetFuncRetset(func_oid);
*is_strict = gpdb::FuncStrict(func_oid);
+ *is_ndv_preserving = gpdb::IsFuncNDVPreserving(func_oid);
}
@@ -1886,7 +1891,8 @@ CTranslatorRelcacheToDXL::RetrieveFunc
IMDFunction::EFuncDataAcc access = IMDFunction::EfdaNoSQL;
BOOL is_strict = true;
BOOL returns_set = true;
- LookupFuncProps(func_oid, &stability, &access, &is_strict, &returns_set);
+ BOOL is_ndv_preserving = true;
+ LookupFuncProps(func_oid, &stability, &access, &is_strict, &is_ndv_preserving, &returns_set);
mdid->AddRef();
CMDFunctionGPDB *md_func = GPOS_NEW(mp) CMDFunctionGPDB
@@ -1899,7 +1905,8 @@ CTranslatorRelcacheToDXL::RetrieveFunc
returns_set,
stability,
access,
- is_strict
+ is_strict,
+ is_ndv_preserving
);
return md_func;
diff --git a/src/backend/gporca/data/dxl/minidump/EquiJoinOnExpr-Supported.mdp b/src/backend/gporca/data/dxl/minidump/EquiJoinOnExpr-Supported.mdp
new file mode 100644
index 0000000000000000000000000000000000000000..92b15155a0b740d55262f80fe7457555240fd729
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/EquiJoinOnExpr-Supported.mdp
@@ -0,0 +1,2071 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/backend/gporca/data/dxl/minidump/EquiJoinOnExpr-Unsupported.mdp b/src/backend/gporca/data/dxl/minidump/EquiJoinOnExpr-Unsupported.mdp
new file mode 100644
index 0000000000000000000000000000000000000000..f00628df1d329dd37e3b1a26ef68b5177dd7c52d
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/EquiJoinOnExpr-Unsupported.mdp
@@ -0,0 +1,1634 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/backend/gporca/data/dxl/minidump/InnerJoin-With-OuterRefs.mdp b/src/backend/gporca/data/dxl/minidump/InnerJoin-With-OuterRefs.mdp
index c39067eb0ae2c4ce46aa7d35b7847728843ca6ce..a6b943ab6f1350d86498c477fa3e4e7d3b28faed 100644
--- a/src/backend/gporca/data/dxl/minidump/InnerJoin-With-OuterRefs.mdp
+++ b/src/backend/gporca/data/dxl/minidump/InnerJoin-With-OuterRefs.mdp
@@ -530,7 +530,7 @@
-
+
@@ -541,7 +541,7 @@
-
+
@@ -552,7 +552,7 @@
-
+
@@ -681,7 +681,7 @@
-
+
diff --git a/src/backend/gporca/data/dxl/minidump/OuterJoin-With-OuterRefs.mdp b/src/backend/gporca/data/dxl/minidump/OuterJoin-With-OuterRefs.mdp
index 5c97d92c3bc47479c5d0f8ac3f7e916e363bd60f..0349e20623c51c1ae2953c513f6d4139f90d84ae 100644
--- a/src/backend/gporca/data/dxl/minidump/OuterJoin-With-OuterRefs.mdp
+++ b/src/backend/gporca/data/dxl/minidump/OuterJoin-With-OuterRefs.mdp
@@ -1,5 +1,26 @@
+
+
diff --git a/src/backend/gporca/data/dxl/parse_tests/q26-Metadata.xml b/src/backend/gporca/data/dxl/parse_tests/q26-Metadata.xml
index d215b168e6fede89737317bfbb0adcf801ace279..6d6e051d23d415b805a62595a965e351694a01f2 100644
--- a/src/backend/gporca/data/dxl/parse_tests/q26-Metadata.xml
+++ b/src/backend/gporca/data/dxl/parse_tests/q26-Metadata.xml
@@ -173,7 +173,7 @@
-
+
@@ -185,14 +185,14 @@
-
+
-
+
diff --git a/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h b/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h
index 19b490e4608dd7fdd13da3bddddf43420e61b33e..5567919714072f0820b97aeb8932defe54533d72 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h
@@ -1077,11 +1077,9 @@ namespace gpopt
static
BOOL FCrossJoin(CExpression *pexpr);
- // extract scalar ident column reference from scalar expression containing
- // only one scalar ident in the tree
- const static
- CColRef *PcrExtractFromScExpression(CExpression *pexpr);
-
+ // is this scalar expression an NDV-preserving function (used for join stats derivation)
+ static
+ BOOL IsExprNDVPreserving(CExpression *pexpr, const CColRef **underlying_colref);
// search the given array of predicates for predicates with equality or IS NOT
// DISTINCT FROM operators that has one side equal to the given expression
diff --git a/src/backend/gporca/libgpopt/src/base/CUtils.cpp b/src/backend/gporca/libgpopt/src/base/CUtils.cpp
index 00c2b91e476e79bfec52e881a2b28d95f5d69f95..51dc0b183ddb6c0a404fafcbd5f9d98c8ae11e77 100644
--- a/src/backend/gporca/libgpopt/src/base/CUtils.cpp
+++ b/src/backend/gporca/libgpopt/src/base/CUtils.cpp
@@ -5116,18 +5116,112 @@ CUtils::FCrossJoin
return fCrossJoin;
}
-// extract scalar ident column reference from scalar expression containing
-// only one scalar ident in the tree
-const CColRef *
-CUtils::PcrExtractFromScExpression
+// Determine whether a scalar expression consists only of a scalar ident and
+// NDV-preserving functions/operators, casts and coalesce on top of it
+// (used for join stats derivation).
+//
+// Input:
+//   pexpr             - the scalar expression to examine
+//   underlying_colref - output: the CColRef of the scalar ident found at the bottom
+//                       of the expression; set to NULL whenever false is returned
+//
+// Returns true if the expression is considered NDV-preserving, false otherwise.
+BOOL
+CUtils::IsExprNDVPreserving
 	(
-	CExpression *pexpr
+	CExpression *pexpr,
+	const CColRef **underlying_colref
 	)
 {
-	if (pexpr->DeriveUsedColumns()->Size() == 1)
-		return pexpr->DeriveUsedColumns()->PcrFirst();
+	GPOS_ASSERT(NULL != pexpr);
+	GPOS_ASSERT(NULL != underlying_colref);
+
+	CExpression *curr_expr = pexpr;
+
+	// no column found yet; this stays NULL on every "return false" path below
+	*underlying_colref = NULL;
+
+	// go down the expression tree, always visiting the child that contains the
+	// scalar ident, until we find the ident or until we find something that is
+	// not NDV-preserving (at which point there is no more need to check)
+	while (true)
+	{
+		COperator *pop = curr_expr->Pop();
+		// index of the child to descend into next; only a binary operator
+		// with a constant on the left changes this
+		ULONG child_with_scalar_ident = 0;
+
+		switch (pop->Eopid())
+		{
+			case COperator::EopScalarIdent:
+			{
+				// we reached the bottom of the expression, return the ColRef
+				CScalarIdent *cr = CScalarIdent::PopConvert(pop);
+
+				*underlying_colref = cr->Pcr();
+				GPOS_ASSERT(1 == pexpr->DeriveUsedColumns()->Size());
+				return true;
+			}
+
+			case COperator::EopScalarCast:
+				// skip over casts
+				// Note: We might in the future investigate whether there are some casts
+				// that reduce NDVs by too much. Most, if not all, casts that have that potential are
+				// converted to functions, though. Examples: timestamp -> date, double precision -> int.
+				break;
+
+			case COperator::EopScalarCoalesce:
+			{
+				// coalesce(col, const1, ... constn) is treated as an NDV-preserving function
+				for (ULONG c = 1; c < curr_expr->Arity(); c++)
+				{
+					if (0 < (*curr_expr)[c]->DeriveUsedColumns()->Size())
+					{
+						// this coalesce has a ColRef in the second or later arguments, assume for
+						// now that this doesn't preserve NDVs (we could add logic to support this case later)
+						return false;
+					}
+				}
+				break;
+			}
+			case COperator::EopScalarFunc:
+			{
+				// check whether the function is NDV-preserving
+				CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
+				CScalarFunc *sf = CScalarFunc::PopConvert(pop);
+				const IMDFunction *pmdfunc = md_accessor->RetrieveFunc(sf->FuncMdId());
+
+				// only single-argument functions are considered
+				if (!pmdfunc->IsNDVPreserving() || 1 != curr_expr->Arity())
+				{
+					return false;
+				}
+				break;
+			}
+
+			case COperator::EopScalarOp:
+			{
+				// check whether the operator is NDV-preserving
+				CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
+				CScalarOp *so = CScalarOp::PopConvert(pop);
+				const IMDScalarOp *pmdscop = md_accessor->RetrieveScOp(so->MdIdOp());
+
+				// only binary operators are considered
+				if (!pmdscop->IsNDVPreserving() || 2 != curr_expr->Arity())
+				{
+					return false;
+				}
 
-	return NULL;
+				// col <op> const is NDV-preserving, and so is const <op> col
+				if (0 == (*curr_expr)[1]->DeriveUsedColumns()->Size())
+				{
+					// col <op> const
+					child_with_scalar_ident = 0;
+				}
+				else if (0 == (*curr_expr)[0]->DeriveUsedColumns()->Size())
+				{
+					// const <op> col
+					child_with_scalar_ident = 1;
+				}
+				else
+				{
+					// give up for now, both children reference a column,
+					// e.g. col1 <op> col2
+					return false;
+				}
+				break;
+			}
+
+			default:
+				// anything else we see is considered non-NDV-preserving
+				return false;
+		}
+
+		// descend into the child that (potentially) holds the scalar ident
+		curr_expr = (*curr_expr)[child_with_scalar_ident];
+	}
 }
diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalDifference.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalDifference.cpp
index bcd0f57b4de16af28d99dfd0d2ae17631b4a3804..3a76ce46e7aa3567f7b1eb968d570b21a36688f9 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalDifference.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalDifference.cpp
@@ -182,7 +182,8 @@ CLogicalDifference::PstatsDerive
exprhdl,
pexprScCond,
output_colrefsets,
- outer_refs
+ outer_refs,
+ true // is an LASJ
);
IStatistics *LASJ_stats = outer_stats->CalcLASJoinStats
(
diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalDifferenceAll.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalDifferenceAll.cpp
index 0d0f43b411aeee233efe4dd53cbf4425511aa448..2ed49ed5d614040fff4cde1401b1d4c8ea0f61a9 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalDifferenceAll.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalDifferenceAll.cpp
@@ -179,7 +179,8 @@ CLogicalDifferenceAll::PstatsDerive
exprhdl,
pexprScCond,
output_colrefsets,
- outer_refs
+ outer_refs,
+ true // is an LASJ
);
IStatistics *LASJ_stats = outer_stats->CalcLASJoinStats
(
diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalIntersectAll.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalIntersectAll.cpp
index 9477c4a53e40a5b7036361a6a2e0c8923e519ca5..85594fb6d951bb70e16e1066585b5b84119053cb 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalIntersectAll.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalIntersectAll.cpp
@@ -200,7 +200,8 @@ CLogicalIntersectAll::PstatsDerive
exprhdl,
pexprScCond,
output_colrefsets,
- outer_refs
+ outer_refs,
+ true // is a semi-join
);
IStatistics *pstatsSemiJoin = CLogicalLeftSemiJoin::PstatsDerive(mp, join_preds_stats, outer_stats, inner_side_stats);
diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalLeftAntiSemiJoin.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalLeftAntiSemiJoin.cpp
index 33a9017c15796900d429c8c42e9dbabbeca4949f..497d40548322a1d49f2927ab9fbba38d49cbf978 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalLeftAntiSemiJoin.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalLeftAntiSemiJoin.cpp
@@ -149,7 +149,7 @@ CLogicalLeftAntiSemiJoin::PstatsDerive
GPOS_ASSERT(Esp(exprhdl) > EspNone);
IStatistics *outer_stats = exprhdl.Pstats(0);
IStatistics *inner_side_stats = exprhdl.Pstats(1);
- CStatsPredJoinArray *join_preds_stats = CStatsPredUtils::ExtractJoinStatsFromExprHandle(mp, exprhdl);
+ CStatsPredJoinArray *join_preds_stats = CStatsPredUtils::ExtractJoinStatsFromExprHandle(mp, exprhdl, true /*LASJ*/);
IStatistics *pstatsLASJoin = outer_stats->CalcLASJoinStats
(
mp,
diff --git a/src/backend/gporca/libgpopt/src/operators/CLogicalLeftSemiJoin.cpp b/src/backend/gporca/libgpopt/src/operators/CLogicalLeftSemiJoin.cpp
index aef81ad4946dda4f50ba3b586594ad1ea93021e1..42919cd645932323c47be60a8028ef8781623c7a 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLogicalLeftSemiJoin.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLogicalLeftSemiJoin.cpp
@@ -171,7 +171,7 @@ CLogicalLeftSemiJoin::PstatsDerive
GPOS_ASSERT(Esp(exprhdl) > EspNone);
IStatistics *outer_stats = exprhdl.Pstats(0);
IStatistics *inner_side_stats = exprhdl.Pstats(1);
- CStatsPredJoinArray *join_preds_stats = CStatsPredUtils::ExtractJoinStatsFromExprHandle(mp, exprhdl);
+ CStatsPredJoinArray *join_preds_stats = CStatsPredUtils::ExtractJoinStatsFromExprHandle(mp, exprhdl, true/*semi-join*/);
IStatistics *pstatsSemiJoin = PstatsDerive(mp, join_preds_stats, outer_stats, inner_side_stats);
join_preds_stats->Release();
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBFunc.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBFunc.h
index 3453b4ff224a83158e2a61c4f87791c903544c67..de1b05d02365750db76b8ed8b543cda9619546b2 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBFunc.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBFunc.h
@@ -60,6 +60,8 @@ namespace gpdxl
// function strictness (i.e. whether func returns NULL on NULL input)
BOOL m_is_strict;
+
+ BOOL m_is_ndv_preserving;
// private copy ctor
CParseHandlerMDGPDBFunc(const CParseHandlerMDGPDBFunc &);
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBScalarOp.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBScalarOp.h
index 792ab5433b1db2a360e258698917796c101a5b27..fdf220f26aac7b18879fae19fc5b6ff7d8bb729f 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBScalarOp.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/parser/CParseHandlerMDGPDBScalarOp.h
@@ -65,6 +65,9 @@ namespace gpdxl
IMDId *m_mdid_hash_opfamily;
IMDId *m_mdid_legacy_hash_opfamily;
+ // preserves NDVs of inputs
+ BOOL m_is_ndv_preserving;
+
// private copy ctor
CParseHandlerMDGPDBScalarOp(const CParseHandlerMDGPDBScalarOp &);
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h
index 51b676f7cb5586874d321ed4bc163ed2df100dc7..8ab4c239d10597a165381b087323cc74f3759d4b 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h
@@ -573,6 +573,7 @@ namespace gpdxl
EdxltokenCmpOther,
EdxltokenReturnsNullOnNullInput,
+ EdxltokenIsNDVPreserving,
EdxltokenTriggers,
EdxltokenTrigger,
@@ -598,6 +599,7 @@ namespace gpdxl
EdxltokenGPDBFuncResultTypeId,
EdxltokenGPDBFuncReturnsSet,
EdxltokenGPDBFuncStrict,
+ EdxltokenGPDBFuncNDVPreserving,
EdxltokenGPDBCast,
EdxltokenGPDBCastBinaryCoercible,
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/md/CMDFunctionGPDB.h b/src/backend/gporca/libnaucrates/include/naucrates/md/CMDFunctionGPDB.h
index 8f24e11beacff17557c59bddedddf11856f7fcaa..8547f0a36c7ca0f40ed5ce4f55d3b3b782742f51 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/md/CMDFunctionGPDB.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/md/CMDFunctionGPDB.h
@@ -50,7 +50,7 @@ namespace gpmd
IMDId *m_mdid_type_result;
// output argument types
- IMdIdArray *m_mdid_types_array;
+ IMdIdArray *m_mdid_types_array;
// whether function returns a set of values
BOOL m_returns_set;
@@ -64,6 +64,10 @@ namespace gpmd
// function strictness (i.e. whether func returns NULL on NULL input)
BOOL m_is_strict;
+ // function result has very similar number of distinct values as the
+ // single function argument (used for cardinality estimation)
+ BOOL m_is_ndv_preserving;
+
// dxl token array for stability
Edxltoken m_dxl_func_stability_array[EfsSentinel];
@@ -97,7 +101,8 @@ namespace gpmd
BOOL ReturnsSet,
EFuncStbl func_stability,
EFuncDataAcc func_data_access,
- BOOL is_strict
+ BOOL is_strict,
+ BOOL is_ndv_preserving
);
virtual
@@ -133,6 +138,12 @@ namespace gpmd
return m_is_strict;
}
+ // does function preserve NDVs of its input (used for cardinality estimation);
+ // returns the flag stored in m_is_ndv_preserving
+ virtual
+ BOOL IsNDVPreserving() const
+ {
+ return m_is_ndv_preserving;
+ }
+
// function stability
virtual
EFuncStbl GetFuncStability() const
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/md/CMDScalarOpGPDB.h b/src/backend/gporca/libnaucrates/include/naucrates/md/CMDScalarOpGPDB.h
index 83a03cbd5cf3778b932f99754802559a2032086f..ab7577355330a4302b09aaab6561fa0e1f08ddd4 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/md/CMDScalarOpGPDB.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/md/CMDScalarOpGPDB.h
@@ -71,7 +71,7 @@ namespace gpmd
// does operator return NULL when all inputs are NULL?
BOOL m_returns_null_on_null_input;
-
+
// operator classes this operator belongs to
IMdIdArray *m_mdid_opfamilies_array;
@@ -81,6 +81,10 @@ namespace gpmd
// compatible legacy hash op family using legacy (cdbhash) opclass
IMDId *m_mdid_legacy_hash_opfamily;
+ // does operator preserve the NDV of its input(s)
+ // (used for cardinality estimation)
+ BOOL m_is_ndv_preserving;
+
CMDScalarOpGPDB(const CMDScalarOpGPDB &);
public:
@@ -101,7 +105,8 @@ namespace gpmd
BOOL returns_null_on_null_input,
IMdIdArray *mdid_opfamilies_array,
IMDId *m_mdid_hash_opfamily,
- IMDId *mdid_legacy_hash_opfamily
+ IMDId *mdid_legacy_hash_opfamily,
+ BOOL is_ndv_preserving
);
~CMDScalarOpGPDB();
@@ -155,6 +160,10 @@ namespace gpmd
virtual
BOOL ReturnsNullOnNullInput() const;
+ // preserves NDVs of its inputs?
+ virtual
+ BOOL IsNDVPreserving() const;
+
// comparison type
virtual
IMDType::ECmpType ParseCmpType() const;
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/md/IMDFunction.h b/src/backend/gporca/libnaucrates/include/naucrates/md/IMDFunction.h
index 8edc61170486a57d89af7420904b9c2ca1f45719..264a1da8695330ac84231004222c6aa86ca5f526 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/md/IMDFunction.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/md/IMDFunction.h
@@ -65,6 +65,10 @@ namespace gpmd
virtual
BOOL IsStrict() const = 0;
+ // does function preserve NDVs of input (for cardinality estimation)
+ virtual
+ BOOL IsNDVPreserving() const = 0;
+
// does function return a set of values
virtual
BOOL ReturnsSet() const = 0;
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/md/IMDScalarOp.h b/src/backend/gporca/libnaucrates/include/naucrates/md/IMDScalarOp.h
index 229e8648bf8ae6541902b4a06b3fc3e02769addc..d364f0fde6793013a1a4a59b3262c84d8fdeb53c 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/md/IMDScalarOp.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/md/IMDScalarOp.h
@@ -75,6 +75,10 @@ namespace gpmd
virtual
BOOL ReturnsNullOnNullInput() const = 0;
+ // preserves NDVs of its inputs?
+ virtual
+ BOOL IsNDVPreserving() const = 0;
+
virtual
IMDType::ECmpType ParseCmpType() const = 0;
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPred.h b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPred.h
index eb9c6fb9fc227bd4ceae85a044d3b0063d615e6c..aa35eb8a7b5db60dd9452be04669da94b2a92981 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPred.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPred.h
@@ -55,9 +55,8 @@ namespace gpnaucrates
EstatscmptINDF, // is not distinct from
EstatscmptLike, // LIKE predicate comparison
EstatscmptNotLike, // NOT LIKE predicate comparison
- // NDV comparision for equality predicate on columns with functions, ex f(a) = b or a = f(b)
- EstatscmptEqNDVOuter, // use Outer NDV on inner side also
- EstatscmptEqNDVInner, // use Inner NDV on outer side also
+ // NDV comparison for equality predicate on columns with functions, ex f(a) = b or a = f(b)
+ EstatscmptEqNDV,
EstatscmptOther
};
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredJoin.h b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredJoin.h
index 9c7b43c3ecbbe6f4dc3c22e2328122dbf91ea392..836b0bb8e1838adee537aacf7b557edbf499e030 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredJoin.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredJoin.h
@@ -64,6 +64,11 @@ namespace gpnaucrates
{}
// accessors
+		// is the outer column id set, i.e. different from gpos::ulong_max
+		BOOL HasValidColIdOuter() const
+		{
+			const BOOL is_unset = (gpos::ulong_max == m_colidOuter);
+
+			return !is_unset;
+		}
+
ULONG ColIdOuter() const
{
return m_colidOuter;
@@ -75,6 +80,11 @@ namespace gpnaucrates
return m_stats_cmp_type;
}
+		// is the inner column id set, i.e. different from gpos::ulong_max
+		BOOL HasValidColIdInner() const
+		{
+			const BOOL is_unset = (gpos::ulong_max == m_colidInner);
+
+			return !is_unset;
+		}
+
ULONG ColIdInner() const
{
return m_colidInner;
diff --git a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredUtils.h b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredUtils.h
index 955a352fc607ea71f630718c0ce279b5b70ddcb1..c5daa20e2da23ce6935cefa50951aafc037eb1d9 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredUtils.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatsPredUtils.h
@@ -140,32 +140,40 @@ namespace gpopt
static
CStatsPred::EStatsCmpType GetStatsCmpType(IMDId *mdid);
- // derive whether it is EstatscmptEqNDVInner or EstatscmptEqNDVOuter
- static
- CStatsPred::EStatsCmpType DeriveStatCmpEqNDVType ( ULONG left_index, ULONG right_index, BOOL left_is_null, BOOL right_is_null);
-
// helper function to extract statistics join filter from a given join predicate
static
CStatsPredJoin *ExtractJoinStatsFromJoinPred
(
CMemoryPool *mp,
CExpression *join_predicate_expr,
- CColRefSetArray *join_output_col_refset, // array of output columns of join's relational inputs
+ CColRefSetArray *join_output_col_refset, // array of output columns of join's relational inputs
CColRefSet *outer_refs,
+ BOOL is_semi_or_anti_join,
CExpressionArray *unsupported_predicates_expr
);
- // is the expression a comparison of scalar idents (or casted scalar idents).
- // If so, extract relevant info.
+ // Is the expression a comparison of scalar idents (or casted scalar idents),
+ // or of other supported expressions? If so, extract relevant info.
static
- BOOL IsPredCmpColsOrIgnoreCast
+ BOOL IsJoinPredSupportedForStatsEstimation
(
CExpression *expr,
- const CColRef **col_ref1,
+ CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
+ BOOL is_semi_or_anti_join,
CStatsPred::EStatsCmpType *stats_pred_cmp_type,
- const CColRef **col_ref2,
- BOOL &left_is_null,
- BOOL &right_is_null
+ const CColRef **col_ref_outer,
+ const CColRef **col_ref_inner
+ );
+
+ // find out which input expression refers only to the inner table and which
+ // refers only to the outer table, and return accordingly
+ static BOOL AssignExprsToOuterAndInner
+ (
+ CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
+ CExpression *expr_1,
+ CExpression *expr_2,
+ CExpression **outer_expr,
+ CExpression **inner_expr
);
public:
@@ -180,14 +188,20 @@ namespace gpopt
(
CMemoryPool *mp,
CExpression *scalar_expr,
- CColRefSetArray *output_col_refset, // array of output columns of join's relational inputs
+ CColRefSetArray *output_col_refset, // array of output columns of join's relational inputs
CColRefSet *outer_refs,
+ BOOL is_semi_or_anti_join,
CStatsPred **unsupported_pred_stats
);
// helper function to extract array of statistics join filter from an expression handle
static
- CStatsPredJoinArray *ExtractJoinStatsFromExprHandle(CMemoryPool *mp, CExpressionHandle &expr_handle);
+ CStatsPredJoinArray *ExtractJoinStatsFromExprHandle
+ (
+ CMemoryPool *mp,
+ CExpressionHandle &expr_handle,
+ BOOL is_semi_or_anti_join
+ );
// helper function to extract array of statistics join filter from an expression
static
@@ -197,7 +211,8 @@ namespace gpopt
CExpressionHandle &expr_handle,
CExpression *scalar_expression,
CColRefSetArray *output_col_refset,
- CColRefSet *outer_refs
+ CColRefSet *outer_refs,
+ BOOL is_semi_or_anti_join
);
// is the predicate a conjunctive or disjunctive predicate
diff --git a/src/backend/gporca/libnaucrates/src/md/CMDFunctionGPDB.cpp b/src/backend/gporca/libnaucrates/src/md/CMDFunctionGPDB.cpp
index ee1abb2149fae654223054c94a5f495c9a1311d3..360840eb9216f2ebaa98a588ad65a948e251aa91 100644
--- a/src/backend/gporca/libnaucrates/src/md/CMDFunctionGPDB.cpp
+++ b/src/backend/gporca/libnaucrates/src/md/CMDFunctionGPDB.cpp
@@ -38,7 +38,8 @@ CMDFunctionGPDB::CMDFunctionGPDB
BOOL ReturnsSet,
EFuncStbl func_stability,
EFuncDataAcc func_data_access,
- BOOL is_strict
+ BOOL is_strict,
+ BOOL is_ndv_preserving
)
:
m_mp(mp),
@@ -49,7 +50,8 @@ CMDFunctionGPDB::CMDFunctionGPDB
m_returns_set(ReturnsSet),
m_func_stability(func_stability),
m_func_data_access(func_data_access),
- m_is_strict(is_strict)
+ m_is_strict(is_strict),
+ m_is_ndv_preserving(is_ndv_preserving)
{
GPOS_ASSERT(m_mdid->IsValid());
GPOS_ASSERT(EfsSentinel > func_stability);
@@ -228,6 +230,7 @@ CMDFunctionGPDB::Serialize
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenGPDBFuncStability), GetFuncStabilityStr());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenGPDBFuncDataAccess), GetFuncDataAccessStr());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenGPDBFuncStrict), m_is_strict);
+ xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenGPDBFuncNDVPreserving), m_is_ndv_preserving);
SerializeMDIdAsElem(xml_serializer, CDXLTokens::GetDXLTokenStr(EdxltokenGPDBFuncResultTypeId), m_mdid_type_result);
diff --git a/src/backend/gporca/libnaucrates/src/md/CMDScalarOpGPDB.cpp b/src/backend/gporca/libnaucrates/src/md/CMDScalarOpGPDB.cpp
index 0dde8ab3e47b76044b6007810bca4f310b6f0997..4a8012a4b386f29664f607f3ed89788275ab4a2f 100644
--- a/src/backend/gporca/libnaucrates/src/md/CMDScalarOpGPDB.cpp
+++ b/src/backend/gporca/libnaucrates/src/md/CMDScalarOpGPDB.cpp
@@ -43,7 +43,8 @@ CMDScalarOpGPDB::CMDScalarOpGPDB
BOOL returns_null_on_null_input,
IMdIdArray *mdid_opfamilies_array,
IMDId *mdid_hash_opfamily,
- IMDId *mdid_legacy_hash_opfamily
+ IMDId *mdid_legacy_hash_opfamily,
+ BOOL is_ndv_preserving
)
:
m_mp(mp),
@@ -59,7 +60,8 @@ CMDScalarOpGPDB::CMDScalarOpGPDB
m_returns_null_on_null_input(returns_null_on_null_input),
m_mdid_opfamilies_array(mdid_opfamilies_array),
m_mdid_hash_opfamily(mdid_hash_opfamily),
- m_mdid_legacy_hash_opfamily(mdid_legacy_hash_opfamily)
+ m_mdid_legacy_hash_opfamily(mdid_legacy_hash_opfamily),
+ m_is_ndv_preserving(is_ndv_preserving)
{
GPOS_ASSERT(NULL != mdid_opfamilies_array);
m_dxl_str = CDXLUtils::SerializeMDObj(m_mp, this, false /*fSerializeHeader*/, false /*indentation*/);
@@ -236,6 +238,12 @@ CMDScalarOpGPDB::ReturnsNullOnNullInput() const
}
+BOOL
+CMDScalarOpGPDB::IsNDVPreserving() const
+{
+ return m_is_ndv_preserving; // flag stored by the constructor from its is_ndv_preserving argument
+}
+
//---------------------------------------------------------------------------
// @function:
// CMDScalarOpGPDB::ParseCmpType
@@ -272,6 +280,7 @@ CMDScalarOpGPDB::Serialize
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenName), m_mdname->GetMDName());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenGPDBScalarOpCmpType), IMDType::GetCmpTypeStr(m_comparision_type));
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenReturnsNullOnNullInput), m_returns_null_on_null_input);
+ xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenIsNDVPreserving), m_is_ndv_preserving);
Edxltoken dxl_token_array[8] = {
EdxltokenGPDBScalarOpLeftTypeId, EdxltokenGPDBScalarOpRightTypeId,
diff --git a/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBFunc.cpp b/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBFunc.cpp
index 15f402662254294766cf092178ea34f74e9e3797..bf6cb3c2cbe84b851ad1eea4d216986b650cbaef 100644
--- a/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBFunc.cpp
+++ b/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBFunc.cpp
@@ -105,6 +105,17 @@ CParseHandlerMDGPDBFunc::StartElement
EdxltokenGPDBFunc
);
+ // parse whether func is NDV-preserving
+ m_is_ndv_preserving = CDXLOperatorFactory::ExtractConvertAttrValueToBool
+ (
+ m_parse_handler_mgr->GetDXLMemoryManager(),
+ attrs,
+ EdxltokenGPDBFuncNDVPreserving,
+ EdxltokenGPDBFunc,
+ true, // optional
+ false // default is false
+ );
+
// parse func stability property
const XMLCh *xmlszStbl = CDXLOperatorFactory::ExtractAttrValue
(
@@ -190,7 +201,8 @@ CParseHandlerMDGPDBFunc::EndElement
m_returns_set,
m_func_stability,
m_func_data_access,
- m_is_strict);
+ m_is_strict,
+ m_is_ndv_preserving);
// deactivate handler
m_parse_handler_mgr->DeactivateHandler();
diff --git a/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBScalarOp.cpp b/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBScalarOp.cpp
index 7adeb74aacc05713d779618a3d5e068263188985..cd074b5063f83126c951b2f9d5cb3da0d4c82b2d 100644
--- a/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBScalarOp.cpp
+++ b/src/backend/gporca/libnaucrates/src/parser/CParseHandlerMDGPDBScalarOp.cpp
@@ -53,7 +53,8 @@ CParseHandlerMDGPDBScalarOp::CParseHandlerMDGPDBScalarOp
m_comparision_type(IMDType::EcmptOther),
m_returns_null_on_null_input(false),
m_mdid_hash_opfamily(NULL),
- m_mdid_legacy_hash_opfamily(NULL)
+ m_mdid_legacy_hash_opfamily(NULL),
+ m_is_ndv_preserving(false)
{
}
@@ -122,6 +123,17 @@ CParseHandlerMDGPDBScalarOp::StartElement
);
}
+ // ndv-preserving property is optional
+ m_is_ndv_preserving = CDXLOperatorFactory::ExtractConvertAttrValueToBool
+ (
+ m_parse_handler_mgr->GetDXLMemoryManager(),
+ attrs,
+ EdxltokenIsNDVPreserving,
+ EdxltokenGPDBScalarOp,
+ true, // is optional
+ false // default value
+ );
+
}
else if (0 == XMLString::compareString(CDXLTokens::XmlstrToken(EdxltokenGPDBScalarOpLeftTypeId), element_local_name))
{
@@ -292,7 +304,8 @@ CParseHandlerMDGPDBScalarOp::EndElement
m_returns_null_on_null_input,
mdid_opfamilies_array,
m_mdid_hash_opfamily,
- m_mdid_legacy_hash_opfamily
+ m_mdid_legacy_hash_opfamily,
+ m_is_ndv_preserving
)
;
diff --git a/src/backend/gporca/libnaucrates/src/statistics/CJoinStatsProcessor.cpp b/src/backend/gporca/libnaucrates/src/statistics/CJoinStatsProcessor.cpp
index 73c10fbec2664466c51fa489f8ef4f5825e54f9a..43324bc5a83b90720ca693b87ad0f0e99cbb4939 100644
--- a/src/backend/gporca/libnaucrates/src/statistics/CJoinStatsProcessor.cpp
+++ b/src/backend/gporca/libnaucrates/src/statistics/CJoinStatsProcessor.cpp
@@ -216,6 +216,7 @@ CJoinStatsProcessor::CalcAllJoinStats
join_preds_available,
output_colrefsets,
outer_refs,
+ is_a_left_join, // left joins use an anti-semijoin internally
&unsupported_pred_stats
);
@@ -307,8 +308,11 @@ CJoinStatsProcessor::SetResultingJoinStats
{
CStatsPredJoin *join_stats = (*join_pred_stats_info)[i];
- (void) join_colids->ExchangeSet(join_stats->ColIdOuter());
- if (!semi_join)
+ if (join_stats->HasValidColIdOuter())
+ {
+ (void) join_colids->ExchangeSet(join_stats->ColIdOuter());
+ }
+ if (!semi_join && join_stats->HasValidColIdInner())
{
(void) join_colids->ExchangeSet(join_stats->ColIdInner());
}
@@ -331,30 +335,43 @@ CJoinStatsProcessor::SetResultingJoinStats
for (ULONG i = 0; i < num_join_conds; i++)
{
CStatsPredJoin *pred_info = (*join_pred_stats_info)[i];
- CStatsPred::EStatsCmpType stats_cmp_type = pred_info->GetCmpType();
ULONG colid1 = pred_info->ColIdOuter();
ULONG colid2 = pred_info->ColIdInner();
GPOS_ASSERT(colid1 != colid2);
- // find the histograms corresponding to the two columns
- const CHistogram *outer_histogram = outer_stats->GetHistogram(colid1);
- // are column id1 and 2 always in the order of outer inner?
- const CHistogram *inner_histogram = inner_side_stats->GetHistogram(colid2);
- GPOS_ASSERT(NULL != outer_histogram);
- GPOS_ASSERT(NULL != inner_histogram);
+ const CHistogram *outer_histogram = NULL;
+ const CHistogram *inner_histogram = NULL;
BOOL is_input_empty = CStatistics::IsEmptyJoin(outer_stats, inner_side_stats, IsLASJ);
CDouble local_scale_factor(1.0);
CHistogram *outer_histogram_after = NULL;
CHistogram *inner_histogram_after = NULL;
+
+ // find the histograms corresponding to the two columns
+ // are column id1 and 2 always in the order of outer inner?
+ if (pred_info->HasValidColIdOuter())
+ {
+ outer_histogram = outer_stats->GetHistogram(colid1);
+ GPOS_ASSERT(NULL != outer_histogram);
+ }
+ if (pred_info->HasValidColIdInner())
+ {
+ inner_histogram = inner_side_stats->GetHistogram(colid2);
+ GPOS_ASSERT(NULL != inner_histogram);
+ }
+
// When we have any form of equi join with join condition of type f(a)=b,
// we calculate the NDV of such a join as NDV(b) ( from Selinger et al.)
- if (CStatsPred::EstatscmptEqNDVOuter == stats_cmp_type)
+ if (NULL == outer_histogram)
{
- inner_histogram = outer_histogram;
+ GPOS_ASSERT(CStatsPred::EstatscmptEqNDV == pred_info->GetCmpType());
+ outer_histogram = inner_histogram;
+ colid1 = colid2;
}
- else if (CStatsPred::EstatscmptEqNDVInner == stats_cmp_type)
+ else if (NULL == inner_histogram)
{
- outer_histogram = inner_histogram;
+ GPOS_ASSERT(CStatsPred::EstatscmptEqNDV == pred_info->GetCmpType());
+ inner_histogram = outer_histogram;
+ colid2 = colid1;
}
JoinHistograms
@@ -377,7 +394,7 @@ CJoinStatsProcessor::SetResultingJoinStats
output_is_empty = JoinStatsAreEmpty(outer_stats->IsEmpty(), output_is_empty, outer_histogram, inner_histogram, outer_histogram_after, join_type);
CStatisticsUtils::AddHistogram(mp, colid1, outer_histogram_after, result_col_hist_mapping);
- if (!semi_join)
+ if (!semi_join && colid1 != colid2)
{
CStatisticsUtils::AddHistogram(mp, colid2, inner_histogram_after, result_col_hist_mapping);
}
@@ -385,6 +402,7 @@ CJoinStatsProcessor::SetResultingJoinStats
GPOS_DELETE(outer_histogram_after);
GPOS_DELETE(inner_histogram_after);
+ // remember which tables the columns came from, this info is used to combine scale factors
CColumnFactory *col_factory = COptCtxt::PoctxtFromTLS()->Pcf();
CColRef *colref_outer = col_factory->LookupColRef(colid1);
@@ -401,6 +419,9 @@ CJoinStatsProcessor::SetResultingJoinStats
// there should only be two tables involved in a join condition
// if the predicate is more complex (i.e. more than 2 tables involved in the predicate such as t1.a=t2.a+t3.a),
// the mdid of the base table will be NULL:
+ // Note that we hash on the pointer to the Mdid, not the value of the Mdid,
+ // but we know that CColRef::GetMdidTable() will always return the same
+ // pointer for a given table.
mdid_pair = GPOS_NEW(mp) IMdIdArray(mp, 2);
mdid_outer->AddRef();
mdid_inner->AddRef();
diff --git a/src/backend/gporca/libnaucrates/src/statistics/CLeftOuterJoinStatsProcessor.cpp b/src/backend/gporca/libnaucrates/src/statistics/CLeftOuterJoinStatsProcessor.cpp
index 398ed9ffa2e1fb8c0bed38e8acd0f5adbdbab466..5c7bbc162d333a2930a661b6012f94235b6fdca0 100644
--- a/src/backend/gporca/libnaucrates/src/statistics/CLeftOuterJoinStatsProcessor.cpp
+++ b/src/backend/gporca/libnaucrates/src/statistics/CLeftOuterJoinStatsProcessor.cpp
@@ -97,11 +97,14 @@ CLeftOuterJoinStatsProcessor::MakeLOJHistogram
GPOS_ASSERT(NULL != inner_join_stats);
// build a bitset with all outer child columns contributing to the join
- CBitSet *outer_side_cols = GPOS_NEW(mp) CBitSet(mp);
+ CBitSet *outer_side_join_cols = GPOS_NEW(mp) CBitSet(mp);
for (ULONG j = 0; j < join_preds_stats->Size(); j++)
{
CStatsPredJoin *join_stats = (*join_preds_stats)[j];
- (void) outer_side_cols->ExchangeSet(join_stats->ColIdOuter());
+ if (join_stats->HasValidColIdOuter())
+ {
+ (void) outer_side_join_cols->ExchangeSet(join_stats->ColIdOuter());
+ }
}
// for the columns in the outer child, compute the buckets that do not contribute to the inner join
@@ -129,7 +132,7 @@ CLeftOuterJoinStatsProcessor::MakeLOJHistogram
const CHistogram *inner_join_histogram = inner_join_stats->GetHistogram(colid);
GPOS_ASSERT(NULL != inner_join_histogram);
- if (outer_side_cols->Get(colid))
+ if (outer_side_join_cols->Get(colid))
{
// add buckets from the outer histogram that do not contribute to the inner join
const CHistogram *LASJ_histogram = LASJ_stats->GetHistogram(colid);
@@ -167,7 +170,7 @@ CLeftOuterJoinStatsProcessor::MakeLOJHistogram
// clean up
inner_colids_with_stats->Release();
outer_colids_with_stats->Release();
- outer_side_cols->Release();
+ outer_side_join_cols->Release();
return LOJ_histograms;
}
diff --git a/src/backend/gporca/libnaucrates/src/statistics/CLeftSemiJoinStatsProcessor.cpp b/src/backend/gporca/libnaucrates/src/statistics/CLeftSemiJoinStatsProcessor.cpp
index e93489d11e3e9c5ba25ccd50f73bac90e23ac7c4..505f9a308031e0bf3419290f03fcc4536bbf92d7 100644
--- a/src/backend/gporca/libnaucrates/src/statistics/CLeftSemiJoinStatsProcessor.cpp
+++ b/src/backend/gporca/libnaucrates/src/statistics/CLeftSemiJoinStatsProcessor.cpp
@@ -34,8 +34,11 @@ CLeftSemiJoinStatsProcessor::CalcLSJoinStatsStatic
ULongPtrArray *inner_colids = GPOS_NEW(mp) ULongPtrArray(mp);
for (ULONG ul = 0; ul < length; ul++)
{
- ULONG colid = ((*join_preds_stats)[ul])->ColIdInner();
- inner_colids->Append(GPOS_NEW(mp) ULONG(colid));
+ if ((*join_preds_stats)[ul]->HasValidColIdInner())
+ {
+ ULONG colid = ((*join_preds_stats)[ul])->ColIdInner();
+ inner_colids->Append(GPOS_NEW(mp) ULONG(colid));
+ }
}
// dummy agg columns required for group by derivation
diff --git a/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp b/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp
index d0a3c6548520d097339097d1abbaaa537a143aba..10bd6fb51d5bd0ae829629b8f8c4b51be16e5ac3 100644
--- a/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp
+++ b/src/backend/gporca/libnaucrates/src/statistics/CStatisticsUtils.cpp
@@ -1180,6 +1180,7 @@ CStatisticsUtils::DeriveStatsForDynamicScan
scalar_expr,
output_colrefs,
outer_refs,
+ true, // semi-join
&unsupported_pred_stats
);
@@ -1863,9 +1864,7 @@ CStatisticsUtils::IsStatsCmpTypeNdvEq
CStatsPred::EStatsCmpType stats_cmp_type
)
{
- return (CStatsPred::EstatscmptEqNDVOuter == stats_cmp_type ||
- CStatsPred::EstatscmptEqNDVInner == stats_cmp_type
- );
+ return (CStatsPred::EstatscmptEqNDV == stats_cmp_type);
}
//---------------------------------------------------------------------------
// @function:
diff --git a/src/backend/gporca/libnaucrates/src/statistics/CStatsPredUtils.cpp b/src/backend/gporca/libnaucrates/src/statistics/CStatsPredUtils.cpp
index ab32d7d4e7b544d299c827182447915a17b5f1e7..71580fda91ec7869068d8161c48fbe17860e064c 100644
--- a/src/backend/gporca/libnaucrates/src/statistics/CStatsPredUtils.cpp
+++ b/src/backend/gporca/libnaucrates/src/statistics/CStatsPredUtils.cpp
@@ -59,34 +59,29 @@ CStatsPredUtils::StatsCmpType
CStatsPred::EStatsCmpType stats_cmp_type = CStatsPred::EstatscmptOther;
+ CWStringConst str_eq(GPOS_WSZ_LIT("="));
CWStringConst str_lt(GPOS_WSZ_LIT("<"));
CWStringConst str_leq(GPOS_WSZ_LIT("<="));
- CWStringConst str_eq(GPOS_WSZ_LIT("="));
CWStringConst str_geq(GPOS_WSZ_LIT(">="));
CWStringConst str_gt(GPOS_WSZ_LIT(">"));
CWStringConst str_neq(GPOS_WSZ_LIT("<>"));
- if (str_opname->Equals(&str_lt))
+ if (str_opname->Equals(&str_eq))
+ {
+ stats_cmp_type = CStatsPred::EstatscmptEq;
+ } else if (str_opname->Equals(&str_lt))
{
stats_cmp_type = CStatsPred::EstatscmptL;
- }
- if (str_opname->Equals(&str_leq))
+ } else if (str_opname->Equals(&str_leq))
{
stats_cmp_type = CStatsPred::EstatscmptLEq;
- }
- if (str_opname->Equals(&str_eq))
- {
- stats_cmp_type = CStatsPred::EstatscmptEq;
- }
- if (str_opname->Equals(&str_geq))
+ } else if (str_opname->Equals(&str_geq))
{
stats_cmp_type = CStatsPred::EstatscmptGEq;
- }
- if (str_opname->Equals(&str_gt))
+ } else if (str_opname->Equals(&str_gt))
{
stats_cmp_type = CStatsPred::EstatscmptG;
- }
- if (str_opname->Equals(&str_neq))
+ } else if (str_opname->Equals(&str_neq))
{
stats_cmp_type = CStatsPred::EstatscmptNEq;
}
@@ -323,40 +318,69 @@ CStatsPredUtils::GetPredStats
//---------------------------------------------------------------------------
-// @function:
-// CStatsPredUtils::IsPredCmpColsOrIgnoreCast
+// CStatsPredUtils::IsJoinPredSupportedForStatsEstimation
//
-// @doc:
-// Is the expression a comparison of scalar ident or cast of a scalar ident?
-// Extract relevant info.
+// Given a join predicate <expr>, return whether this is a supported
+// join predicate for cardinality estimation, and what method to use
+// to build the join statistics.
+//
+// Also return ColRefs for those sides of the comparison predicate that
+// can be used (either the entire histogram or just the NDV).
+//
+// Supported predicates:
+//
+// All of these must reference the outer table only on one side
+// and the inner table only on the other side.
//
+// col1 <op> col2 (op could be INDF, IDF, =, <, <=, >, >=, <>)
+// col1 = p(col2) (p is an NDV-preserving function)
+// p(col1) = p(col2)
+// col1 = expr(col2...coln)
+// p(col1) = expr(col2...coln)
+//
+// plus variations of the above, flipping sides and adding casts.
+// Non-NDV-preserving expressions are not allowed on the inner side
+// of semi and anti-semijoins because we need the NDV of the join column
+// for those (LOJ stats are calculated using a semi-join, so the
+// restriction affects those as well).
+//
+// For all but the first line above, we use an NDV-based stats method.
//---------------------------------------------------------------------------
BOOL
-CStatsPredUtils::IsPredCmpColsOrIgnoreCast
+CStatsPredUtils::IsJoinPredSupportedForStatsEstimation
(
CExpression *expr,
- const CColRef **col_ref_left,
+ CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
+ BOOL is_semi_or_anti_join,
CStatsPred::EStatsCmpType *stats_pred_cmp_type,
- const CColRef **col_ref_right,
- BOOL &left_is_null,
- BOOL &right_is_null
+ const CColRef **col_ref_outer,
+ const CColRef **col_ref_inner
)
{
- GPOS_ASSERT(NULL != col_ref_left);
- GPOS_ASSERT(NULL != col_ref_right);
+ GPOS_ASSERT(NULL != col_ref_outer);
+ GPOS_ASSERT(NULL != col_ref_inner);
+ GPOS_ASSERT(NULL == *col_ref_outer);
+ GPOS_ASSERT(NULL == *col_ref_inner);
COperator *expr_op = expr->Pop();
BOOL is_INDF = CPredicateUtils::FINDF(expr);
BOOL is_IDF = CPredicateUtils::FIDF(expr);
BOOL is_scalar_cmp = (COperator::EopScalarCmp == expr_op->Eopid());
+ // left and right children of our join pred operator
+ CExpression *expr_left = NULL;
+ CExpression *expr_right = NULL;
+
+ // initialize output parameters
+ *col_ref_inner = NULL;
+ *col_ref_outer = NULL;
+
if (!is_scalar_cmp && !is_INDF && !is_IDF)
{
+ // an unsupported expression
+ *stats_pred_cmp_type = CStatsPred::EstatscmptOther;
return false;
}
- CExpression *expr_left = NULL;
- CExpression *expr_right = NULL;
-
if (is_INDF)
{
(*stats_pred_cmp_type) = CStatsPred::EstatscmptINDF;
@@ -384,53 +408,135 @@ CStatsPredUtils::IsPredCmpColsOrIgnoreCast
expr_right = (*expr)[1];
}
- (*col_ref_left) = CCastUtils::PcrExtractFromScIdOrCastScId(expr_left);
- (*col_ref_right) = CCastUtils::PcrExtractFromScIdOrCastScId(expr_right);
+ // expr_left and expr_right associated with the outer and inner tables
+ CExpression *assigned_expr_outer = NULL;
+ CExpression *assigned_expr_inner = NULL;
- // if the equi join is of type f(a) = f(b) then it is unsupported stats comparison
- // So, we fall back to default stats.(from Selinger et al.)
- if (NULL == *col_ref_left && NULL == *col_ref_right)
+ if (!AssignExprsToOuterAndInner(output_col_refsets, expr_left, expr_right, &assigned_expr_outer, &assigned_expr_inner))
+ {
+ // we are not dealing with a join predicate where one side of the operator
+ // refers to the outer table and the other side refers to the inner
return false;
+ }
+
+ // check whether left or right expressions are simple columns or casts
+ // of simple columns
+ (*col_ref_outer) = CCastUtils::PcrExtractFromScIdOrCastScId(assigned_expr_outer);
+ (*col_ref_inner) = CCastUtils::PcrExtractFromScIdOrCastScId(assigned_expr_inner);
+
+ if (NULL != *col_ref_outer && NULL != *col_ref_inner)
+ {
+ // a simple predicate of the form col1 <op> col2 (casts are allowed)
+ return true;
+ }
- if (NULL == *col_ref_left || NULL == *col_ref_right)
+ // if the scalar cmp is of equality type, we may not have been able to extract
+ // the column references of scalar ident if they had any other expression than cast
+ // on top of them.
+ // in such cases, check if there is still a possibility to extract scalar ident,
+ // if there is more than one column reference on either side, this is unsupported
+ // If supported, mark the comparison as NDV-based
+
+ if (*stats_pred_cmp_type == CStatsPred::EstatscmptEq)
{
- if (NULL == *col_ref_left)
+ BOOL outer_is_ndv_preserving =
+ (NULL != *col_ref_outer || CUtils::IsExprNDVPreserving(assigned_expr_outer, col_ref_outer));
+ BOOL inner_is_ndv_preserving =
+ (NULL != *col_ref_inner || CUtils::IsExprNDVPreserving(assigned_expr_inner, col_ref_inner));
+
+ if (!outer_is_ndv_preserving && !inner_is_ndv_preserving)
{
- left_is_null = true;
+ // join pred of the form f(a) = f(b) with neither side NDV-preserving, this is not supported
+ return false;
}
- if (NULL == *col_ref_right)
+ if (is_semi_or_anti_join && !inner_is_ndv_preserving)
{
- right_is_null = true;
+ // non-NDV-preserving functions on the inner of a semi-join or anti-semijoin
+ // are not supported, we need the NDV of the inner join columns to calculate
+ // the stats
+ return false;
}
- // if the scalar cmp is of equality type, we may not have been able to extract
- // the column referenes of scalar ident if they had any other expression than cast
- // on top of them.
- // in such cases, check if there is still a possibility to extract scalar ident,
- // if there is more than one column reference on either side, this is unsupported
- // If supported, mark the comparison as NDV-based
+ // a join predicate that involves an NDV-preserving function on at least one side, one of
+ // *col_ref_inner and *col_ref_outer may be NULL. If expr(...) is a non-NDV-preserving
+ // expression and p is an NDV-preserving function, then we can have one of the following
+ // (including variations with flipped sides and casts added):
+ // col1 = p(col2) (use max of both NDVs)
+ // p(col1) = p(col2) (use max of both NDVs)
+ // col1 = expr(col2...coln) (use NDV of col1)
+ // p(col1) = expr(col2...coln) (use NDV of col1)
+ *stats_pred_cmp_type = CStatsPred::EstatscmptEqNDV;
+ return true;
+ }
- if (*stats_pred_cmp_type == CStatsPred::EstatscmptEq)
- {
- (*col_ref_left) = CUtils::PcrExtractFromScExpression(expr_left);
- (*col_ref_right) = CUtils::PcrExtractFromScExpression(expr_right);
-
- if (NULL == *col_ref_left || NULL == *col_ref_right)
- {
- return false;
- }
+ // failed to extract a scalar ident
+ return false;
+}
- return true;
- }
- // failed to extract a scalar ident
+
+BOOL
+CStatsPredUtils::AssignExprsToOuterAndInner
+ (
+ CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
+ CExpression *expr_1, // one operand of the join predicate
+ CExpression *expr_2, // the other operand of the join predicate
+ CExpression **outer_expr, // out: operand assigned to the input with the lower index; written only when true is returned
+ CExpression **inner_expr // out: operand assigned to the input with the higher index; written only when true is returned
+ )
+{
+ // Decide which operand uses columns of only one join input and which uses columns of only the other one; return false if no such split exists. See also CPhysicalJoin::FPredKeysSeparated(), which returns similar info
+ CColRefSet *used_cols_1 = expr_1->DeriveUsedColumns();
+ CColRefSet *used_cols_2 = expr_2->DeriveUsedColumns();
+ ULONG child_index_1 = 0; // index into output_col_refsets of the input found for expr_1
+ ULONG child_index_2 = 0; // index into output_col_refsets of the input found for expr_2
+
+ if (0 == used_cols_1->Size() || 0 == used_cols_2->Size())
+ {
+ // one of the sides uses no columns at all (e.g. it is a constant)
+ return false;
+ }
+
+ // try just one ColRef from each side and find the associated input table
+ child_index_1 = CUtils::UlPcrIndexContainingSet(output_col_refsets, used_cols_1->PcrAny());
+ child_index_2 = CUtils::UlPcrIndexContainingSet(output_col_refsets, used_cols_2->PcrAny());
+
+ if (gpos::ulong_max == child_index_1 || gpos::ulong_max == child_index_2)
+ {
+ // the predicate refers to columns that are not available
+ // (predicate from NAry join that refers to tables not yet being processed)
+ return false;
+ }
+ if (child_index_1 == child_index_2)
+ {
+ // both sides refer to the same input table
+ return false;
+ }
+
+ // we tried one ColRef above, now try all of them, if there are multiple (a single-column side is presumably already covered by the lookup above)
+ if ((1 < used_cols_1->Size() && !(*output_col_refsets)[child_index_1]->ContainsAll(used_cols_1)) ||
+ (1 < used_cols_2->Size() && !(*output_col_refsets)[child_index_2]->ContainsAll(used_cols_2)))
+ {
+ // at least one of the sides refers to more than one input table
return false;
}
+ if (child_index_1 < child_index_2) // the lower index is treated as the outer side
+ {
+ GPOS_ASSERT(0 == child_index_1 && 1 == child_index_2);
+ *outer_expr = expr_1;
+ *inner_expr = expr_2;
+ }
+ else
+ {
+ GPOS_ASSERT(0 == child_index_2 && 1 == child_index_1);
+ *outer_expr = expr_2;
+ *inner_expr = expr_1;
+ }
+
return true;
}
-
//---------------------------------------------------------------------------
// @function:
// CStatsPredUtils::ExtractPredStats
@@ -1133,28 +1239,6 @@ CStatsPredUtils::GetStatsPredFromBoolExpr
return GPOS_NEW(mp) CStatsPredPoint(colid, CStatsPred::EstatscmptEq, GPOS_NEW(mp) CPoint(datum));
}
-CStatsPred::EStatsCmpType
-CStatsPredUtils::DeriveStatCmpEqNDVType
- (
- ULONG left_index,
- ULONG right_index,
- BOOL left_is_null,
- BOOL right_is_null
- )
-{
- GPOS_ASSERT(left_is_null || right_is_null);
-
- // given an equi join condition f(a) = b, if the func is on
- // outer side, consider the NDV stats on inner
- if ((left_is_null && (left_index < right_index)) ||
- (right_is_null && (right_index < left_index)))
- {
- return CStatsPred::EstatscmptEqNDVInner;
- }
-
- // otherwise consider NDV stats on outer
- return CStatsPred::EstatscmptEqNDVOuter;
-}
//---------------------------------------------------------------------------
// @function:
// CStatsPredUtils::ExtractJoinStatsFromJoinPred
@@ -1170,6 +1254,7 @@ CStatsPredUtils::ExtractJoinStatsFromJoinPred
CExpression *join_pred_expr,
CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
CColRefSet *outer_refs,
+ BOOL is_semi_or_anti_join,
CExpressionArray *unsupported_expr_array
)
{
@@ -1184,16 +1269,23 @@ CStatsPredUtils::ExtractJoinStatsFromJoinPred
return NULL;
}
- const CColRef *col_ref_left = NULL;
- const CColRef *col_ref_right = NULL;
- BOOL left_is_from_expr = false;
- BOOL right_is_from_expr = false;
+ const CColRef *col_ref_outer = NULL;
+ const CColRef *col_ref_inner = NULL;
CStatsPred::EStatsCmpType stats_cmp_type = CStatsPred::EstatscmptOther;
- BOOL fSupportedScIdentComparison = IsPredCmpColsOrIgnoreCast(join_pred_expr, &col_ref_left, &stats_cmp_type, &col_ref_right, left_is_from_expr, right_is_from_expr);
+ BOOL fSupportedScIdentComparison = IsJoinPredSupportedForStatsEstimation
+ (
+ join_pred_expr,
+ output_col_refsets,
+ is_semi_or_anti_join,
+ &stats_cmp_type,
+ &col_ref_outer,
+ &col_ref_inner
+ );
if (fSupportedScIdentComparison && CStatsPred::EstatscmptOther != stats_cmp_type)
{
- if (!IMDType::StatsAreComparable(col_ref_left->RetrieveType(), col_ref_right->RetrieveType()))
+ if (NULL != col_ref_outer && NULL != col_ref_inner &&
+ !IMDType::StatsAreComparable(col_ref_outer->RetrieveType(), col_ref_inner->RetrieveType()))
{
// unsupported statistics comparison between the histogram boundaries of the columns
join_pred_expr->AddRef();
@@ -1201,24 +1293,10 @@ CStatsPredUtils::ExtractJoinStatsFromJoinPred
return NULL;
}
- ULONG index_left = CUtils::UlPcrIndexContainingSet(output_col_refsets, col_ref_left);
- ULONG index_right = CUtils::UlPcrIndexContainingSet(output_col_refsets, col_ref_right);
+ ULONG outer_id = (NULL != col_ref_outer ? col_ref_outer->Id() : gpos::ulong_max);
+ ULONG inner_id = (NULL != col_ref_inner ? col_ref_inner->Id() : gpos::ulong_max);
- if (left_is_from_expr || right_is_from_expr)
- {
- stats_cmp_type = DeriveStatCmpEqNDVType(index_left, index_right, left_is_from_expr, right_is_from_expr);
- }
-
- if (gpos::ulong_max != index_left && gpos::ulong_max != index_right &&
- index_left != index_right)
- {
- if (index_left < index_right)
- {
- return GPOS_NEW(mp) CStatsPredJoin(col_ref_left->Id(), stats_cmp_type, col_ref_right->Id());
- }
-
- return GPOS_NEW(mp) CStatsPredJoin(col_ref_right->Id(), stats_cmp_type, col_ref_left->Id());
- }
+ return GPOS_NEW(mp) CStatsPredJoin(outer_id, stats_cmp_type, inner_id);
}
if (CColRefSet::FCovered(output_col_refsets, col_refset_used))
@@ -1248,6 +1326,7 @@ CStatsPredUtils::ExtractJoinStatsFromJoinPredArray
CExpression *scalar_expr,
CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
CColRefSet *outer_refs,
+ BOOL is_semi_or_antijoin,
CStatsPred **unsupported_stats_pred_array
)
{
@@ -1270,6 +1349,7 @@ CStatsPredUtils::ExtractJoinStatsFromJoinPredArray
predicate_expr,
output_col_refsets,
outer_refs,
+ is_semi_or_antijoin,
unsupported_expr_array
);
if (NULL != join_stats)
@@ -1314,7 +1394,8 @@ CStatsPredUtils::ExtractJoinStatsFromExpr
CExpressionHandle &expr_handle,
CExpression *pexprScalarInput,
CColRefSetArray *output_col_refsets, // array of output columns of join's relational inputs
- CColRefSet *outer_refs
+ CColRefSet *outer_refs,
+ BOOL is_semi_or_anti_join
)
{
GPOS_ASSERT(NULL != output_col_refsets);
@@ -1330,6 +1411,7 @@ CStatsPredUtils::ExtractJoinStatsFromExpr
scalar_expr,
output_col_refsets,
outer_refs,
+ is_semi_or_anti_join,
&unsupported_pred_stats
);
@@ -1353,8 +1435,9 @@ CStatsPredUtils::ExtractJoinStatsFromExpr
CStatsPredJoinArray *
CStatsPredUtils::ExtractJoinStatsFromExprHandle
(
- CMemoryPool *mp,
- CExpressionHandle &expr_handle
+ CMemoryPool *mp,
+ CExpressionHandle &expr_handle,
+ BOOL is_semi_or_anti_join
)
{
// in case of subquery in join predicate, we return empty stats
@@ -1376,7 +1459,15 @@ CStatsPredUtils::ExtractJoinStatsFromExprHandle
CExpression *scalar_expr = expr_handle.PexprScalarChild(expr_handle.Arity() - 1);
CColRefSet *outer_refs = expr_handle.DeriveOuterReferences();
- CStatsPredJoinArray *join_pred_stats = ExtractJoinStatsFromExpr(mp, expr_handle, scalar_expr, output_col_refsets, outer_refs);
+ CStatsPredJoinArray *join_pred_stats = ExtractJoinStatsFromExpr
+ (
+ mp,
+ expr_handle,
+ scalar_expr,
+ output_col_refsets,
+ outer_refs,
+ is_semi_or_anti_join
+ );
// clean up
output_col_refsets->Release();
diff --git a/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp b/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp
index 1c23743dfe8dbe4b55c2674d0c8d598ea3062e8e..753d5276ff37fbb5d69cbc9233f70f8d06c6282d 100644
--- a/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp
+++ b/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp
@@ -613,6 +613,7 @@ CDXLTokens::Init
{EdxltokenCmpOther, GPOS_WSZ_LIT("Other")},
{EdxltokenReturnsNullOnNullInput, GPOS_WSZ_LIT("ReturnsNullOnNullInput")},
+ {EdxltokenIsNDVPreserving, GPOS_WSZ_LIT("IsNDVPreserving")},
{EdxltokenTriggers, GPOS_WSZ_LIT("Triggers")},
{EdxltokenTrigger, GPOS_WSZ_LIT("Trigger")},
@@ -638,7 +639,8 @@ CDXLTokens::Init
{EdxltokenGPDBFuncResultTypeId, GPOS_WSZ_LIT("ResultType")},
{EdxltokenGPDBFuncReturnsSet, GPOS_WSZ_LIT("ReturnsSet")},
{EdxltokenGPDBFuncStrict, GPOS_WSZ_LIT("IsStrict")},
-
+ {EdxltokenGPDBFuncNDVPreserving, GPOS_WSZ_LIT("IsNDVPreserving")},
+
{EdxltokenGPDBAgg, GPOS_WSZ_LIT("GPDBAgg")},
{EdxltokenGPDBIsAggOrdered, GPOS_WSZ_LIT("IsOrdered")},
{EdxltokenGPDBAggResultTypeId, GPOS_WSZ_LIT("ResultType")},
diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt
index f544dfdc1caccbd87748d1cceed71578640f1e9e..fb8c08dee6a8228306c6a8793516f672bd9ae2dd 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -141,7 +141,7 @@ SingleColumnHomogenousIndexOnRoot-AO SingleColumnHomogenousIndexOnRoot-HEAP;
CStatsTest:
Stat-Derivation-Leaf-Pattern MissingBoolColStats JoinColWithOnlyNDV UnsupportedStatsPredicate
-StatsFilter-AnyWithNewColStats;
+StatsFilter-AnyWithNewColStats EquiJoinOnExpr-Supported EquiJoinOnExpr-Unsupported;
CICGMiscTest:
BroadcastSkewedHashjoin OrderByNullsFirst ConvertHashToRandomSelect ConvertHashToRandomInsert HJN-DeeperOuter CTAS CTAS-Random CheckAsUser
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index 93fb44710f2caa84e49a0bdab4f4ba91580ab3e6..92b90425fed9bc4b6c9c4a998fed7083654525c1 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -536,6 +536,7 @@ DATA(insert OID = 643 ( "<>" PGNSP PGUID b f f 19 19 16 643 93 namene neqsel
DESCR("not equal");
DATA(insert OID = 654 ( "||" PGNSP PGUID b f f 25 25 25 0 0 textcat - - ));
DESCR("concatenate");
+#define OIDTextConcatenateOperator 654
DATA(insert OID = 660 ( "<" PGNSP PGUID b f f 19 19 16 662 663 namelt scalarltsel scalarltjoinsel ));
DESCR("less than");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 34da66c1ceb40cc8adaeb2f58e5b245a72c97509..b389fcf25859affcac690585270dcecbfd258110 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1912,8 +1912,10 @@ DATA(insert OID = 868 ( strpos PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 2
DESCR("position of substring");
DATA(insert OID = 870 ( lower PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ lower _null_ _null_ _null_ ));
DESCR("lowercase");
+#define LOWER_OID 870
DATA(insert OID = 871 ( upper PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ upper _null_ _null_ _null_ ));
DESCR("uppercase");
+#define UPPER_OID 871
DATA(insert OID = 872 ( initcap PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ initcap _null_ _null_ _null_ ));
DESCR("capitalize each word");
DATA(insert OID = 873 ( lpad PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 23 25" _null_ _null_ _null_ _null_ _null_ lpad _null_ _null_ _null_ ));
@@ -1936,14 +1938,17 @@ DATA(insert OID = 880 ( rpad PGNSP PGUID 14 1 0 0 0 f f f f t f i s 2 0 25
DESCR("right-pad string to length");
DATA(insert OID = 881 ( ltrim PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ ltrim1 _null_ _null_ _null_ ));
DESCR("trim spaces from left end of string");
+#define LTRIM_SPACE_OID 881
DATA(insert OID = 882 ( rtrim PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ rtrim1 _null_ _null_ _null_ ));
DESCR("trim spaces from right end of string");
+#define RTRIM_SPACE_OID 882
DATA(insert OID = 883 ( substr PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 25 "25 23" _null_ _null_ _null_ _null_ _null_ text_substr_no_len _null_ _null_ _null_ ));
DESCR("extract portion of string");
DATA(insert OID = 884 ( btrim PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ btrim _null_ _null_ _null_ ));
DESCR("trim selected characters from both ends of string");
DATA(insert OID = 885 ( btrim PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ btrim1 _null_ _null_ _null_ ));
DESCR("trim spaces from both ends of string");
+#define BTRIM_SPACE_OID 885
DATA(insert OID = 936 ( substring PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 23 23" _null_ _null_ _null_ _null_ _null_ text_substr _null_ _null_ _null_ ));
DESCR("extract portion of string");
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index e91b5d912cae7eecbd5ee9688a49d26cb524af90..509beeca752974acbf59f10d6e0c5337dcbe976b 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -204,6 +204,9 @@ namespace gpdb {
// is the given function strict
bool FuncStrict(Oid funcid);
+ // does the given function preserve the NDVs of its inputs?
+ bool IsFuncNDVPreserving(Oid funcid);
+
// stability property of given function
char FuncStability(Oid funcid);
@@ -480,6 +483,9 @@ namespace gpdb {
// is the given operator strict
bool IsOpStrict(Oid opno);
+ // does the given operator preserve the NDVs of its inputs?
+ bool IsOpNDVPreserving(Oid opno);
+
// get input types for a given operator
void GetOpInputTypes(Oid opno, Oid *lefttype, Oid *righttype);
diff --git a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
index 3df15331a65d87dc4d03f60f4f3581d1258b9e4e..3aaa00ae63a8990aa3b686b4ec96ee4d4030674d 100644
--- a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
+++ b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
@@ -165,6 +165,7 @@ namespace gpdxl
IMDFunction::EFuncStbl *stability, // output: function stability
IMDFunction::EFuncDataAcc *access, // output: function data access
BOOL *is_strict, // output: is function strict?
+ BOOL *is_ndv_preserving, // output: preserves NDVs of inputs
BOOL *ReturnsSet // output: does function return set?
);
diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out
index 47d45285bd2a62f33f9b9947b98aa30490c13c44..c6a6f7a1735829f19f756250777086146d89a9ed 100644
--- a/src/test/regress/expected/gporca_optimizer.out
+++ b/src/test/regress/expected/gporca_optimizer.out
@@ -12264,32 +12264,35 @@ WHERE L1.lid = int4in(unknownout(meta.load_id));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------
- Result (cost=0.00..437.37 rows=134 width=8)
+ Result (cost=0.00..431.10 rows=1 width=8)
Output: c, lid
- -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.12 rows=134 width=8)
+ -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.08 rows=1 width=8)
Output: c, lid
- -> HashAggregate (cost=0.00..431.12 rows=134 width=8)
+ -> GroupAggregate (cost=0.00..431.08 rows=1 width=8)
Output: c, lid
Group Key: t55.c, t55.lid
- -> Hash Join (cost=0.00..431.08 rows=134 width=8)
+ -> Sort (cost=0.00..431.08 rows=1 width=8)
Output: c, lid
- Hash Cond: (t55.lid = int4in(unknownout(('99'))))
- -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.02 rows=334 width=8)
+ Sort Key: t55.c, t55.lid
+ -> Hash Join (cost=0.00..431.08 rows=1 width=8)
Output: c, lid
- Hash Key: lid
- -> Seq Scan on orca.t55 (cost=0.00..431.01 rows=334 width=8)
+ Hash Cond: (t55.lid = int4in(unknownout(('99'))))
+ -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.02 rows=334 width=8)
Output: c, lid
- -> Hash (cost=0.00..0.00 rows=1 width=8)
- Output: ('99')
- -> Result (cost=0.00..0.00 rows=1 width=8)
+ Hash Key: lid
+ -> Seq Scan on orca.t55 (cost=0.00..431.01 rows=334 width=8)
+ Output: c, lid
+ -> Hash (cost=0.00..0.00 rows=1 width=8)
Output: ('99')
-> Result (cost=0.00..0.00 rows=1 width=8)
- Output: ('99'), int4in(unknownout(('99')))
- -> Result (cost=0.00..0.00 rows=1 width=1)
- Output: '99'
+ Output: ('99')
+ -> Result (cost=0.00..0.00 rows=1 width=8)
+ Output: ('99'), int4in(unknownout(('99')))
+ -> Result (cost=0.00..0.00 rows=1 width=1)
+ Output: '99'
Optimizer: Pivotal Optimizer (GPORCA)
Settings: optimizer=on, optimizer_cte_inlining_bound=1000, optimizer_join_order=query, optimizer_metadata_caching=on
-(25 rows)
+(28 rows)
CREATE TABLE TP AS
WITH META AS (SELECT '2020-01-01' AS VALID_DT, '99' AS LOAD_ID)
diff --git a/src/test/regress/expected/join_optimizer.out b/src/test/regress/expected/join_optimizer.out
index d9b58f9e53472b9cb059cc1d62ae25dc59c46dbe..8508edd15f5d7101577611f211cc585e179e92ee 100755
--- a/src/test/regress/expected/join_optimizer.out
+++ b/src/test/regress/expected/join_optimizer.out
@@ -4246,23 +4246,26 @@ select * from
(tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
on (xx.id = coalesce(yy.id));
QUERY PLAN
-------------------------------------------------------
- Hash Left Join
- Hash Cond: ((1) = COALESCE((1)))
- -> Result
- -> Hash
- -> Gather Motion 3:1 (slice1; segments: 3)
- -> Merge Full Join
- Merge Cond: (unique1 = (1))
- -> Sort
- Sort Key: unique1
- -> Seq Scan on tenk1
- -> Sort
- Sort Key: (1)
- -> Result
+------------------------------------------------------------------
+ Gather Motion 3:1 (slice1; segments: 3)
+ -> Hash Left Join
+ Hash Cond: ((1) = COALESCE((1)))
+ -> Result
+ -> Result
+ -> Hash
+ -> Redistribute Motion 3:3 (slice2; segments: 3)
+ Hash Key: COALESCE((1))
+ -> Merge Full Join
+ Merge Cond: (unique1 = (1))
+ -> Sort
+ Sort Key: unique1
+ -> Seq Scan on tenk1
+ -> Sort
+ Sort Key: (1)
-> Result
- Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
-(15 rows)
+ -> Result
+ Optimizer: Pivotal Optimizer (GPORCA)
+(18 rows)
select * from
(select 1 as id) as xx