提交 05a26924 编写于 作者: O Omer Arap

Only request stats of columns needed for cardinality estimation [#150424379]

GPORCA should not spend time extracting column statistics that are not
needed for cardinality estimation. This commit eliminates the unnecessary
overhead of requesting and generating statistics for columns that are
not used in cardinality estimation.

E.g:
`CREATE TABLE foo (a int, b int, c int);`

For table foo, the query below only needs stats for column `a`, which
is the distribution column, and column `c`, which is the column used in
the where clause.
`select * from foo where c=2;`

However, prior to this commit, the column statistics for column `b` were
also calculated and passed for cardinality estimation. The only
information needed by the optimizer is the `width` of column `b`. For
this small piece of information, we transferred all of the stats for
that column.

This commit and its counterpart commit in GPDB ensure that the column
width information is passed and extracted in the `dxl:Relation` metadata
information.

Preliminary results for short-running queries show up to a 65x
performance improvement.
Signed-off-by: NJemish Patel <jpatel@pivotal.io>
上级 7ccedc81
......@@ -1140,14 +1140,8 @@ CMDAccessor::RecordColumnStats
}
}
//---------------------------------------------------------------------------
// @function:
// CMDAccessor::Pmdcolstats
//
// @doc:
// Return the column statistics meta data object for a given column of a table
//
//---------------------------------------------------------------------------
// Return the column statistics meta data object for a given column of a table
const IMDColStats *
CMDAccessor::Pmdcolstats
(
......@@ -1227,6 +1221,7 @@ CMDAccessor::Pstats
// extract column widths
CColRefSetIter crsiWidth(*pcrsWidth);
while (crsiWidth.FAdvance())
{
CColRef *pcrWidth = crsiWidth.Pcr();
......@@ -1239,11 +1234,7 @@ CMDAccessor::Pstats
INT iAttno = pcrtable->IAttno();
ULONG ulPos = pmdrel->UlPosFromAttno(iAttno);
// extract the width information
const IMDColStats *pmdcolstats = Pmdcolstats(pmp, pmdidRel, ulPos);
GPOS_ASSERT(NULL != pmdcolstats);
CDouble *pdWidth = GPOS_NEW(pmp) CDouble(pmdcolstats->DWidth());
CDouble *pdWidth = GPOS_NEW(pmp) CDouble(pmdrel->DColWidth(ulPos));
phmuldoubleWidth->FInsert(GPOS_NEW(pmp) ULONG(ulColId), pdWidth);
}
......
......@@ -255,8 +255,8 @@ CPhysicalScan::ComputeTableStats
{
GPOS_ASSERT(NULL == m_pstatsBaseTable);
CColRefSet *pcrsHist = GPOS_NEW(pmp) CColRefSet(pmp, m_pdrgpcrOutput);
CColRefSet *pcrsWidth = GPOS_NEW(pmp) CColRefSet(pmp);
CColRefSet *pcrsHist = GPOS_NEW(pmp) CColRefSet(pmp);
CColRefSet *pcrsWidth = GPOS_NEW(pmp) CColRefSet(pmp, m_pdrgpcrOutput);
CMDAccessor *pmda = COptCtxt::PoctxtFromTLS()->Pmda();
m_pstatsBaseTable = pmda->Pstats(pmp, m_ptabdesc->Pmdid(), pcrsHist, pcrsWidth);
......
......@@ -96,6 +96,9 @@ namespace gpmd
// vartypemod list
DrgPi *m_pdrgpiVarTypeMod;
// array of column widths
DrgPdouble *m_pdrgpdoubleColWidths;
// private copy ctor
CMDRelationCtasGPDB(const CMDRelationCtasGPDB &);
......@@ -177,7 +180,11 @@ namespace gpmd
// number of columns
virtual
ULONG UlColumns() const;
// width of a column with regards to the position
virtual
DOUBLE DColWidth(ULONG ulPos) const;
// does relation have dropped columns
virtual
BOOL FHasDroppedColumns() const
......
......@@ -103,6 +103,9 @@ namespace gpmd
// the original positions of all the non-dropped columns
DrgPul *m_pdrgpulNonDroppedCols;
// array of column widths including dropped columns
DrgPdouble *m_pdrgpdoubleColWidths;
// format type for the relation
const CWStringConst *PstrFormatType() const;
......@@ -156,7 +159,11 @@ namespace gpmd
// number of columns
virtual
ULONG UlColumns() const;
// width of a column with regards to the position
virtual
DOUBLE DColWidth(ULONG ulPos) const;
// does relation have dropped columns
virtual
BOOL FHasDroppedColumns() const;
......
......@@ -117,7 +117,10 @@ namespace gpmd
// the original positions of all the non-dropped columns
DrgPul *m_pdrgpulNonDroppedCols;
// array of column widths including dropped columns
DrgPdouble *m_pdrgpdoubleColWidths;
// private copy ctor
CMDRelationGPDB(const CMDRelationGPDB &);
......@@ -180,7 +183,11 @@ namespace gpmd
// number of columns
virtual
ULONG UlColumns() const;
// width of a column with regards to the position
virtual
DOUBLE DColWidth(ULONG ulPos) const;
// does relation have dropped columns
virtual
BOOL FHasDroppedColumns() const;
......
......@@ -106,7 +106,11 @@ namespace gpmd
// number of columns
virtual
ULONG UlColumns() const = 0;
// width of a column with regards to the position
virtual
DOUBLE DColWidth(ULONG ulPos) const = 0;
// does relation have dropped columns
virtual
BOOL FHasDroppedColumns() const = 0;
......
......@@ -68,7 +68,8 @@ CMDRelationCtasGPDB::CMDRelationCtasGPDB
m_phmiulAttno2Pos = GPOS_NEW(m_pmp) HMIUl(m_pmp);
m_pdrgpulNonDroppedCols = GPOS_NEW(m_pmp) DrgPul(m_pmp);
m_pdrgpdoubleColWidths = GPOS_NEW(pmp) DrgPdouble(pmp);
const ULONG ulArity = pdrgpmdcol->UlLength();
for (ULONG ul = 0; ul < ulArity; ul++)
{
......@@ -90,6 +91,8 @@ CMDRelationCtasGPDB::CMDRelationCtasGPDB
GPOS_NEW(m_pmp) INT(pmdcol->IAttno()),
GPOS_NEW(m_pmp) ULONG(ul)
);
m_pdrgpdoubleColWidths->Append(GPOS_NEW(pmp) CDouble(pmdcol->UlLength()));
}
m_pstr = CDXLUtils::PstrSerializeMDObj(m_pmp, this, false /*fSerializeHeader*/, false /*fIndent*/);
}
......@@ -110,6 +113,7 @@ CMDRelationCtasGPDB::~CMDRelationCtasGPDB()
m_pmdid->Release();
m_pdrgpmdcol->Release();
m_pdrgpdrgpulKeys->Release();
m_pdrgpdoubleColWidths->Release();
CRefCount::SafeRelease(m_pdrgpulDistrColumns);
CRefCount::SafeRelease(m_phmiulAttno2Pos);
CRefCount::SafeRelease(m_pdrgpulNonDroppedCols);
......@@ -189,6 +193,16 @@ CMDRelationCtasGPDB::UlColumns() const
return m_pdrgpmdcol->UlLength();
}
// Look up the stored width of the column at position ulPos.
// ulPos indexes m_pdrgpdoubleColWidths, which the constructor fills with one
// entry per column descriptor (taken from the descriptor's UlLength()).
DOUBLE
CMDRelationCtasGPDB::DColWidth
	(
	ULONG ulPos
	)
	const
{
	CDouble *pdWidth = (*m_pdrgpdoubleColWidths)[ulPos];
	return pdWidth->DVal();
}
//---------------------------------------------------------------------------
// @function:
......
......@@ -74,7 +74,8 @@ CMDRelationExternalGPDB::CMDRelationExternalGPDB
m_phmululNonDroppedCols = GPOS_NEW(m_pmp) HMUlUl(m_pmp);
m_phmiulAttno2Pos = GPOS_NEW(m_pmp) HMIUl(m_pmp);
m_pdrgpulNonDroppedCols = GPOS_NEW(m_pmp) DrgPul(m_pmp);
m_pdrgpdoubleColWidths = GPOS_NEW(pmp) DrgPdouble(pmp);
ULONG ulPosNonDropped = 0;
const ULONG ulArity = pdrgpmdcol->UlLength();
for (ULONG ul = 0; ul < ulArity; ul++)
......@@ -107,6 +108,7 @@ CMDRelationExternalGPDB::CMDRelationExternalGPDB
GPOS_NEW(m_pmp) INT(pmdcol->IAttno()),
GPOS_NEW(m_pmp) ULONG(ul)
);
m_pdrgpdoubleColWidths->Append(GPOS_NEW(pmp) CDouble(pmdcol->UlLength()));
}
m_pstr = CDXLUtils::PstrSerializeMDObj(m_pmp, this, false /*fSerializeHeader*/, false /*fIndent*/);
}
......@@ -129,6 +131,7 @@ CMDRelationExternalGPDB::~CMDRelationExternalGPDB()
CRefCount::SafeRelease(m_pdrgpdrgpulKeys);
m_pdrgpmdIndexInfo->Release();
m_pdrgpmdidTriggers->Release();
m_pdrgpdoubleColWidths->Release();
m_pdrgpmdidCheckConstraint->Release();
CRefCount::SafeRelease(m_pmdidFmtErrRel);
......@@ -195,6 +198,17 @@ CMDRelationExternalGPDB::UlColumns() const
return m_pdrgpmdcol->UlLength();
}
// Look up the stored width of the column at position ulPos.
// ulPos indexes m_pdrgpdoubleColWidths, which holds one entry per column
// descriptor, dropped columns included.
DOUBLE
CMDRelationExternalGPDB::DColWidth
	(
	ULONG ulPos
	)
	const
{
	CDouble *pdWidth = (*m_pdrgpdoubleColWidths)[ulPos];
	return pdWidth->DVal();
}
//---------------------------------------------------------------------------
// @function:
// CMDRelationExternalGPDB::FHasDroppedColumns
......
......@@ -86,7 +86,8 @@ CMDRelationGPDB::CMDRelationGPDB
m_phmululNonDroppedCols = GPOS_NEW(m_pmp) HMUlUl(m_pmp);
m_phmiulAttno2Pos = GPOS_NEW(m_pmp) HMIUl(m_pmp);
m_pdrgpulNonDroppedCols = GPOS_NEW(m_pmp) DrgPul(m_pmp);
m_pdrgpdoubleColWidths = GPOS_NEW(pmp) DrgPdouble(pmp);
const ULONG ulArity = pdrgpmdcol->UlLength();
ULONG ulPosNonDropped = 0;
for (ULONG ul = 0; ul < ulArity; ul++)
......@@ -117,6 +118,8 @@ CMDRelationGPDB::CMDRelationGPDB
(void) m_phmululNonDroppedCols->FInsert(GPOS_NEW(m_pmp) ULONG(ul), GPOS_NEW(m_pmp) ULONG(ulPosNonDropped));
ulPosNonDropped++;
}
m_pdrgpdoubleColWidths->Append(GPOS_NEW(pmp) CDouble(pmdcol->UlLength()));
}
m_pstr = CDXLUtils::PstrSerializeMDObj(m_pmp, this, false /*fSerializeHeader*/, false /*fIndent*/);
}
......@@ -142,6 +145,7 @@ CMDRelationGPDB::~CMDRelationGPDB()
m_pdrgpmdIndexInfo->Release();
m_pdrgpmdidTriggers->Release();
m_pdrgpmdidCheckConstraint->Release();
m_pdrgpdoubleColWidths->Release();
CRefCount::SafeRelease(m_pmdpartcnstr);
CRefCount::SafeRelease(m_phmululNonDroppedCols);
CRefCount::SafeRelease(m_phmiulAttno2Pos);
......@@ -234,6 +238,17 @@ CMDRelationGPDB::UlColumns() const
return m_pdrgpmdcol->UlLength();
}
// Look up the stored width of the column at position ulPos.
// ulPos indexes m_pdrgpdoubleColWidths, which holds one entry per column
// descriptor, dropped columns included.
DOUBLE
CMDRelationGPDB::DColWidth
	(
	ULONG ulPos
	)
	const
{
	CDouble *pdWidth = (*m_pdrgpdoubleColWidths)[ulPos];
	return pdWidth->DVal();
}
//---------------------------------------------------------------------------
// @function:
// CMDRelationGPDB::FHasDroppedColumns
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册