提交 157b0a48 编写于 作者: S Shreedhar Hardikar 提交者: Abhijit Subramanya

Support MCV based cardinality estimation for all text related types

This commit introduces support for estimation for all text related types
including extensions like citext. Prior to this commit, the optimizer would
estimate the cardinality for predicates involving types like citext, name etc
as 40% leading to cardinality mis-estimation and thus suboptimal plans for
certain queries.
Co-authored-by: Shreedhar Hardikar <shardikar@pivotal.io>
Co-authored-by: Abhijit Subramanya <asubramanya@pivotal.io>
上级 ce453cf2
......@@ -5,7 +5,7 @@ project(gpopt LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD 98)
set(GPORCA_VERSION_MAJOR 3)
set(GPORCA_VERSION_MINOR 89)
set(GPORCA_VERSION_MINOR 90)
set(GPORCA_VERSION_PATCH 0)
set(GPORCA_VERSION_STRING "${GPORCA_VERSION_MAJOR}.${GPORCA_VERSION_MINOR}.${GPORCA_VERSION_PATCH}")
......
<?xml version="1.0" encoding="UTF-8"?>
<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/">
<dxl:Comment><![CDATA[
create table t(a char);
insert into t select 'a' from generate_series(1, 100);
insert into t values('b');
analyze t;
explain select * from t where a = 'b';
-- cardinality of the scan should be 1
]]>
</dxl:Comment>
<dxl:Thread Id="0">
<dxl:OptimizerConfig>
<dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
<dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.010000" DampingFactorGroupBy="0.750000"/>
<dxl:CTEConfig CTEInliningCutoff="0"/>
<dxl:WindowOids RowNumber="3100" Rank="3101"/>
<dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
<dxl:CostParams>
<dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/>
</dxl:CostParams>
</dxl:CostModelConfig>
<dxl:Hint MinNumOfPartsToRequireSortOnInsert="2147483647" JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10"/>
<dxl:TraceFlags Value="102074,102120,102146,103001,103014,103022,103027,103029,104002,104003,104004,104005,105000,106000"/>
</dxl:OptimizerConfig>
<dxl:Metadata SystemIds="0.GPDB">
<dxl:RelationStatistics Mdid="2.16388.1.0" Name="t" Rows="101.000000" EmptyRelation="false"/>
<dxl:Relation Mdid="0.16388.1.0" Name="t" IsTemporary="false" HasOids="false" StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="7,1" NumberLeafPartitions="0">
<dxl:Columns>
<dxl:Column Name="a" Attno="1" Mdid="0.18.1.0" Nullable="true" ColWidth="1">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="xmin" Attno="-3" Mdid="0.28.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="cmin" Attno="-4" Mdid="0.29.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="xmax" Attno="-5" Mdid="0.28.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="cmax" Attno="-6" Mdid="0.29.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="tableoid" Attno="-7" Mdid="0.26.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="gp_segment_id" Attno="-8" Mdid="0.23.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
</dxl:Columns>
<dxl:IndexInfoList/>
<dxl:Triggers/>
<dxl:CheckConstraints/>
</dxl:Relation>
<dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
<dxl:EqualityOp Mdid="0.91.1.0"/>
<dxl:InequalityOp Mdid="0.85.1.0"/>
<dxl:LessThanOp Mdid="0.58.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
<dxl:GreaterThanOp Mdid="0.59.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
<dxl:ComparisonOp Mdid="0.1693.1.0"/>
<dxl:ArrayType Mdid="0.1000.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.18.1.0" Name="char" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="true" IsFixedLength="true" Length="1" PassByValue="true">
<dxl:EqualityOp Mdid="0.92.1.0"/>
<dxl:InequalityOp Mdid="0.630.1.0"/>
<dxl:LessThanOp Mdid="0.631.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.632.1.0"/>
<dxl:GreaterThanOp Mdid="0.633.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.634.1.0"/>
<dxl:ComparisonOp Mdid="0.358.1.0"/>
<dxl:ArrayType Mdid="0.1002.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.96.1.0"/>
<dxl:InequalityOp Mdid="0.518.1.0"/>
<dxl:LessThanOp Mdid="0.97.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
<dxl:GreaterThanOp Mdid="0.521.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
<dxl:ComparisonOp Mdid="0.351.1.0"/>
<dxl:ArrayType Mdid="0.1007.1.0"/>
<dxl:MinAgg Mdid="0.2132.1.0"/>
<dxl:MaxAgg Mdid="0.2116.1.0"/>
<dxl:AvgAgg Mdid="0.2101.1.0"/>
<dxl:SumAgg Mdid="0.2108.1.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.607.1.0"/>
<dxl:InequalityOp Mdid="0.608.1.0"/>
<dxl:LessThanOp Mdid="0.609.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.611.1.0"/>
<dxl:GreaterThanOp Mdid="0.610.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/>
<dxl:ComparisonOp Mdid="0.356.1.0"/>
<dxl:ArrayType Mdid="0.1028.1.0"/>
<dxl:MinAgg Mdid="0.2118.1.0"/>
<dxl:MaxAgg Mdid="0.2134.1.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false">
<dxl:EqualityOp Mdid="0.387.1.0"/>
<dxl:InequalityOp Mdid="0.402.1.0"/>
<dxl:LessThanOp Mdid="0.2799.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.2801.1.0"/>
<dxl:GreaterThanOp Mdid="0.2800.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/>
<dxl:ComparisonOp Mdid="0.2794.1.0"/>
<dxl:ArrayType Mdid="0.1010.1.0"/>
<dxl:MinAgg Mdid="0.2798.1.0"/>
<dxl:MaxAgg Mdid="0.2797.1.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.385.1.0"/>
<dxl:InequalityOp Mdid="0.0.0.0"/>
<dxl:LessThanOp Mdid="0.0.0.0"/>
<dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
<dxl:GreaterThanOp Mdid="0.0.0.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
<dxl:ComparisonOp Mdid="0.0.0.0"/>
<dxl:ArrayType Mdid="0.1012.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.352.1.0"/>
<dxl:InequalityOp Mdid="0.3315.1.0"/>
<dxl:LessThanOp Mdid="0.0.0.0"/>
<dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
<dxl:GreaterThanOp Mdid="0.0.0.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
<dxl:ComparisonOp Mdid="0.0.0.0"/>
<dxl:ArrayType Mdid="0.1011.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:ColumnStatistics Mdid="1.16388.1.0.0" Name="a" Width="1.000000" NullFreq="0.000000" NdvRemain="1.000000" FreqRemain="0.009901" ColStatsMissing="false">
<dxl:StatsBucket Frequency="0.990099" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.18.1.0" Value="YQ==" LintValue="2824300782"/>
<dxl:UpperBound Closed="true" TypeMdid="0.18.1.0" Value="YQ==" LintValue="2824300782"/>
</dxl:StatsBucket>
</dxl:ColumnStatistics>
<dxl:GPDBScalarOp Mdid="0.92.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true">
<dxl:LeftType Mdid="0.18.1.0"/>
<dxl:RightType Mdid="0.18.1.0"/>
<dxl:ResultType Mdid="0.16.1.0"/>
<dxl:OpFunc Mdid="0.61.1.0"/>
<dxl:Commutator Mdid="0.92.1.0"/>
<dxl:InverseOp Mdid="0.630.1.0"/>
<dxl:OpClasses>
<dxl:OpClass Mdid="0.429.1.0"/>
<dxl:OpClass Mdid="0.431.1.0"/>
<dxl:OpClass Mdid="0.4062.1.0"/>
<dxl:OpClass Mdid="0.7104.1.0"/>
<dxl:OpClass Mdid="0.12625.1.0"/>
</dxl:OpClasses>
</dxl:GPDBScalarOp>
</dxl:Metadata>
<dxl:Query>
<dxl:OutputColumns>
<dxl:Ident ColId="1" ColName="a" TypeMdid="0.18.1.0"/>
</dxl:OutputColumns>
<dxl:CTEList/>
<dxl:LogicalSelect>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.92.1.0">
<dxl:Ident ColId="1" ColName="a" TypeMdid="0.18.1.0"/>
<dxl:ConstValue TypeMdid="0.18.1.0" Value="Yg==" LintValue="2228706033"/>
</dxl:Comparison>
<dxl:LogicalGet>
<dxl:TableDescriptor Mdid="0.16388.1.0" TableName="t">
<dxl:Columns>
<dxl:Column ColId="1" Attno="1" ColName="a" TypeMdid="0.18.1.0" ColWidth="1"/>
<dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/>
<dxl:Column ColId="3" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="4" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="5" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="6" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="7" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/>
<dxl:Column ColId="8" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:LogicalGet>
</dxl:LogicalSelect>
</dxl:Query>
<dxl:Plan Id="0" SpaceSize="1">
<dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="431.002527" Rows="1.000001" Width="1"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="a">
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.18.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter/>
<dxl:SortingColumnList/>
<dxl:TableScan>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="431.002523" Rows="1.000001" Width="1"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="a">
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.18.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.92.1.0">
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.18.1.0"/>
<dxl:ConstValue TypeMdid="0.18.1.0" Value="Yg==" LintValue="2228706033"/>
</dxl:Comparison>
</dxl:Filter>
<dxl:TableDescriptor Mdid="0.16388.1.0" TableName="t">
<dxl:Columns>
<dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.18.1.0" ColWidth="1"/>
<dxl:Column ColId="1" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/>
<dxl:Column ColId="2" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="3" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="4" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="5" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="6" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/>
<dxl:Column ColId="7" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
</dxl:GatherMotion>
</dxl:Plan>
</dxl:Thread>
</dxl:DXLMessage>
此差异已折叠。
<?xml version="1.0" encoding="UTF-8"?>
<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/">
<dxl:Comment> <![CDATA[
create table t(a name);
insert into t select 'abc' || (i % 10) from generate_series(1, 1000) i;
delete from t where a = 'abc3';
insert into t values('abc3');
analyze t;
explain select * from t where a = 'abc3';
-- cardinality for table scan should be 1
]]>
</dxl:Comment>
<dxl:Thread Id="0">
<dxl:OptimizerConfig>
<dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
<dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.010000" DampingFactorGroupBy="0.750000"/>
<dxl:CTEConfig CTEInliningCutoff="0"/>
<dxl:WindowOids RowNumber="3100" Rank="3101"/>
<dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
<dxl:CostParams>
<dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/>
</dxl:CostParams>
</dxl:CostModelConfig>
<dxl:Hint MinNumOfPartsToRequireSortOnInsert="2147483647" JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10"/>
<dxl:TraceFlags Value="102074,102120,102146,103001,103014,103022,103027,103029,104002,104003,104004,104005,105000,106000"/>
</dxl:OptimizerConfig>
<dxl:Metadata SystemIds="0.GPDB">
<dxl:RelationStatistics Mdid="2.16385.1.0" Name="t" Rows="901.000000" EmptyRelation="false"/>
<dxl:Relation Mdid="0.16385.1.0" Name="t" IsTemporary="false" HasOids="false" StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="7,1" NumberLeafPartitions="0">
<dxl:Columns>
<dxl:Column Name="a" Attno="1" Mdid="0.19.1.0" Nullable="true" ColWidth="64">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="xmin" Attno="-3" Mdid="0.28.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="cmin" Attno="-4" Mdid="0.29.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="xmax" Attno="-5" Mdid="0.28.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="cmax" Attno="-6" Mdid="0.29.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="tableoid" Attno="-7" Mdid="0.26.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
<dxl:Column Name="gp_segment_id" Attno="-8" Mdid="0.23.1.0" Nullable="false" ColWidth="4">
<dxl:DefaultValue/>
</dxl:Column>
</dxl:Columns>
<dxl:IndexInfoList/>
<dxl:Triggers/>
<dxl:CheckConstraints/>
</dxl:Relation>
<dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
<dxl:EqualityOp Mdid="0.91.1.0"/>
<dxl:InequalityOp Mdid="0.85.1.0"/>
<dxl:LessThanOp Mdid="0.58.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
<dxl:GreaterThanOp Mdid="0.59.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
<dxl:ComparisonOp Mdid="0.1693.1.0"/>
<dxl:ArrayType Mdid="0.1000.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.19.1.0" Name="name" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="true" IsFixedLength="true" Length="64" PassByValue="false">
<dxl:EqualityOp Mdid="0.93.1.0"/>
<dxl:InequalityOp Mdid="0.643.1.0"/>
<dxl:LessThanOp Mdid="0.660.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.661.1.0"/>
<dxl:GreaterThanOp Mdid="0.662.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.663.1.0"/>
<dxl:ComparisonOp Mdid="0.359.1.0"/>
<dxl:ArrayType Mdid="0.1003.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.96.1.0"/>
<dxl:InequalityOp Mdid="0.518.1.0"/>
<dxl:LessThanOp Mdid="0.97.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
<dxl:GreaterThanOp Mdid="0.521.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
<dxl:ComparisonOp Mdid="0.351.1.0"/>
<dxl:ArrayType Mdid="0.1007.1.0"/>
<dxl:MinAgg Mdid="0.2132.1.0"/>
<dxl:MaxAgg Mdid="0.2116.1.0"/>
<dxl:AvgAgg Mdid="0.2101.1.0"/>
<dxl:SumAgg Mdid="0.2108.1.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:ColumnStatistics Mdid="1.16385.1.0.0" Name="a" Width="64.000000" NullFreq="0.000000" NdvRemain="1.000000" FreqRemain="0.001110" ColStatsMissing="false">
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="157510457"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="157510457"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjOQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="613025688"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjOQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="613025688"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="1000379473"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="1000379473"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="1462181016"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjMgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="1462181016"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3235253284"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3235253284"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3255620275"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3255620275"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3652651429"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3652651429"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3773465112"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="3773465112"/>
</dxl:StatsBucket>
<dxl:StatsBucket Frequency="0.110988" DistinctValues="1.000000">
<dxl:LowerBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="4238082864"/>
<dxl:UpperBound Closed="true" TypeMdid="0.19.1.0" Value="YWJjNwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="4238082864"/>
</dxl:StatsBucket>
</dxl:ColumnStatistics>
<dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.607.1.0"/>
<dxl:InequalityOp Mdid="0.608.1.0"/>
<dxl:LessThanOp Mdid="0.609.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.611.1.0"/>
<dxl:GreaterThanOp Mdid="0.610.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/>
<dxl:ComparisonOp Mdid="0.356.1.0"/>
<dxl:ArrayType Mdid="0.1028.1.0"/>
<dxl:MinAgg Mdid="0.2118.1.0"/>
<dxl:MaxAgg Mdid="0.2134.1.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false">
<dxl:EqualityOp Mdid="0.387.1.0"/>
<dxl:InequalityOp Mdid="0.402.1.0"/>
<dxl:LessThanOp Mdid="0.2799.1.0"/>
<dxl:LessThanEqualsOp Mdid="0.2801.1.0"/>
<dxl:GreaterThanOp Mdid="0.2800.1.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/>
<dxl:ComparisonOp Mdid="0.2794.1.0"/>
<dxl:ArrayType Mdid="0.1010.1.0"/>
<dxl:MinAgg Mdid="0.2798.1.0"/>
<dxl:MaxAgg Mdid="0.2797.1.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.385.1.0"/>
<dxl:InequalityOp Mdid="0.0.0.0"/>
<dxl:LessThanOp Mdid="0.0.0.0"/>
<dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
<dxl:GreaterThanOp Mdid="0.0.0.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
<dxl:ComparisonOp Mdid="0.0.0.0"/>
<dxl:ArrayType Mdid="0.1012.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
<dxl:EqualityOp Mdid="0.352.1.0"/>
<dxl:InequalityOp Mdid="0.3315.1.0"/>
<dxl:LessThanOp Mdid="0.0.0.0"/>
<dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
<dxl:GreaterThanOp Mdid="0.0.0.0"/>
<dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
<dxl:ComparisonOp Mdid="0.0.0.0"/>
<dxl:ArrayType Mdid="0.1011.1.0"/>
<dxl:MinAgg Mdid="0.0.0.0"/>
<dxl:MaxAgg Mdid="0.0.0.0"/>
<dxl:AvgAgg Mdid="0.0.0.0"/>
<dxl:SumAgg Mdid="0.0.0.0"/>
<dxl:CountAgg Mdid="0.2147.1.0"/>
</dxl:Type>
<dxl:GPDBScalarOp Mdid="0.93.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true">
<dxl:LeftType Mdid="0.19.1.0"/>
<dxl:RightType Mdid="0.19.1.0"/>
<dxl:ResultType Mdid="0.16.1.0"/>
<dxl:OpFunc Mdid="0.62.1.0"/>
<dxl:Commutator Mdid="0.93.1.0"/>
<dxl:InverseOp Mdid="0.643.1.0"/>
<dxl:OpClasses>
<dxl:OpClass Mdid="0.1986.1.0"/>
<dxl:OpClass Mdid="0.1987.1.0"/>
<dxl:OpClass Mdid="0.4065.1.0"/>
<dxl:OpClass Mdid="0.7108.1.0"/>
<dxl:OpClass Mdid="0.12809.1.0"/>
</dxl:OpClasses>
</dxl:GPDBScalarOp>
</dxl:Metadata>
<dxl:Query>
<dxl:OutputColumns>
<dxl:Ident ColId="1" ColName="a" TypeMdid="0.19.1.0"/>
</dxl:OutputColumns>
<dxl:CTEList/>
<dxl:LogicalSelect>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.93.1.0">
<dxl:Ident ColId="1" ColName="a" TypeMdid="0.19.1.0"/>
<dxl:ConstValue TypeMdid="0.19.1.0" Value="YWJjMwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="2072455951"/>
</dxl:Comparison>
<dxl:LogicalGet>
<dxl:TableDescriptor Mdid="0.16385.1.0" TableName="t">
<dxl:Columns>
<dxl:Column ColId="1" Attno="1" ColName="a" TypeMdid="0.19.1.0" ColWidth="64"/>
<dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/>
<dxl:Column ColId="3" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="4" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="5" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="6" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="7" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/>
<dxl:Column ColId="8" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:LogicalGet>
</dxl:LogicalSelect>
</dxl:Query>
<dxl:Plan Id="0" SpaceSize="1">
<dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="431.025686" Rows="1.000110" Width="64"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="a">
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.19.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter/>
<dxl:SortingColumnList/>
<dxl:TableScan>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="431.025448" Rows="1.000110" Width="64"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="a">
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.19.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.93.1.0">
<dxl:Ident ColId="0" ColName="a" TypeMdid="0.19.1.0"/>
<dxl:ConstValue TypeMdid="0.19.1.0" Value="YWJjMwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA&#10;AAAAAAAAAAAAAAAAAAAAAAAAAA==" LintValue="2072455951"/>
</dxl:Comparison>
</dxl:Filter>
<dxl:TableDescriptor Mdid="0.16385.1.0" TableName="t">
<dxl:Columns>
<dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.19.1.0" ColWidth="64"/>
<dxl:Column ColId="1" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/>
<dxl:Column ColId="2" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="3" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="4" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/>
<dxl:Column ColId="5" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/>
<dxl:Column ColId="6" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/>
<dxl:Column ColId="7" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
</dxl:GatherMotion>
</dxl:Plan>
</dxl:Thread>
</dxl:DXLMessage>
......@@ -46,6 +46,8 @@ typedef ULONG OID;
#define GPDB_CHAR OID(1042)
#define GPDB_VARCHAR OID(1043)
#define GPDB_TEXT OID(25)
// "name" type
#define GPDB_NAME OID(19)
// single-byte "char" type; distinct from GPDB_CHAR, which is bpchar
#define GPDB_SINGLE_CHAR OID(18)
// network related types
#define GPDB_INET OID(869)
......
......@@ -108,6 +108,9 @@ namespace gpdxl
// is type composite
BOOL m_is_composite;
// is type text related
BOOL m_is_text_related;
// id of the relation corresponding to a composite type
IMDId *m_mdid_base_rel;
......
......@@ -524,6 +524,7 @@ namespace gpdxl
EdxltokenMDTypeHashable,
EdxltokenMDTypeMergeJoinable,
EdxltokenMDTypeComposite,
EdxltokenMDTypeIsTextRelated,
EdxltokenMDTypeFixedLength,
EdxltokenMDTypeLength,
EdxltokenMDTypeByValue,
......
......@@ -48,6 +48,8 @@ namespace gpmd
mdtype->GetBaseRelMdid()->Serialize(xml_serializer, CDXLTokens::GetDXLTokenStr(EdxltokenMDTypeRelid));
}
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenMDTypeIsTextRelated), mdtype->IsTextRelated());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenMDTypeFixedLength), mdtype->IsFixedLength());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenMDTypeLength), mdtype->GetGPDBLength());
......
......@@ -213,6 +213,10 @@ namespace gpmd
static
CMDIdGPDB m_mdid_time_interval;
// char mdid
static
CMDIdGPDB m_mdid_char;
// bpchar mdid
static
CMDIdGPDB m_mdid_bpchar;
......@@ -225,6 +229,10 @@ namespace gpmd
static
CMDIdGPDB m_mdid_text;
// name mdid
static
CMDIdGPDB m_mdid_name;
// float4 mdid
static
CMDIdGPDB m_mdid_float4;
......
......@@ -119,6 +119,9 @@ namespace gpmd
// is type composite
BOOL m_is_composite_type;
// is type text related
BOOL m_is_text_related;
// id of the relation corresponding to a composite type
IMDId *m_mdid_base_relation;
......@@ -160,6 +163,7 @@ namespace gpmd
BOOL is_hashable,
BOOL is_merge_joinable,
BOOL is_composite_type,
BOOL is_text_related,
IMDId *mdid_base_relation,
IMDId *mdid_type_array,
INT gpdb_length
......@@ -234,6 +238,12 @@ namespace gpmd
return m_is_hashable;
}
// is type text related
virtual
BOOL IsTextRelated() const
{
return m_is_text_related;
}
// is type merge joinable on '='
virtual
BOOL IsMergeJoinable() const
......@@ -308,6 +318,7 @@ namespace gpmd
(
CMemoryPool *mp,
IMDId *mdid,
const IMDType *md_type,
INT type_modifier,
BOOL is_null,
BYTE *byte_array,
......@@ -346,7 +357,7 @@ namespace gpmd
// does a datum of this type need bytea to Lint mapping for statistics computation
static
BOOL HasByte2IntMapping(const IMDId *mdid);
BOOL HasByte2IntMapping(const IMDType *mdtype);
// does a datum of this type need bytea to double mapping for statistics computation
static
......
......@@ -142,6 +142,12 @@ namespace gpmd
virtual
BOOL IsComposite() const = 0;
// is type text related; returns false unless a derived type overrides it
virtual
BOOL IsTextRelated() const
{
return false;
}
// id of the relation corresponding to a composite type
virtual
IMDId *GetBaseRelMdid() const = 0;
......
......@@ -208,10 +208,6 @@ namespace gpopt
static
BOOL IsUnsupportedPredOnDefinedCol(CStatsPred *pred_stats);
// is the type varchar, bpchar or text
static
BOOL IsTextRelatedType(const IMDId *mdid);
}; // class CStatsPredUtils
}
......
......@@ -20,6 +20,9 @@
#include "naucrates/statistics/CScaleFactorUtils.h"
#include "gpopt/mdcache/CMDAccessor.h"
#include "gpopt/base/COptCtxt.h"
using namespace gpnaucrates;
using namespace gpmd;
......@@ -319,7 +322,9 @@ CDatumGenericGPDB::IsDatumMappableToDouble() const
// can this datum be mapped to a LINT value; delegates the decision to
// CMDTypeGenericGPDB::HasByte2IntMapping
// NOTE(review): this listing contains both an mdid-based call and a
// type-based call; read literally, the first return makes the statements
// after it unreachable -- confirm which form is intended
BOOL
CDatumGenericGPDB::IsDatumMappableToLINT() const
{
return CMDTypeGenericGPDB::HasByte2IntMapping(this->MDId());
// look up the datum's type via the metadata accessor of the optimizer context
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
const IMDType *type = md_accessor->RetrieveType(MDId());
return CMDTypeGenericGPDB::HasByte2IntMapping(type);
}
......
......@@ -65,6 +65,9 @@ CMDIdGPDB CMDIdGPDB::m_mdid_interval(GPDB_INTERVAL);
// time interval mdid
CMDIdGPDB CMDIdGPDB::m_mdid_time_interval(GPDB_TIMEINTERVAL);
// char mdid
CMDIdGPDB CMDIdGPDB::m_mdid_char(GPDB_SINGLE_CHAR);
// bpchar mdid
CMDIdGPDB CMDIdGPDB::m_mdid_bpchar(GPDB_CHAR);
......@@ -74,6 +77,9 @@ CMDIdGPDB CMDIdGPDB::m_mdid_varchar(GPDB_VARCHAR);
// text mdid
CMDIdGPDB CMDIdGPDB::m_mdid_text(GPDB_TEXT);
// name mdid
CMDIdGPDB CMDIdGPDB::m_mdid_name(GPDB_NAME);
// float4 mdid
CMDIdGPDB CMDIdGPDB::m_mdid_float4(GPDB_FLOAT4);
......
......@@ -9,12 +9,15 @@
// Implementation of the class for representing GPDB generic types
//---------------------------------------------------------------------------
#include "gpopt/base/COptCtxt.h"
#include "gpos/string/CWStringDynamic.h"
#include "naucrates/md/CMDTypeGenericGPDB.h"
#include "naucrates/md/CGPDBTypeHelper.h"
#include "naucrates/base/CDatumGenericGPDB.h"
#include "naucrates/statistics/CStatsPredUtils.h"
#include "naucrates/dxl/operators/CDXLScalarConstValue.h"
#include "naucrates/dxl/operators/CDXLDatumStatsDoubleMappable.h"
......@@ -68,6 +71,7 @@ CMDTypeGenericGPDB::CMDTypeGenericGPDB
BOOL is_hashable,
BOOL is_merge_joinable,
BOOL is_composite_type,
BOOL is_text_related,
IMDId *mdid_base_relation,
IMDId *mdid_type_array,
INT gpdb_length
......@@ -95,6 +99,7 @@ CMDTypeGenericGPDB::CMDTypeGenericGPDB
m_is_hashable(is_hashable),
m_is_merge_joinable(is_merge_joinable),
m_is_composite_type(is_composite_type),
m_is_text_related(is_text_related),
m_mdid_base_relation(mdid_base_relation),
m_mdid_type_array(mdid_type_array),
m_gpdb_length(gpdb_length),
......@@ -369,7 +374,9 @@ CMDTypeGenericGPDB::GetDatumVal
dValue = datum_generic->GetDoubleMapping();
}
return CreateDXLDatumVal(mp, m_mdid, datum_generic->TypeModifier(), datum_generic->IsNull(), pba, length, lValue, dValue);
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
const IMDType *md_type = md_accessor->RetrieveType(m_mdid);
return CreateDXLDatumVal(mp, m_mdid, md_type, datum_generic->TypeModifier(), datum_generic->IsNull(), pba, length, lValue, dValue);
}
//---------------------------------------------------------------------------
......@@ -404,6 +411,7 @@ CMDTypeGenericGPDB::CreateDXLDatumVal
(
CMemoryPool *mp,
IMDId *mdid,
const IMDType *md_type,
INT type_modifier,
BOOL is_null,
BYTE *pba,
......@@ -414,40 +422,17 @@ CMDTypeGenericGPDB::CreateDXLDatumVal
{
GPOS_ASSERT(IMDId::EmdidGPDB == mdid->MdidType());
const CMDIdGPDB * const pmdidGPDB = CMDIdGPDB::CastMdid(mdid);
switch (pmdidGPDB->Oid())
if (HasByte2DoubleMapping(mdid))
{
// numbers
case GPDB_NUMERIC:
case GPDB_FLOAT4:
case GPDB_FLOAT8:
return CMDTypeGenericGPDB::CreateDXLDatumStatsDoubleMappable(mp, mdid, type_modifier, is_null, pba, length, lValue, dValue);
// has lint mapping
case GPDB_CHAR:
case GPDB_VARCHAR:
case GPDB_TEXT:
case GPDB_CASH:
case GPDB_UUID:
return CMDTypeGenericGPDB::CreateDXLDatumStatsIntMappable(mp, mdid, type_modifier, is_null, pba, length, lValue, dValue);
// time-related types
case GPDB_DATE:
case GPDB_TIME:
case GPDB_TIMETZ:
case GPDB_TIMESTAMP:
case GPDB_TIMESTAMPTZ:
case GPDB_ABSTIME:
case GPDB_RELTIME:
case GPDB_INTERVAL:
case GPDB_TIMEINTERVAL:
return CMDTypeGenericGPDB::CreateDXLDatumStatsDoubleMappable(mp, mdid, type_modifier, is_null, pba, length, lValue, dValue);
// network-related types
case GPDB_INET:
case GPDB_CIDR:
case GPDB_MACADDR:
return CMDTypeGenericGPDB::CreateDXLDatumStatsDoubleMappable(mp, mdid, type_modifier, is_null, pba, length, lValue, dValue);
default:
return GPOS_NEW(mp) CDXLDatumGeneric(mp, mdid, type_modifier, is_null, pba, length);
return CMDTypeGenericGPDB::CreateDXLDatumStatsDoubleMappable(mp, mdid, type_modifier, is_null, pba, length, lValue, dValue);
}
if (HasByte2IntMapping(md_type))
{
return CMDTypeGenericGPDB::CreateDXLDatumStatsIntMappable(mp, mdid, type_modifier, is_null, pba, length, lValue, dValue);
}
return GPOS_NEW(mp) CDXLDatumGeneric(mp, mdid, type_modifier, is_null, pba, length);
}
......@@ -498,7 +483,6 @@ CMDTypeGenericGPDB::CreateDXLDatumStatsIntMappable
CDouble // double_value
)
{
GPOS_ASSERT(CMDTypeGenericGPDB::HasByte2IntMapping(mdid));
return GPOS_NEW(mp) CDXLDatumStatsLintMappable(mp, mdid, type_modifier, is_null, byte_array, length, lint_value);
}
......@@ -539,8 +523,9 @@ CMDTypeGenericGPDB::GetDXLDatumNull
const
{
m_mdid->AddRef();
return CreateDXLDatumVal(mp, m_mdid, default_type_modifier, true /*fConstNull*/, NULL /*byte_array*/, 0 /*length*/, 0 /*lint_value */, 0 /*double_value */);
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
const IMDType *md_type = md_accessor->RetrieveType(m_mdid);
return CreateDXLDatumVal(mp, m_mdid, md_type, default_type_modifier, true /*fConstNull*/, NULL /*byte_array*/, 0 /*length*/, 0 /*lint_value */, 0 /*double_value */);
}
//---------------------------------------------------------------------------
......@@ -554,12 +539,11 @@ CMDTypeGenericGPDB::GetDXLDatumNull
// Does the given type have a byte-array-to-LINT mapping?
// Returns true when the type's metadata reports it as text related, or when
// its mdid equals the uuid or cash mdid; false otherwise.
BOOL
CMDTypeGenericGPDB::HasByte2IntMapping
    (
    const IMDType *mdtype
    )
{
    // the mdid is only needed for the two types matched by id below
    IMDId *mdid = mdtype->MDId();
    return mdtype->IsTextRelated()
        || mdid->Equals(&CMDIdGPDB::m_mdid_uuid)
        || mdid->Equals(&CMDIdGPDB::m_mdid_cash);
}
......
......@@ -2946,6 +2946,11 @@ CDXLOperatorFactory::GetDatumVal
if (NULL == func)
{
const XMLCh *attr_val_xml = attrs.getValue(CDXLTokens::XmlstrToken(EdxltokenLintValue));
if (attr_val_xml)
{
return GetDatumStatsLintMappable(dxl_memory_manager, attrs, target_elem, mdid, is_const_null);
}
// generate a datum of generic type
return GetDatumGeneric(dxl_memory_manager, attrs, target_elem, mdid, is_const_null);
}
......
......@@ -61,6 +61,7 @@ CParseHandlerMDType::CParseHandlerMDType
m_mdid_count_op(NULL),
m_is_hashable(false),
m_is_composite(false),
m_is_text_related(false),
m_mdid_base_rel(NULL),
m_mdid_array_type(NULL)
{
......@@ -212,6 +213,22 @@ CParseHandlerMDType::StartElement
EdxltokenMDType
);
const XMLCh *xml_is_text_related = attrs.getValue(CDXLTokens::XmlstrToken(EdxltokenMDTypeIsTextRelated));
if (NULL == xml_is_text_related)
{
m_is_text_related = false;
}
else
{
m_is_text_related = CDXLOperatorFactory::ConvertAttrValueToBool
(
m_parse_handler_mgr->GetDXLMemoryManager(),
xml_is_text_related,
EdxltokenMDTypeIsTextRelated,
EdxltokenMDType
);
}
// get type length
m_type_length = CDXLOperatorFactory::ExtractConvertAttrValueToInt
(
......@@ -462,6 +479,7 @@ CParseHandlerMDType::EndElement
m_is_hashable,
m_is_merge_joinable,
m_is_composite,
m_is_text_related,
m_mdid_base_rel,
m_mdid_array_type,
m_type_length
......
......@@ -2216,7 +2216,10 @@ const
if (m_histogram_buckets->Size() > 0)
{
IMDId *mdid = (*m_histogram_buckets)[0]->GetLowerBound()->GetDatum()->MDId();
return CStatsPredUtils::IsTextRelatedType(mdid);
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
const IMDType *type = md_accessor->RetrieveType(mdid);
return type->IsTextRelated();
}
return false;
}
......
......@@ -234,16 +234,6 @@ CStatsPredUtils::GetStatsCmpType
}
// Reports whether the given type mdid is one of the ids this helper treats
// as text related: varchar, bpchar, text or uuid. The ids are tested in that
// order and the first match decides the result.
BOOL
CStatsPredUtils::IsTextRelatedType(const IMDId *mdid)
{
    if (mdid->Equals(&CMDIdGPDB::m_mdid_varchar))
    {
        return true;
    }
    if (mdid->Equals(&CMDIdGPDB::m_mdid_bpchar))
    {
        return true;
    }
    if (mdid->Equals(&CMDIdGPDB::m_mdid_text))
    {
        return true;
    }
    if (mdid->Equals(&CMDIdGPDB::m_mdid_uuid))
    {
        return true;
    }
    return false;
}
//---------------------------------------------------------------------------
// @function:
// CStatsPredUtils::GetPredStats
......@@ -303,10 +293,12 @@ CStatsPredUtils::GetPredStats
CScalarConst *scalar_const_op = CScalarConst::PopExtractFromConstOrCastConst(expr_right);
GPOS_ASSERT(NULL != scalar_const_op);
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
IDatum *datum = scalar_const_op->GetDatum();
const IMDType *datum_type = md_accessor->RetrieveType(datum->MDId());
BOOL is_text_related_type = IsTextRelatedType(datum->MDId()) && IsTextRelatedType(col_ref->RetrieveType()->MDId());
if (is_text_related_type && !CHistogram::IsOpSupportedForTextFilter(stats_cmp_type))
BOOL is_text_related = datum_type->IsTextRelated() && col_ref->RetrieveType()->IsTextRelated();
if (is_text_related && !CHistogram::IsOpSupportedForTextFilter(stats_cmp_type))
{
return GPOS_NEW(mp) CStatsPredUnsupported(col_ref->Id(), stats_cmp_type);
}
......
......@@ -564,6 +564,7 @@ CDXLTokens::Init
{EdxltokenMDTypeHashable, GPOS_WSZ_LIT("IsHashable")},
{EdxltokenMDTypeMergeJoinable, GPOS_WSZ_LIT("IsMergeJoinable")},
{EdxltokenMDTypeComposite, GPOS_WSZ_LIT("IsComposite")},
{EdxltokenMDTypeIsTextRelated, GPOS_WSZ_LIT("IsTextRelated")},
{EdxltokenMDTypeFixedLength, GPOS_WSZ_LIT("IsFixedLength")},
{EdxltokenMDTypeLength, GPOS_WSZ_LIT("Length")},
{EdxltokenMDTypeByValue, GPOS_WSZ_LIT("PassByValue")},
......
......@@ -291,7 +291,7 @@ CDqaTest:
DqaHavingMax DqaMax DqaMin DqaSubqueryMax DqaNoRedistribute;
CMCVCardinalityTest:
BpCharMCVCardinalityEquals BpCharMCVCardinalityGreaterThan TextMCVCardinalityEquals TextMCVCardinalityGreaterThan VarcharMCVCardinalityEquals VarcharMCVCardinalityGreaterThan
BpCharMCVCardinalityEquals BpCharMCVCardinalityGreaterThan TextMCVCardinalityEquals TextMCVCardinalityGreaterThan VarcharMCVCardinalityEquals VarcharMCVCardinalityGreaterThan Citext-Cardinality Name-Cardinality Char-Cardinality
")
set(mdp_dir "../data/dxl/minidump/")
......
......@@ -4286,7 +4286,7 @@ CTestUtils::CreateGenericDatum
{
dxl_datum = GPOS_NEW(mp) CDXLDatumStatsDoubleMappable(mp, mdid_type, default_type_modifier, false /*is_const_null*/, data, ulbaSize, CDouble(value));
}
else if (CStatsPredUtils::IsTextRelatedType(mdid_type))
else if (pmdtype->IsTextRelated())
{
dxl_datum = GPOS_NEW(mp) CDXLDatumStatsLintMappable(mp, mdid_type, default_type_modifier, false /*is_const_null*/, data, ulbaSize, value);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册