提交 e04ae39d 编写于 作者: V Venkatesh Raghavan

Convert Non-correlated EXISTS subquery to a LIMIT 1 AND a JOIN

Enable GPORCA to generate better plans for non-correlated exists subquery in the WHERE clause

Consider the following EXISTS subquery: `(select * from bar)`. GPORCA generates an elaborate count-based implementation of this subquery. If bar is a fact table, the count is going to be expensive.

```
vraghavan=# explain select * from foo where foo.a = foo.b and exists (select * from bar);
                                                    QUERY PLAN
------------------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..1368262.79 rows=400324 width=8)
   ->  Nested Loop  (cost=0.00..1368250.86 rows=133442 width=8)
         Join Filter: true
         ->  Table Scan on foo  (cost=0.00..461.91 rows=133442 width=8)
               Filter: a = b
         ->  Materialize  (cost=0.00..438.57 rows=1 width=1)
               ->  Broadcast Motion 1:3  (slice2)  (cost=0.00..438.57 rows=3 width=1)
                     ->  Result  (cost=0.00..438.57 rows=1 width=1)
                           Filter: (count((count()))) > 0::bigint
                           ->  Aggregate  (cost=0.00..438.57 rows=1 width=8)
                                 ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..438.57 rows=1 width=8)
                                       ->  Aggregate  (cost=0.00..438.57 rows=1 width=8)
                                             ->  Table Scan on bar  (cost=0.00..437.95 rows=332395 width=1)
 Optimizer status: PQO version 2.35.1
(14 rows)
```
The planner, on the other hand, uses LIMIT, as shown in the InitPlan.

```
vraghavan=# explain select * from foo where foo.a = foo.b and exists (select * from bar);
                                           QUERY PLAN
------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice2; segments: 3)  (cost=0.03..13611.14 rows=1001 width=8)
   ->  Result  (cost=0.03..13611.14 rows=334 width=8)
         One-Time Filter: $0
         InitPlan  (slice3)
           ->  Limit  (cost=0.00..0.03 rows=1 width=0)
                 ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..0.03 rows=1 width=0)
                       ->  Limit  (cost=0.00..0.01 rows=1 width=0)
                             ->  Seq Scan on bar  (cost=0.00..11072.84 rows=332395 width=0)
         ->  Seq Scan on foo  (cost=0.00..13611.11 rows=334 width=8)
               Filter: a = b
 Settings:  optimizer=off
 Optimizer status: legacy query optimizer
(12 rows)
```

While GPORCA does not support InitPlans, we can nevertheless generate a better plan by using LIMIT instead of count. After this PR, GPORCA will generate the following plan with a LIMIT clause.

```
vraghavan=# explain select * from foo where foo.a = foo.b and exists (select * from bar);
                                                 QUERY PLAN
------------------------------------------------------------------------------------------------------------
 Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..1368262.73 rows=400324 width=8)
   ->  Nested Loop EXISTS Join  (cost=0.00..1368250.80 rows=133442 width=8)
         Join Filter: true
         ->  Table Scan on foo  (cost=0.00..461.91 rows=133442 width=8)
               Filter: a = b
         ->  Materialize  (cost=0.00..438.57 rows=1 width=1)
               ->  Broadcast Motion 1:3  (slice2)  (cost=0.00..438.57 rows=3 width=1)
                     ->  Limit  (cost=0.00..438.57 rows=1 width=1)
                           ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..438.57 rows=1 width=1)
                                 ->  Limit  (cost=0.00..438.57 rows=1 width=1)
                                       ->  Table Scan on bar  (cost=0.00..437.95 rows=332395 width=1)
 Optimizer status: PQO version 2.35.1
(12 rows)
```
上级 4ad9ce70
......@@ -5,7 +5,7 @@ project(gpopt LANGUAGES CXX C)
set(GPORCA_VERSION_MAJOR 2)
set(GPORCA_VERSION_MINOR 35)
set(GPORCA_VERSION_PATCH 2)
set(GPORCA_VERSION_PATCH 3)
set(GPORCA_VERSION_STRING "${GPORCA_VERSION_MAJOR}.${GPORCA_VERSION_MINOR}.${GPORCA_VERSION_PATCH}")
# Whenever an ABI-breaking change is made to GPORCA, this should be incremented.
......
此差异已折叠。
......@@ -427,10 +427,10 @@
</dxl:LogicalGet>
</dxl:LogicalSelect>
</dxl:Query>
<dxl:Plan Id="0" SpaceSize="279471">
<dxl:Plan Id="0" SpaceSize="244760">
<dxl:GatherMotion InputSegments="0,1" OutputSegments="-1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="16.184570" Rows="1.000000" Width="8"/>
<dxl:Cost StartupCost="0" TotalCost="18.187988" Rows="1.000000" Width="8"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="i">
......@@ -444,7 +444,7 @@
<dxl:SortingColumnList/>
<dxl:NestedLoopJoin JoinType="In" IndexNestedLoopJoin="false">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="15.180664" Rows="1.000000" Width="8"/>
<dxl:Cost StartupCost="0" TotalCost="17.184082" Rows="1.000000" Width="8"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="0" Alias="i">
......@@ -487,150 +487,170 @@
</dxl:TableScan>
<dxl:Materialize Eager="true">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="6.176758" Rows="2.000000" Width="1"/>
<dxl:Cost StartupCost="0" TotalCost="8.180176" Rows="2.000000" Width="1"/>
</dxl:Properties>
<dxl:ProjList/>
<dxl:Filter/>
<dxl:BroadcastMotion InputSegments="0,1" OutputSegments="0,1">
<dxl:BroadcastMotion InputSegments="-1" OutputSegments="0,1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="5.174805" Rows="2.000000" Width="1"/>
<dxl:Cost StartupCost="0" TotalCost="7.178223" Rows="2.000000" Width="1"/>
</dxl:Properties>
<dxl:ProjList/>
<dxl:Filter/>
<dxl:SortingColumnList/>
<dxl:HashJoin JoinType="Inner">
<dxl:Limit>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="4.173828" Rows="1.000000" Width="1"/>
<dxl:Cost StartupCost="0" TotalCost="6.176270" Rows="1.000000" Width="1"/>
</dxl:Properties>
<dxl:ProjList/>
<dxl:Filter/>
<dxl:JoinFilter/>
<dxl:HashCondList>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
<dxl:Ident ColId="10" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:Comparison>
</dxl:HashCondList>
<dxl:TableScan>
<dxl:GatherMotion InputSegments="0,1" OutputSegments="-1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="0.001953" Rows="1.000000" Width="4"/>
<dxl:Cost StartupCost="0" TotalCost="5.174316" Rows="1.000000" Width="1"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="10" Alias="j">
<dxl:Ident ColId="10" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter/>
<dxl:TableDescriptor Mdid="0.1159322.1.1" TableName="c">
<dxl:Columns>
<dxl:Column ColId="9" Attno="1" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="10" Attno="2" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="11" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0"/>
<dxl:Column ColId="12" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="13" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="14" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="15" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="16" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0"/>
<dxl:Column ColId="17" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
<dxl:BroadcastMotion InputSegments="0,1" OutputSegments="0,1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="3.128906" Rows="2.000000" Width="4"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="19" Alias="j">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:ProjList/>
<dxl:Filter/>
<dxl:SortingColumnList/>
<dxl:HashJoin JoinType="In">
<dxl:HashJoin JoinType="Inner">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="2.125000" Rows="1.000000" Width="4"/>
<dxl:Cost StartupCost="0" TotalCost="4.173828" Rows="1.000000" Width="1"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="19" Alias="j">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:ProjList/>
<dxl:Filter/>
<dxl:JoinFilter/>
<dxl:HashCondList>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
<dxl:Ident ColId="10" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Ident ColId="28" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:Comparison>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
<dxl:Ident ColId="18" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Ident ColId="27" ColName="i" TypeMdid="0.23.1.0"/>
</dxl:Comparison>
</dxl:HashCondList>
<dxl:TableScan>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="0.007812" Rows="1.000000" Width="8"/>
<dxl:Cost StartupCost="0" TotalCost="0.001953" Rows="1.000000" Width="4"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="18" Alias="i">
<dxl:Ident ColId="18" ColName="i" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
<dxl:ProjElem ColId="19" Alias="j">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:ProjElem ColId="10" Alias="j">
<dxl:Ident ColId="10" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter/>
<dxl:TableDescriptor Mdid="0.1159276.1.1" TableName="a">
<dxl:TableDescriptor Mdid="0.1159322.1.1" TableName="c">
<dxl:Columns>
<dxl:Column ColId="18" Attno="1" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="19" Attno="2" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="20" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0"/>
<dxl:Column ColId="21" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="22" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="23" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="24" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="25" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0"/>
<dxl:Column ColId="26" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="9" Attno="1" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="10" Attno="2" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="11" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0"/>
<dxl:Column ColId="12" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="13" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="14" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="15" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="16" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0"/>
<dxl:Column ColId="17" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
<dxl:TableScan>
<dxl:BroadcastMotion InputSegments="0,1" OutputSegments="0,1">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="1.023438" Rows="1.000000" Width="8"/>
<dxl:Cost StartupCost="0" TotalCost="3.128906" Rows="2.000000" Width="4"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="27" Alias="i">
<dxl:Ident ColId="27" ColName="i" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
<dxl:ProjElem ColId="28" Alias="j">
<dxl:Ident ColId="28" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:ProjElem ColId="19" Alias="j">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter>
<dxl:ArrayComp OperatorName="&lt;&gt;" OperatorMdid="0.518.1.0" OperatorType="All">
<dxl:Ident ColId="27" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Array ArrayType="0.1007.1.0" ElementType="0.23.1.0" MultiDimensional="false">
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="false" IsByValue="true" Value="10"/>
</dxl:Array>
</dxl:ArrayComp>
</dxl:Filter>
<dxl:TableDescriptor Mdid="0.1159322.1.1" TableName="c">
<dxl:Columns>
<dxl:Column ColId="27" Attno="1" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="28" Attno="2" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="29" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0"/>
<dxl:Column ColId="30" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="31" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="32" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="33" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="34" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0"/>
<dxl:Column ColId="35" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
<dxl:Filter/>
<dxl:SortingColumnList/>
<dxl:HashJoin JoinType="In">
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="2.125000" Rows="1.000000" Width="4"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="19" Alias="j">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter/>
<dxl:JoinFilter/>
<dxl:HashCondList>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Ident ColId="28" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:Comparison>
<dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
<dxl:Ident ColId="18" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Ident ColId="27" ColName="i" TypeMdid="0.23.1.0"/>
</dxl:Comparison>
</dxl:HashCondList>
<dxl:TableScan>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="0.007812" Rows="1.000000" Width="8"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="18" Alias="i">
<dxl:Ident ColId="18" ColName="i" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
<dxl:ProjElem ColId="19" Alias="j">
<dxl:Ident ColId="19" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter/>
<dxl:TableDescriptor Mdid="0.1159276.1.1" TableName="a">
<dxl:Columns>
<dxl:Column ColId="18" Attno="1" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="19" Attno="2" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="20" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0"/>
<dxl:Column ColId="21" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="22" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="23" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="24" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="25" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0"/>
<dxl:Column ColId="26" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
<dxl:TableScan>
<dxl:Properties>
<dxl:Cost StartupCost="0" TotalCost="1.023438" Rows="1.000000" Width="8"/>
</dxl:Properties>
<dxl:ProjList>
<dxl:ProjElem ColId="27" Alias="i">
<dxl:Ident ColId="27" ColName="i" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
<dxl:ProjElem ColId="28" Alias="j">
<dxl:Ident ColId="28" ColName="j" TypeMdid="0.23.1.0"/>
</dxl:ProjElem>
</dxl:ProjList>
<dxl:Filter>
<dxl:ArrayComp OperatorName="&lt;&gt;" OperatorMdid="0.518.1.0" OperatorType="All">
<dxl:Ident ColId="27" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Array ArrayType="0.1007.1.0" ElementType="0.23.1.0" MultiDimensional="false">
<dxl:ConstValue TypeMdid="0.23.1.0" IsNull="false" IsByValue="true" Value="10"/>
</dxl:Array>
</dxl:ArrayComp>
</dxl:Filter>
<dxl:TableDescriptor Mdid="0.1159322.1.1" TableName="c">
<dxl:Columns>
<dxl:Column ColId="27" Attno="1" ColName="i" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="28" Attno="2" ColName="j" TypeMdid="0.23.1.0"/>
<dxl:Column ColId="29" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0"/>
<dxl:Column ColId="30" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="31" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="32" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0"/>
<dxl:Column ColId="33" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0"/>
<dxl:Column ColId="34" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0"/>
<dxl:Column ColId="35" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/>
</dxl:Columns>
</dxl:TableDescriptor>
</dxl:TableScan>
</dxl:HashJoin>
</dxl:BroadcastMotion>
</dxl:HashJoin>
</dxl:BroadcastMotion>
</dxl:HashJoin>
</dxl:GatherMotion>
<dxl:LimitCount>
<dxl:ConstValue TypeMdid="0.20.1.0" IsNull="false" IsByValue="true" Value="1"/>
</dxl:LimitCount>
<dxl:LimitOffset>
<dxl:ConstValue TypeMdid="0.20.1.0" IsNull="false" IsByValue="true" Value="0"/>
</dxl:LimitOffset>
</dxl:Limit>
</dxl:BroadcastMotion>
</dxl:Materialize>
</dxl:NestedLoopJoin>
......
......@@ -1075,6 +1075,9 @@ namespace gpopt
// get execution locality
static
EExecLocalityType ExecLocalityType(CDistributionSpec *pds);
// generate a limit expression on top of the given relational child with the given offset and limit count;
//   pmp      - memory pool handed to the helper
//   pexpr    - relational child expression that the limit is placed on top of
//   ulOffSet - offset of the limit
//   ulCount  - limit count
// NOTE(review): ownership of pexpr and of the returned expression is not visible from this declaration - confirm against the definition
static CExpression *PexprLimit(IMemoryPool *pmp, CExpression *pexpr, ULONG ulOffSet, ULONG ulCount);
}; // class CUtils
// hash set from expressions
......
......@@ -224,19 +224,6 @@ namespace gpopt
static
CExpression *PexprScalarIf(IMemoryPool *pmp, CColRef *pcrBool, CColRef *pcrSum, CColRef *pcrCount, CExpression *pexprSubquery);
// helper for creating a correlated apply expression for existential subquery
static
BOOL FConvertExistOrQuantToScalarSubquery
(
IMemoryPool *pmp,
CExpression *pexprOuter,
CExpression *pexprSubquery,
BOOL fDisjunctionOrNegation,
ESubqueryCtxt esqctxt,
CExpression **ppexprNewOuter,
CExpression **ppexprResidualScalar
);
// helper for creating a correlated apply expression for existential subquery
static
BOOL FCreateCorrelatedApplyForExistentialSubquery
......
此差异已折叠。
......@@ -942,68 +942,6 @@ CSubqueryHandler::FCreateOuterApply
return FCreateOuterApplyForScalarSubquery(pmp, pexprOuter, pexprInner, pexprSubquery, fOuterRefsUnderInner, ppexprNewOuter, ppexprResidualScalar);
}
//---------------------------------------------------------------------------
//	@function:
//		CSubqueryHandler::FConvertExistOrQuantToScalarSubquery
//
//	@doc:
//		Rewrite a quantified/existential subquery into its aggregate-based
//		form and wrap the outer expression and the rewritten inner
//		expression in a correlated apply;
//		an inner correlated apply is produced when the subquery appears in a
//		filter context without disjunction/negation, a left outer correlated
//		apply is produced in every other case;
//		the new apply is returned through ppexprNewOuter, the residual scalar
//		is filled in by the aggregate rewrite through ppexprResidualScalar;
//		the function always returns true
//
//---------------------------------------------------------------------------
BOOL
CSubqueryHandler::FConvertExistOrQuantToScalarSubquery
	(
	IMemoryPool *pmp,
	CExpression *pexprOuter,
	CExpression *pexprSubquery,
	BOOL fDisjunctionOrNegation,
	ESubqueryCtxt esqctxt,
	CExpression **ppexprNewOuter,
	CExpression **ppexprResidualScalar
	)
{
	const BOOL fExistsSubquery = CUtils::FExistentialSubquery(pexprSubquery->Pop());
	GPOS_ASSERT(fExistsSubquery || CUtils::FQuantifiedSubquery(pexprSubquery->Pop()));

	// pick the aggregate rewrite that matches the subquery flavor
	CExpression *pexprAggSubquery = NULL;
	if (fExistsSubquery)
	{
		CXformUtils::ExistentialToAgg(pmp, pexprSubquery, &pexprAggSubquery, ppexprResidualScalar);
	}
	else
	{
		CXformUtils::QuantifiedToAgg(pmp, pexprSubquery, &pexprAggSubquery, ppexprResidualScalar);
	}

	// hold on to the first child of the rewritten subquery before letting
	// go of the rewritten subquery itself
	(*pexprAggSubquery)[0]->AddRef();
	CExpression *pexprApplyInner = (*pexprAggSubquery)[0];
	pexprAggSubquery->Release();

	// column of the first element under child 1 of the new inner expression
	const CColRef *pcrAgg = CScalarProjectElement::PopConvert((*(*pexprApplyInner)[1])[0]->Pop())->Pcr();

	if (fDisjunctionOrNegation || EsqctxtFilter != esqctxt)
	{
		// subquery occurs in a value context or disjunction, we need to create an outer apply expression
		*ppexprNewOuter =
			CUtils::PexprLogicalApply<CLogicalLeftOuterCorrelatedApply>(pmp, pexprOuter, pexprApplyInner, pcrAgg, COperator::EopScalarSubquery);
	}
	else
	{
		// plain filter context: an inner correlated apply is sufficient
		*ppexprNewOuter =
			CUtils::PexprLogicalApply<CLogicalInnerCorrelatedApply>(pmp, pexprOuter, pexprApplyInner, pcrAgg, COperator::EopScalarSubquery);
	}

	return true;
}
//---------------------------------------------------------------------------
// @function:
// CSubqueryHandler::FCreateCorrelatedApplyForQuantifiedSubquery
......@@ -1649,12 +1587,23 @@ CSubqueryHandler::FRemoveExistentialSubquery
{
GPOS_ASSERT(EsqctxtFilter == esqctxt);
CDrvdPropRelational *pdpInner = CDrvdPropRelational::Pdprel(pexprInner->PdpDerive());
// for existential subqueries, any column produced by inner expression
// can be used to check for empty answers; we use first column for that
CColRef *pcr = CDrvdPropRelational::Pdprel(pexprInner->PdpDerive())->PcrsOutput()->PcrFirst();
CColRef *pcr = pdpInner->PcrsOutput()->PcrFirst();
if (COperator::EopScalarSubqueryExists == eopid)
{
CColRefSet *pcrsOuterRefs = pdpInner->PcrsOuter();
if (0 == pcrsOuterRefs->CElements())
{
// add a limit operator on top of the inner child if the subquery does not have
// any outer references. Adding Limit for the correlated case hinders pulling up
// predicates into an EXISTS join
pexprInner = CUtils::PexprLimit(pmp, pexprInner, 0, 1);
}
*ppexprNewOuter = CUtils::PexprLogicalApply<CLogicalLeftSemiApply>(pmp, pexprOuter, pexprInner, pcr, eopid);
}
else
......
......@@ -71,6 +71,8 @@ add_executable(gporca_test
src/unittest/gpopt/search/CSearchStrategyTest.cpp
include/unittest/gpopt/minidump/CAggTest.h
src/unittest/gpopt/minidump/CAggTest.cpp
include/unittest/gpopt/minidump/CExistsSubqueryTest.h
src/unittest/gpopt/minidump/CExistsSubqueryTest.cpp
include/unittest/gpopt/minidump/CCollapseProjectTest.h
src/unittest/gpopt/minidump/CCollapseProjectTest.cpp
include/unittest/gpopt/minidump/CArrayExpansionTest.h
......@@ -230,6 +232,7 @@ add_orca_test(CDirectDispatchTest)
add_orca_test(CTVFTest)
add_orca_test(CPullUpProjectElementTest)
add_orca_test(CAggTest)
add_orca_test(CExistsSubqueryTest)
add_orca_test(CCollapseProjectTest)
add_orca_test(CPruneColumnsTest)
add_orca_test(CMissingStatsTest)
......
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright (C) 2017 Pivotal, Inc.
//
// @filename:
// CExistsSubqueryTest.h
//
// @doc:
// Test for exists and not exists subquery optimization
//---------------------------------------------------------------------------
#ifndef GPOPT_CExistsSubqueryTest_H
#define GPOPT_CExistsSubqueryTest_H
#include "gpos/base.h"
namespace gpopt
{
	// Unittest class for EXISTS and NOT EXISTS subquery optimization
	class CExistsSubqueryTest
	{
		private:

			// marker of the last test that completed successfully
			static gpos::ULONG m_ulExistsSubQueryTestCounter;

		public:

			// unittest entry point
			static gpos::GPOS_RESULT EresUnittest();

			// NOTE(review): presumably executes the individual test cases and
			// advances the counter above - confirm against the .cpp file
			static gpos::GPOS_RESULT EresUnittest_RunTests();

	}; // class CExistsSubqueryTest
}
#endif // !GPOPT_CExistsSubqueryTest_H
// EOF
......@@ -83,6 +83,7 @@
#include "unittest/gpopt/minidump/CTVFTest.h"
#include "unittest/gpopt/minidump/CDMLTest.h"
#include "unittest/gpopt/minidump/CAggTest.h"
#include "unittest/gpopt/minidump/CExistsSubqueryTest.h"
#include "unittest/gpopt/minidump/CCollapseProjectTest.h"
#include "unittest/gpopt/minidump/CPhysicalParallelUnionAllTest.h"
#include "unittest/gpopt/minidump/CPruneColumnsTest.h"
......@@ -149,6 +150,7 @@ static gpos::CUnittest rgut[] =
GPOS_UNITTEST_STD(CDirectDispatchTest),
GPOS_UNITTEST_STD(CTVFTest),
GPOS_UNITTEST_STD(CAggTest),
GPOS_UNITTEST_STD(CExistsSubqueryTest),
GPOS_UNITTEST_STD(CCollapseProjectTest),
GPOS_UNITTEST_STD(CPruneColumnsTest),
GPOS_UNITTEST_STD(CPhysicalParallelUnionAllTest),
......
......@@ -107,8 +107,6 @@ const CHAR *rgszFileNames[] =
"../data/dxl/minidump/EquivClassesLimit.mdp",
"../data/dxl/minidump/Date-TimeStamp-HashJoin.mdp",
"../data/dxl/minidump/TimeStamp-Date-HashJoin.mdp",
"../data/dxl/minidump/Exists-SuperfluousEquality.mdp",
"../data/dxl/minidump/NotExists-SuperfluousEquality.mdp",
"../data/dxl/minidump/MultiLevel-CorrelatedExec.mdp",
"../data/dxl/minidump/OneLevel-CorrelatedExec.mdp",
"../data/dxl/minidump/MultiLevel-IN-Subquery.mdp",
......@@ -146,8 +144,6 @@ const CHAR *rgszFileNames[] =
"../data/dxl/minidump/SubqAll-To-ScalarSubq.mdp",
"../data/dxl/minidump/SubqAll-Limit1.mdp",
"../data/dxl/minidump/ProjectUnderSubq.mdp",
"../data/dxl/minidump/SubqExists-With-External-Corrs.mdp",
"../data/dxl/minidump/SubqExists-Without-External-Corrs.mdp",
#ifndef GPOS_DEBUG
"../data/dxl/minidump/TPCDS-39-InnerJoin-JoinEstimate.mdp",
"../data/dxl/minidump/TPCH-Partitioned-256GB.mdp",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册