Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
doujutun3207
spark
提交
7791d0c3
S
spark
项目概览
doujutun3207
/
spark
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
spark
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
7791d0c3
编写于
3月 09, 2016
作者:
D
Davies Liu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "[SPARK-13668][SQL] Reorder filter/join predicates to short-circuit isNotNull checks"
This reverts commit
e430614e
.
上级
9634e17d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
14 addition
and
150 deletion
+14
-150
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
...org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
+1
-23
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
...cala/org/apache/spark/sql/execution/SparkStrategies.scala
+13
-24
sql/core/src/test/scala/org/apache/spark/sql/execution/ReorderedPredicateSuite.scala
.../apache/spark/sql/execution/ReorderedPredicateSuite.scala
+0
-103
未找到文件。
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
浏览文件 @
7791d0c3
...
@@ -18,8 +18,6 @@
...
@@ -18,8 +18,6 @@
package
org.apache.spark.sql.catalyst.planning
package
org.apache.spark.sql.catalyst.planning
import
org.apache.spark.Logging
import
org.apache.spark.Logging
import
org.apache.spark.sql.catalyst.expressions.
{
And
,
Expression
,
IsNotNull
,
PredicateHelper
}
import
org.apache.spark.sql.catalyst.plans
import
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import
org.apache.spark.sql.catalyst.trees.TreeNode
import
org.apache.spark.sql.catalyst.trees.TreeNode
...
@@ -28,28 +26,8 @@ import org.apache.spark.sql.catalyst.trees.TreeNode
...
@@ -28,28 +26,8 @@ import org.apache.spark.sql.catalyst.trees.TreeNode
* be used for execution. If this strategy does not apply to the given logical operation then an
* be used for execution. If this strategy does not apply to the given logical operation then an
* empty list should be returned.
* empty list should be returned.
*/
*/
abstract
class
GenericStrategy
[
PhysicalPlan
<:
TreeNode
[
PhysicalPlan
]]
abstract
class
GenericStrategy
[
PhysicalPlan
<:
TreeNode
[
PhysicalPlan
]]
extends
Logging
{
extends
PredicateHelper
with
Logging
{
def
apply
(
plan
:
LogicalPlan
)
:
Seq
[
PhysicalPlan
]
def
apply
(
plan
:
LogicalPlan
)
:
Seq
[
PhysicalPlan
]
/**
 * Attempts to re-order the individual conjunctive predicates of `expr` so that the relatively
 * cheap nullability checks (`IsNotNull`) are evaluated first and can short circuit the rest.
 *
 * The conjuncts are obtained with `splitConjunctivePredicates`, split into the `IsNotNull`
 * predicates and everything else, and re-joined with `And`. Within each of the two groups the
 * original relative order is kept.
 *
 * @param expr the (possibly conjunctive) predicate to reorder
 * @return a conjunction of the same predicates with all `IsNotNull` checks placed first
 */
protected def reorderPredicates(expr: Expression): Expression = {
  // A stable partition is used instead of `sortWith((x, _) => x.isInstanceOf[IsNotNull])`:
  // that comparator reports "less than" in both directions for two IsNotNull predicates, so it
  // is not a valid ordering and the outcome of the sort was not well defined.
  val (nullChecks, remaining) =
    splitConjunctivePredicates(expr).partition(_.isInstanceOf[IsNotNull])
  (nullChecks ++ remaining).reduce(And)
}
// Option-lifted variant of reorderPredicates(expr: Expression), used for the optional
// conditions carried by joins: a defined condition is reordered, `None` is passed through.
protected def reorderPredicates(exprOpt: Option[Expression]): Option[Expression] =
  exprOpt.flatMap(condition => Option(reorderPredicates(condition)))
}
}
/**
/**
...
...
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
浏览文件 @
7791d0c3
...
@@ -66,13 +66,11 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
...
@@ -66,13 +66,11 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case
ExtractEquiJoinKeys
(
case
ExtractEquiJoinKeys
(
LeftSemi
,
leftKeys
,
rightKeys
,
condition
,
left
,
CanBroadcast
(
right
))
=>
LeftSemi
,
leftKeys
,
rightKeys
,
condition
,
left
,
CanBroadcast
(
right
))
=>
joins
.
BroadcastLeftSemiJoinHash
(
joins
.
BroadcastLeftSemiJoinHash
(
leftKeys
,
rightKeys
,
planLater
(
left
),
planLater
(
right
),
leftKeys
,
rightKeys
,
planLater
(
left
),
planLater
(
right
),
condition
)
::
Nil
reorderPredicates
(
condition
))
::
Nil
// Find left semi joins where at least some predicates can be evaluated by matching join keys
// Find left semi joins where at least some predicates can be evaluated by matching join keys
case
ExtractEquiJoinKeys
(
LeftSemi
,
leftKeys
,
rightKeys
,
condition
,
left
,
right
)
=>
case
ExtractEquiJoinKeys
(
LeftSemi
,
leftKeys
,
rightKeys
,
condition
,
left
,
right
)
=>
joins
.
LeftSemiJoinHash
(
joins
.
LeftSemiJoinHash
(
leftKeys
,
rightKeys
,
planLater
(
left
),
planLater
(
right
),
leftKeys
,
rightKeys
,
planLater
(
left
),
planLater
(
right
),
condition
)
::
Nil
reorderPredicates
(
condition
))
::
Nil
case
_
=>
Nil
case
_
=>
Nil
}
}
}
}
...
@@ -113,39 +111,33 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
...
@@ -113,39 +111,33 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case
ExtractEquiJoinKeys
(
Inner
,
leftKeys
,
rightKeys
,
condition
,
left
,
CanBroadcast
(
right
))
=>
case
ExtractEquiJoinKeys
(
Inner
,
leftKeys
,
rightKeys
,
condition
,
left
,
CanBroadcast
(
right
))
=>
Seq
(
joins
.
BroadcastHashJoin
(
Seq
(
joins
.
BroadcastHashJoin
(
leftKeys
,
rightKeys
,
Inner
,
BuildRight
,
reorderPredicates
(
condition
),
leftKeys
,
rightKeys
,
Inner
,
BuildRight
,
condition
,
planLater
(
left
),
planLater
(
right
)))
planLater
(
left
),
planLater
(
right
)))
case
ExtractEquiJoinKeys
(
Inner
,
leftKeys
,
rightKeys
,
condition
,
CanBroadcast
(
left
),
right
)
=>
case
ExtractEquiJoinKeys
(
Inner
,
leftKeys
,
rightKeys
,
condition
,
CanBroadcast
(
left
),
right
)
=>
Seq
(
joins
.
BroadcastHashJoin
(
Seq
(
joins
.
BroadcastHashJoin
(
leftKeys
,
rightKeys
,
Inner
,
BuildLeft
,
reorderPredicates
(
condition
),
planLater
(
left
),
leftKeys
,
rightKeys
,
Inner
,
BuildLeft
,
condition
,
planLater
(
left
),
planLater
(
right
)))
planLater
(
right
)))
case
ExtractEquiJoinKeys
(
Inner
,
leftKeys
,
rightKeys
,
condition
,
left
,
right
)
case
ExtractEquiJoinKeys
(
Inner
,
leftKeys
,
rightKeys
,
condition
,
left
,
right
)
if
RowOrdering
.
isOrderable
(
leftKeys
)
=>
if
RowOrdering
.
isOrderable
(
leftKeys
)
=>
joins
.
SortMergeJoin
(
joins
.
SortMergeJoin
(
leftKeys
,
rightKeys
,
reorderPredicates
(
condition
),
planLater
(
left
),
leftKeys
,
rightKeys
,
condition
,
planLater
(
left
),
planLater
(
right
))
::
Nil
planLater
(
right
))
::
Nil
// --- Outer joins --------------------------------------------------------------------------
// --- Outer joins --------------------------------------------------------------------------
case
ExtractEquiJoinKeys
(
case
ExtractEquiJoinKeys
(
LeftOuter
,
leftKeys
,
rightKeys
,
condition
,
left
,
CanBroadcast
(
right
))
=>
LeftOuter
,
leftKeys
,
rightKeys
,
condition
,
left
,
CanBroadcast
(
right
))
=>
Seq
(
joins
.
BroadcastHashJoin
(
Seq
(
joins
.
BroadcastHashJoin
(
leftKeys
,
rightKeys
,
LeftOuter
,
BuildRight
,
reorderPredicates
(
condition
),
leftKeys
,
rightKeys
,
LeftOuter
,
BuildRight
,
condition
,
planLater
(
left
),
planLater
(
right
)))
planLater
(
left
),
planLater
(
right
)))
case
ExtractEquiJoinKeys
(
case
ExtractEquiJoinKeys
(
RightOuter
,
leftKeys
,
rightKeys
,
condition
,
CanBroadcast
(
left
),
right
)
=>
RightOuter
,
leftKeys
,
rightKeys
,
condition
,
CanBroadcast
(
left
),
right
)
=>
Seq
(
joins
.
BroadcastHashJoin
(
Seq
(
joins
.
BroadcastHashJoin
(
leftKeys
,
rightKeys
,
RightOuter
,
BuildLeft
,
reorderPredicates
(
condition
),
leftKeys
,
rightKeys
,
RightOuter
,
BuildLeft
,
condition
,
planLater
(
left
),
planLater
(
right
)))
planLater
(
left
),
planLater
(
right
)))
case
ExtractEquiJoinKeys
(
joinType
,
leftKeys
,
rightKeys
,
condition
,
left
,
right
)
case
ExtractEquiJoinKeys
(
joinType
,
leftKeys
,
rightKeys
,
condition
,
left
,
right
)
if
RowOrdering
.
isOrderable
(
leftKeys
)
=>
if
RowOrdering
.
isOrderable
(
leftKeys
)
=>
joins
.
SortMergeOuterJoin
(
joins
.
SortMergeOuterJoin
(
leftKeys
,
rightKeys
,
joinType
,
reorderPredicates
(
condition
),
planLater
(
left
),
leftKeys
,
rightKeys
,
joinType
,
condition
,
planLater
(
left
),
planLater
(
right
))
::
Nil
planLater
(
right
))
::
Nil
// --- Cases where this strategy does not apply ---------------------------------------------
// --- Cases where this strategy does not apply ---------------------------------------------
...
@@ -260,12 +252,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
...
@@ -260,12 +252,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
def
apply
(
plan
:
LogicalPlan
)
:
Seq
[
SparkPlan
]
=
plan
match
{
def
apply
(
plan
:
LogicalPlan
)
:
Seq
[
SparkPlan
]
=
plan
match
{
case
j
@
logical
.
Join
(
CanBroadcast
(
left
),
right
,
Inner
|
RightOuter
,
condition
)
=>
case
j
@
logical
.
Join
(
CanBroadcast
(
left
),
right
,
Inner
|
RightOuter
,
condition
)
=>
execution
.
joins
.
BroadcastNestedLoopJoin
(
execution
.
joins
.
BroadcastNestedLoopJoin
(
planLater
(
left
),
planLater
(
right
),
joins
.
BuildLeft
,
j
.
joinType
,
planLater
(
left
),
planLater
(
right
),
joins
.
BuildLeft
,
j
.
joinType
,
condition
)
::
Nil
reorderPredicates
(
condition
))
::
Nil
case
j
@
logical
.
Join
(
left
,
CanBroadcast
(
right
),
Inner
|
LeftOuter
|
LeftSemi
,
condition
)
=>
case
j
@
logical
.
Join
(
left
,
CanBroadcast
(
right
),
Inner
|
LeftOuter
|
LeftSemi
,
condition
)
=>
execution
.
joins
.
BroadcastNestedLoopJoin
(
execution
.
joins
.
BroadcastNestedLoopJoin
(
planLater
(
left
),
planLater
(
right
),
joins
.
BuildRight
,
j
.
joinType
,
planLater
(
left
),
planLater
(
right
),
joins
.
BuildRight
,
j
.
joinType
,
condition
)
::
Nil
reorderPredicates
(
condition
))
::
Nil
case
_
=>
Nil
case
_
=>
Nil
}
}
}
}
...
@@ -275,7 +265,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
...
@@ -275,7 +265,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case
logical
.
Join
(
left
,
right
,
Inner
,
None
)
=>
case
logical
.
Join
(
left
,
right
,
Inner
,
None
)
=>
execution
.
joins
.
CartesianProduct
(
planLater
(
left
),
planLater
(
right
))
::
Nil
execution
.
joins
.
CartesianProduct
(
planLater
(
left
),
planLater
(
right
))
::
Nil
case
logical
.
Join
(
left
,
right
,
Inner
,
Some
(
condition
))
=>
case
logical
.
Join
(
left
,
right
,
Inner
,
Some
(
condition
))
=>
execution
.
Filter
(
reorderPredicates
(
condition
)
,
execution
.
Filter
(
condition
,
execution
.
joins
.
CartesianProduct
(
planLater
(
left
),
planLater
(
right
)))
::
Nil
execution
.
joins
.
CartesianProduct
(
planLater
(
left
),
planLater
(
right
)))
::
Nil
case
_
=>
Nil
case
_
=>
Nil
}
}
...
@@ -292,8 +282,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
...
@@ -292,8 +282,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
}
}
// This join could be very slow or even hang forever
// This join could be very slow or even hang forever
joins
.
BroadcastNestedLoopJoin
(
joins
.
BroadcastNestedLoopJoin
(
planLater
(
left
),
planLater
(
right
),
buildSide
,
joinType
,
planLater
(
left
),
planLater
(
right
),
buildSide
,
joinType
,
condition
)
::
Nil
reorderPredicates
(
condition
))
::
Nil
case
_
=>
Nil
case
_
=>
Nil
}
}
}
}
...
@@ -352,7 +341,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
...
@@ -352,7 +341,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case
logical
.
Project
(
projectList
,
child
)
=>
case
logical
.
Project
(
projectList
,
child
)
=>
execution
.
Project
(
projectList
,
planLater
(
child
))
::
Nil
execution
.
Project
(
projectList
,
planLater
(
child
))
::
Nil
case
logical
.
Filter
(
condition
,
child
)
=>
case
logical
.
Filter
(
condition
,
child
)
=>
execution
.
Filter
(
reorderPredicates
(
condition
)
,
planLater
(
child
))
::
Nil
execution
.
Filter
(
condition
,
planLater
(
child
))
::
Nil
case
e
@
logical
.
Expand
(
_
,
_
,
child
)
=>
case
e
@
logical
.
Expand
(
_
,
_
,
child
)
=>
execution
.
Expand
(
e
.
projections
,
e
.
output
,
planLater
(
child
))
::
Nil
execution
.
Expand
(
e
.
projections
,
e
.
output
,
planLater
(
child
))
::
Nil
case
logical
.
Window
(
projectList
,
windowExprs
,
partitionSpec
,
orderSpec
,
child
)
=>
case
logical
.
Window
(
projectList
,
windowExprs
,
partitionSpec
,
orderSpec
,
child
)
=>
...
...
sql/core/src/test/scala/org/apache/spark/sql/execution/ReorderedPredicateSuite.scala
已删除
100644 → 0
浏览文件 @
9634e17d
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package
org.apache.spark.sql.execution
import
org.apache.spark.sql.catalyst.expressions.
{
Expression
,
IsNotNull
,
PredicateHelper
}
import
org.apache.spark.sql.catalyst.plans.logical
import
org.apache.spark.sql.catalyst.plans.logical.Join
import
org.apache.spark.sql.execution
import
org.apache.spark.sql.execution.joins.LeftSemiJoinHash
import
org.apache.spark.sql.test.SharedSQLContext
/**
 * Checks that physical planning moves `IsNotNull` predicates to the front of filter and join
 * conditions while otherwise keeping the predicates in their original relative order.
 */
class ReorderedPredicateSuite extends SharedSQLContext with PredicateHelper {

  setupTestData()

  /**
   * Verifies that (a) in the new condition, the IsNotNull operators precede the rest of the
   * operators and (b) the relative order among IsNotNull operators, and among the other
   * operators, is the same as in the old condition.
   *
   * @param before the condition as it appears in the optimized logical plan
   * @param after the condition as it appears in the physical plan
   */
  private def verifyStableOrder(before: Expression, after: Expression): Unit = {
    def isNullCheck(predicate: Expression): Boolean = predicate match {
      case _: IsNotNull => true
      case _ => false
    }

    val oldPredicates = splitConjunctivePredicates(before)
    val newPredicates = splitConjunctivePredicates(after)

    // Pair every predicate with its successor. `sliding(2)` would produce one single-element
    // window for a condition with only one predicate and fail with a MatchError on
    // `case Seq(x, y)`; zipping with `drop(1)` just yields no pairs in that case.
    newPredicates.zip(newPredicates.drop(1)).foreach { case (x, y) =>
      // Verify IsNotNull operator ordering
      assert(isNullCheck(x) || !isNullCheck(y), s"IsNotNull predicate $y follows $x")

      // Verify stable sort order
      if (isNullCheck(x) == isNullCheck(y)) {
        assert(
          oldPredicates.indexOf(x) <= oldPredicates.indexOf(y),
          s"relative order of $x and $y was not preserved")
      }
    }
  }

  test("null ordering in filter predicates") {
    val query = sql(
      """
        |SELECT * from testData
        |WHERE value != '5' AND value != '4' AND value IS NOT NULL AND key != 5
      """.stripMargin)
      .queryExecution

    val logicalPlan = query.optimizedPlan
    val physicalPlan = query.sparkPlan
    assert(logicalPlan.find(_.isInstanceOf[logical.Filter]).isDefined)
    assert(physicalPlan.find(_.isInstanceOf[execution.Filter]).isDefined)

    val logicalCondition = logicalPlan.collect {
      case logical.Filter(condition, _) => condition
    }.head

    val physicalCondition = physicalPlan.collect {
      case execution.Filter(condition, _) => condition
    }.head

    verifyStableOrder(logicalCondition, physicalCondition)
  }

  test("null ordering in join predicates") {
    sqlContext.cacheManager.clearCache()
    val query = sql(
      """
        |SELECT * FROM testData t1
        |LEFT SEMI JOIN testData t2
        |ON t1.key = t2.key
        |AND t1.key + t2.key != 5
        |AND CONCAT(t1.value, t2.value) IS NOT NULL
      """.stripMargin)
      .queryExecution

    val logicalPlan = query.optimizedPlan
    val physicalPlan = query.sparkPlan
    assert(logicalPlan.find(_.isInstanceOf[Join]).isDefined)
    assert(physicalPlan.find(_.isInstanceOf[LeftSemiJoinHash]).isDefined)

    // Match on `Some(...)` rather than calling `.get` on the optional join condition.
    val logicalCondition = logicalPlan.collect {
      case Join(_, _, _, Some(condition)) => condition
    }.head

    val physicalCondition = physicalPlan.collect {
      case LeftSemiJoinHash(_, _, _, _, Some(condition)) => condition
    }.head

    verifyStableOrder(logicalCondition, physicalCondition)
  }
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录