From dd83c209f1692a2e5afb72fa7a2d039fd1e682c8 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 4 Mar 2016 16:18:15 +0800 Subject: [PATCH] [SPARK-13603][SQL] support SQL generation for subquery ## What changes were proposed in this pull request? This patch adds support for SQL generation for subquery expressions: each subquery expression is recursively replaced with a SubqueryHolder inside SQLBuilder. ## How was this patch tested? Added unit tests. Author: Davies Liu Closes #11453 from davies/sql_subquery. --- .../sql/catalyst/expressions/subquery.scala | 2 -- .../apache/spark/sql/hive/SQLBuilder.scala | 21 ++++++++++++++----- .../spark/sql/hive/ExpressionToSQLSuite.scala | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index ddf214a4b3..968bbdb1a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -76,6 +76,4 @@ case class ScalarSubquery( override def withNewPlan(plan: LogicalPlan): ScalarSubquery = ScalarSubquery(plan, exprId) override def toString: String = s"subquery#${exprId.id}" - - // TODO: support sql() } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 13a78c609e..9a14ccff57 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -24,13 +24,22 @@ import scala.util.control.NonFatal import org.apache.spark.Logging import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.expressions.{Attribute, NamedExpression, NonSQLExpression, - SortOrder} +import 
org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.optimizer.CollapseProject import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.types.{DataType, NullType} + +/** + * A place holder for generated SQL for subquery expression. + */ +case class SubqueryHolder(query: String) extends LeafExpression with Unevaluable { + override def dataType: DataType = NullType + override def nullable: Boolean = true + override def sql: String = s"($query)" +} /** * A builder class used to convert a resolved logical plan into a SQL query string. Note that this @@ -46,7 +55,9 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi def toSQL: String = { val canonicalizedPlan = Canonicalizer.execute(logicalPlan) try { - canonicalizedPlan.transformAllExpressions { + val replaced = canonicalizedPlan.transformAllExpressions { + case e: SubqueryExpression => + SubqueryHolder(new SQLBuilder(e.query, sqlContext).toSQL) case e: NonSQLExpression => throw new UnsupportedOperationException( s"Expression $e doesn't have a SQL representation" @@ -54,14 +65,14 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi case e => e } - val generatedSQL = toSQL(canonicalizedPlan, true) + val generatedSQL = toSQL(replaced, true) logDebug( s"""Built SQL query string successfully from given logical plan: | |# Original logical plan: |${logicalPlan.treeString} |# Canonicalized logical plan: - |${canonicalizedPlan.treeString} + |${replaced.treeString} |# Generated SQL: |$generatedSQL """.stripMargin) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala index d68c602a88..72765f05e7 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala @@ -268,4 +268,9 @@ class ExpressionToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkSqlGeneration("SELECT input_file_name()") checkSqlGeneration("SELECT monotonically_increasing_id()") } + + test("subquery") { + checkSqlGeneration("SELECT 1 + (SELECT 2)") + checkSqlGeneration("SELECT 1 + (SELECT 2 + (SELECT 3 as a))") + } } -- GitLab