From 035d3acdf3c1be5b309a861d5c5beb803b946b5e Mon Sep 17 00:00:00 2001 From: Jakob Odersky Date: Tue, 8 Mar 2016 18:11:09 -0800 Subject: [PATCH] [SPARK-7286][SQL] Deprecate !== in favour of =!= This PR replaces #9925 which had issues with CI. **Please see the original PR for any previous discussions.** ## What changes were proposed in this pull request? Deprecate the SparkSQL column operator !== and use =!= as an alternative. Fixes subtle issues related to operator precedence (basically, !== does not have the same priority as its logical negation, ===). ## How was this patch tested? All currently existing tests. Author: Jakob Odersky Closes #11588 from jodersky/SPARK-7286. --- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 22 +++++++++++++++++-- .../spark/sql/ColumnExpressionSuite.scala | 2 +- .../org/apache/spark/sql/JoinSuite.scala | 4 ++-- .../parquet/ParquetFilterSuite.scala | 14 ++++++------ .../sql/hive/ExpressionSQLBuilderSuite.scala | 2 +- .../spark/sql/hive/orc/OrcFilterSuite.scala | 2 +- 7 files changed, 33 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index a12f7396fe..63463265e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -83,7 +83,7 @@ package object dsl { def >= (other: Expression): Predicate = GreaterThanOrEqual(expr, other) def === (other: Expression): Predicate = EqualTo(expr, other) def <=> (other: Expression): Predicate = EqualNullSafe(expr, other) - def !== (other: Expression): Predicate = Not(EqualTo(expr, other)) + def =!= (other: Expression): Predicate = Not(EqualTo(expr, other)) def in(list: Expression*): Expression = In(expr, list) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 0fa81594ee..f7ba61d2b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -253,6 +253,23 @@ class Column(protected[sql] val expr: Expression) extends Logging { */ def equalTo(other: Any): Column = this === other + /** + * Inequality test. + * {{{ + * // Scala: + * df.select( df("colA") =!= df("colB") ) + * df.select( !(df("colA") === df("colB")) ) + * + * // Java: + * import static org.apache.spark.sql.functions.*; + * df.filter( col("colA").notEqual(col("colB")) ); + * }}} + * + * @group expr_ops + * @since 2.0.0 + */ + def =!= (other: Any): Column = withExpr{ Not(EqualTo(expr, lit(other).expr)) } + /** * Inequality test. * {{{ @@ -267,8 +284,9 @@ class Column(protected[sql] val expr: Expression) extends Logging { * * @group expr_ops * @since 1.3.0 - */ - def !== (other: Any): Column = withExpr{ Not(EqualTo(expr, lit(other).expr)) } + */ + @deprecated("!== does not have the same precedence as ===, use =!= instead", "2.0.0") + def !== (other: Any): Column = this =!= other /** * Inequality test. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index b349bb6dc9..c2434e46f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -349,7 +349,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext { testData2.collect().toSeq.filter(r => r.getInt(0) == r.getInt(1))) } - test("!==") { + test("=!=") { val nullData = sqlContext.createDataFrame(sparkContext.parallelize( Row(1, 1) :: Row(1, 2) :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 5b98c11ef2..2bd29ef19b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -347,7 +347,7 @@ class JoinSuite extends QueryTest with SharedSQLContext { Row(null, null, 6, "F") :: Nil) checkAnswer( - left.join(right, ($"left.N" === $"right.N") && ($"left.N" !== 3), "full"), + left.join(right, ($"left.N" === $"right.N") && ($"left.N" =!= 3), "full"), Row(1, "A", null, null) :: Row(2, "B", null, null) :: Row(3, "C", null, null) :: @@ -357,7 +357,7 @@ class JoinSuite extends QueryTest with SharedSQLContext { Row(null, null, 6, "F") :: Nil) checkAnswer( - left.join(right, ($"left.N" === $"right.N") && ($"right.N" !== 3), "full"), + left.join(right, ($"left.N" === $"right.N") && ($"right.N" =!= 3), "full"), Row(1, "A", null, null) :: Row(2, "B", null, null) :: Row(3, "C", null, null) :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index e32616fb5c..a64df435d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -120,7 +120,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate('_1 === true, classOf[Eq[_]], true) checkFilterPredicate('_1 <=> true, classOf[Eq[_]], true) - checkFilterPredicate('_1 !== true, classOf[NotEq[_]], false) + checkFilterPredicate('_1 =!= true, classOf[NotEq[_]], false) } } @@ -131,7 +131,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate('_1 === 1, classOf[Eq[_]], 1) checkFilterPredicate('_1 <=> 1, classOf[Eq[_]], 1) - checkFilterPredicate('_1 !== 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) + checkFilterPredicate('_1 =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) checkFilterPredicate('_1 < 2, classOf[Lt[_]], 1) checkFilterPredicate('_1 > 3, classOf[Gt[_]], 4) @@ -157,7 +157,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate('_1 === 1, classOf[Eq[_]], 1) checkFilterPredicate('_1 <=> 1, classOf[Eq[_]], 1) - checkFilterPredicate('_1 !== 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) + checkFilterPredicate('_1 =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) checkFilterPredicate('_1 < 2, classOf[Lt[_]], 1) checkFilterPredicate('_1 > 3, classOf[Gt[_]], 4) @@ -183,7 +183,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate('_1 === 1, classOf[Eq[_]], 1) checkFilterPredicate('_1 <=> 1, classOf[Eq[_]], 1) - checkFilterPredicate('_1 !== 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) + checkFilterPredicate('_1 =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) checkFilterPredicate('_1 < 2, classOf[Lt[_]], 1) checkFilterPredicate('_1 > 3, classOf[Gt[_]], 4) @@ -209,7 +209,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate('_1 === 1, classOf[Eq[_]], 1) checkFilterPredicate('_1 <=> 1, classOf[Eq[_]], 1) - checkFilterPredicate('_1 !== 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) + checkFilterPredicate('_1 =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) checkFilterPredicate('_1 < 2, classOf[Lt[_]], 1) checkFilterPredicate('_1 > 3, classOf[Gt[_]], 4) @@ -238,7 +238,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate('_1 === "1", classOf[Eq[_]], "1") checkFilterPredicate('_1 <=> "1", classOf[Eq[_]], "1") checkFilterPredicate( - '_1 !== "1", classOf[NotEq[_]], (2 to 4).map(i => Row.apply(i.toString))) + '_1 =!= "1", classOf[NotEq[_]], (2 to 4).map(i => Row.apply(i.toString))) checkFilterPredicate('_1 < "2", classOf[Lt[_]], "1") checkFilterPredicate('_1 > "3", classOf[Gt[_]], "4") @@ -272,7 +272,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex '_1.isNotNull, classOf[NotEq[_]], (1 to 4).map(i => Row.apply(i.b)).toSeq) checkBinaryFilterPredicate( - '_1 !== 1.b, classOf[NotEq[_]], (2 to 4).map(i => Row.apply(i.b)).toSeq) + '_1 =!= 1.b, classOf[NotEq[_]], (2 to 4).map(i => Row.apply(i.b)).toSeq) checkBinaryFilterPredicate('_1 < 2.b, classOf[Lt[_]], 1.b) checkBinaryFilterPredicate('_1 > 3.b, classOf[Gt[_]], 4.b) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionSQLBuilderSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionSQLBuilderSuite.scala index e4b4d1861a..38c84abd7c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionSQLBuilderSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionSQLBuilderSuite.scala @@ -47,7 +47,7 @@ class ExpressionSQLBuilderSuite extends SQLBuilderTest { test("binary comparisons") { checkSQL('a.int === 'b.int, "(`a` = `b`)") checkSQL('a.int <=> 'b.int, "(`a` <=> `b`)") - checkSQL('a.int !== 'b.int, "(NOT (`a` = `b`))") + checkSQL('a.int =!= 'b.int, "(NOT (`a` = `b`))") checkSQL('a.int < 'b.int, "(`a` < `b`)") checkSQL('a.int <= 'b.int, "(`a` <= `b`)") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala index cb40596040..46c390c436 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala @@ -211,7 +211,7 @@ class OrcFilterSuite extends QueryTest with OrcTest { |expr = (not leaf-0)""".stripMargin.trim ) checkFilterPredicate( - '_1 !== 1, + '_1 =!= 1, """leaf-0 = (EQUALS _1 1) |expr = (not leaf-0)""".stripMargin.trim ) -- GitLab