提交 185b5f6c 编写于 作者: T twalthr

[FLINK-4179] [table] Additional TPCHQuery3Table example improvements

上级 ec4c9bef
...@@ -17,10 +17,9 @@ ...@@ -17,10 +17,9 @@
*/ */
package org.apache.flink.examples.scala package org.apache.flink.examples.scala
import org.apache.flink.api.table.TableEnvironment
import org.apache.flink.api.table.expressions.Literal
import org.apache.flink.api.scala._ import org.apache.flink.api.scala._
import org.apache.flink.api.scala.table._ import org.apache.flink.api.scala.table._
import org.apache.flink.api.table.TableEnvironment
/** /**
* This program implements a modified version of the TPC-H query 3. The * This program implements a modified version of the TPC-H query 3. The
...@@ -49,7 +48,10 @@ import org.apache.flink.api.scala.table._ ...@@ -49,7 +48,10 @@ import org.apache.flink.api.scala.table._
* GROUP BY * GROUP BY
* l_orderkey, * l_orderkey,
* o_orderdate, * o_orderdate,
* o_shippriority; * o_shippriority
* ORDER BY
* revenue desc,
* o_orderdate;
* }}} * }}}
* *
* Compared to the original TPC-H query this version does not sort the result by revenue * Like the original TPC-H query, this version sorts the result by revenue
...@@ -64,8 +66,9 @@ import org.apache.flink.api.scala.table._ ...@@ -64,8 +66,9 @@ import org.apache.flink.api.scala.table._
* TPCHQuery3Table <lineitem-csv path> <customer-csv path> <orders-csv path> <result path> * TPCHQuery3Table <lineitem-csv path> <customer-csv path> <orders-csv path> <result path>
* }}} * }}}
* *
* This example shows how to use: * This example shows how to:
* - Table API expressions * - Convert DataSets to Tables
* - Use Table API expressions
* *
*/ */
object TPCHQuery3Table { object TPCHQuery3Table {
...@@ -76,23 +79,23 @@ object TPCHQuery3Table { ...@@ -76,23 +79,23 @@ object TPCHQuery3Table {
} }
// set filter date // set filter date
val date = java.sql.Date.valueOf("1995-03-12") val date = "1995-03-12".toDate
// get execution environment // get execution environment
val env = ExecutionEnvironment.getExecutionEnvironment val env = ExecutionEnvironment.getExecutionEnvironment
val tEnv = TableEnvironment.getTableEnvironment(env) val tEnv = TableEnvironment.getTableEnvironment(env)
val lineitems = getLineitemDataSet(env) val lineitems = getLineitemDataSet(env)
.filter( l => java.sql.Date.valueOf(l.shipDate).after(date) ).toTable(tEnv) .toTable(tEnv, 'id, 'extdPrice, 'discount, 'shipDate)
.as('id, 'extdPrice, 'discount, 'shipDate) .filter('shipDate.toDate > date)
val customers = getCustomerDataSet(env).toTable(tEnv) val customers = getCustomerDataSet(env)
.as('id, 'mktSegment) .toTable(tEnv, 'id, 'mktSegment)
.filter( 'mktSegment === "AUTOMOBILE" ) .filter('mktSegment === "AUTOMOBILE")
val orders = getOrdersDataSet(env) val orders = getOrdersDataSet(env)
.filter( o => java.sql.Date.valueOf(o.orderDate).before(date) ).toTable(tEnv) .toTable(tEnv, 'orderId, 'custId, 'orderDate, 'shipPrio)
.as('orderId, 'custId, 'orderDate, 'shipPrio) .filter('orderDate.toDate < date)
val items = val items =
orders.join(customers) orders.join(customers)
...@@ -102,19 +105,20 @@ object TPCHQuery3Table { ...@@ -102,19 +105,20 @@ object TPCHQuery3Table {
.where('orderId === 'id) .where('orderId === 'id)
.select( .select(
'orderId, 'orderId,
'extdPrice * (Literal(1.0f) - 'discount) as 'revenue, 'extdPrice * (1.0f.toExpr - 'discount) as 'revenue,
'orderDate, 'orderDate,
'shipPrio) 'shipPrio)
val result = items val result = items
.groupBy('orderId, 'orderDate, 'shipPrio) .groupBy('orderId, 'orderDate, 'shipPrio)
.select('orderId, 'revenue.sum, 'orderDate, 'shipPrio) .select('orderId, 'revenue.sum as 'revenue, 'orderDate, 'shipPrio)
.orderBy('revenue.desc, 'orderDate.asc)
// emit result // emit result
result.writeAsCsv(outputPath, "\n", "|") result.writeAsCsv(outputPath, "\n", "|")
// execute program // execute program
env.execute("Scala TPCH Query 3 (Expression) Example") env.execute("Scala TPCH Query 3 (Table API Expression) Example")
} }
// ************************************************************************* // *************************************************************************
...@@ -145,7 +149,7 @@ object TPCHQuery3Table { ...@@ -145,7 +149,7 @@ object TPCHQuery3Table {
System.err.println("This program expects data from the TPC-H benchmark as input data.\n" + System.err.println("This program expects data from the TPC-H benchmark as input data.\n" +
" Due to legal restrictions, we can not ship generated data.\n" + " Due to legal restrictions, we can not ship generated data.\n" +
" You can find the TPC-H data generator at http://www.tpc.org/tpch/.\n" + " You can find the TPC-H data generator at http://www.tpc.org/tpch/.\n" +
" Usage: TPCHQuery3 <lineitem-csv path> <customer-csv path>" + " Usage: TPCHQuery3 <lineitem-csv path> <customer-csv path> " +
"<orders-csv path> <result path>") "<orders-csv path> <result path>")
false false
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册